diff --git a/deps/icu-small/README-FULL-ICU.txt b/deps/icu-small/README-FULL-ICU.txt index df63187d3ae6e4..2af8c5faf34727 100644 --- a/deps/icu-small/README-FULL-ICU.txt +++ b/deps/icu-small/README-FULL-ICU.txt @@ -1,8 +1,8 @@ ICU sources - auto generated by shrink-icu-src.py This directory contains the ICU subset used by --with-intl=full-icu -It is a strict subset of ICU 68 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt68l.dat.bz2 : compressed data file +It is a strict subset of ICU 69 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt69l.dat.bz2 : compressed data file To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/common/bytestriebuilder.cpp b/deps/icu-small/source/common/bytestriebuilder.cpp index ec1ab7d8f5080e..28256f272a74a3 100644 --- a/deps/icu-small/source/common/bytestriebuilder.cpp +++ b/deps/icu-small/source/common/bytestriebuilder.cpp @@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { U_ASSERT(i>=0); if(i<=BytesTrie::kMaxOneByteDelta) { return write(i); + } else { + char intBytes[5]; + return write(intBytes, internalEncodeDelta(i, intBytes)); } - char intBytes[5]; - int32_t length; +} + +int32_t +BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) { + U_ASSERT(i>=0); + if(i<=BytesTrie::kMaxOneByteDelta) { + intBytes[0]=(char)i; + return 1; + } + int32_t length=1; if(i<=BytesTrie::kMaxTwoByteDelta) { intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); - length=1; } else { if(i<=BytesTrie::kMaxThreeByteDelta) { intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); - length=2; } else { if(i<=0xffffff) { intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; - length=3; } else { intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; intBytes[1]=(char)(i>>24); - length=4; + length=2; } - intBytes[1]=(char)(i>>16); + intBytes[length++]=(char)(i>>16); } - intBytes[1]=(char)(i>>8); + 
intBytes[length++]=(char)(i>>8); } intBytes[length++]=(char)i; - return write(intBytes, length); + return length; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/charstr.cpp b/deps/icu-small/source/common/charstr.cpp index 318a185b3f1d64..c35622882c4523 100644 --- a/deps/icu-small/source/common/charstr.cpp +++ b/deps/icu-small/source/common/charstr.cpp @@ -14,6 +14,8 @@ * created by: Markus W. Scherer */ +#include + #include "unicode/utypes.h" #include "unicode/putil.h" #include "charstr.h" @@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error return *this; } +CharString &CharString::appendNumber(int32_t number, UErrorCode &status) { + if (number < 0) { + this->append('-', status); + if (U_FAILURE(status)) { + return *this; + } + } + + if (number == 0) { + this->append('0', status); + return *this; + } + + int32_t numLen = 0; + while (number != 0) { + int32_t residue = number % 10; + number /= 10; + this->append(std::abs(residue) + '0', status); + numLen++; + if (U_FAILURE(status)) { + return *this; + } + } + + int32_t start = this->length() - numLen, end = this->length() - 1; + while(start < end) { + std::swap(this->data()[start++], this->data()[end--]); + } + + return *this; +} + char *CharString::getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, int32_t &resultCapacity, diff --git a/deps/icu-small/source/common/charstr.h b/deps/icu-small/source/common/charstr.h index 6619faac618193..175acd1c0a2b40 100644 --- a/deps/icu-small/source/common/charstr.h +++ b/deps/icu-small/source/common/charstr.h @@ -127,6 +127,9 @@ class U_COMMON_API CharString : public UMemory { return append(s.data(), s.length(), errorCode); } CharString &append(const char *s, int32_t sLength, UErrorCode &status); + + CharString &appendNumber(int32_t number, UErrorCode &status); + /** * Returns a writable buffer for appending and writes the buffer's capacity to * resultCapacity. 
Guarantees resultCapacity>=minCapacity if U_SUCCESS(). diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index c9156f253cf1c7..a925f3df637378 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -31,14 +31,63 @@ #include #include #include "unicode/localpointer.h" +#include "uassert.h" #if U_DEBUG && defined(UPRV_MALLOC_COUNT) #include #endif - -#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) -#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) +// uprv_memcpy and uprv_memmove +#if defined(__clang__) +#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + /* Suppress warnings about addresses that will never be NULL */ \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Waddress\"") \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + _Pragma("clang diagnostic pop") \ + U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + /* Suppress warnings about addresses that will never be NULL */ \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Waddress\"") \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + _Pragma("clang diagnostic pop") \ + U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#elif defined(__GNUC__) +#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + /* Suppress warnings about addresses that will never be NULL */ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Waddress\"") \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + _Pragma("GCC diagnostic pop") \ + U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + /* Suppress warnings about addresses that will never be NULL */ \ + 
_Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Waddress\"") \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + _Pragma("GCC diagnostic pop") \ + U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#else +#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \ + U_ASSERT(dst != NULL); \ + U_ASSERT(src != NULL); \ + U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \ +} UPRV_BLOCK_MACRO_END +#endif /** * \def UPRV_LENGTHOF diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 6d3bc878078847..88533cedcecc83 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -265,13 +265,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text, goto foundBest; } do { - int32_t wordsMatched = 1; if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound%THAI_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound%THAI_LOOKAHEAD].markCurrent(); // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { @@ -503,13 +499,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text, goto foundBest; } do { - int32_t wordsMatched = 1; if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound%LAO_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } + // Followed by another dictionary word; mark first 
word as a good candidate + words[wordsFound%LAO_LOOKAHEAD].markCurrent(); // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { @@ -699,13 +691,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text, goto foundBest; } do { - int32_t wordsMatched = 1; if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound%BURMESE_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound%BURMESE_LOOKAHEAD].markCurrent(); // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { @@ -908,13 +896,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text, goto foundBest; } do { - int32_t wordsMatched = 1; if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); // If we're already at the end of the range, we're done if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { diff --git a/deps/icu-small/source/common/edits.cpp b/deps/icu-small/source/common/edits.cpp index 95f0c19a728cf4..92ca36fb5d04a3 100644 --- a/deps/icu-small/source/common/edits.cpp +++ b/deps/icu-small/source/common/edits.cpp @@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT { } Edits &Edits::operator=(const Edits &other) { + if (this == &other) { return *this; } // self-assignment: no-op length = other.length; delta = other.delta; numChanges = other.numChanges; diff --git 
a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index ae7cf5270aeabf..bc9c576223d70e 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -20,6 +20,7 @@ #include "ubrkimpl.h" // U_ICUDATA_BRKITR #include "uvector.h" #include "cmemory.h" +#include "umutex.h" U_NAMESPACE_BEGIN @@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory { public: SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { } - SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } - SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; } - virtual ~SimpleFilteredSentenceBreakData(); + SimpleFilteredSentenceBreakData *incr() { + umtx_atomic_inc(&refcount); + return this; + } + SimpleFilteredSentenceBreakData *decr() { + if(umtx_atomic_dec(&refcount) <= 0) { + delete this; + } + return 0; + } + virtual ~SimpleFilteredSentenceBreakData(); + + bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); } + bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); } - LocalPointer fForwardsPartialTrie; // Has ".a" for "a.M." - LocalPointer fBackwardsTrie; // i.e. ".srM" for Mrs. - int32_t refcount; + const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; } + const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; } + +private: + // These tries own their data arrays. + // They are shared and must therefore not be modified. + LocalPointer fForwardsPartialTrie; // Has ".a" for "a.M." + LocalPointer fBackwardsTrie; // i.e. ".srM" for Mrs. 
+ u_atomic_int32_t refcount; }; SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} @@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), fDelegate(adopt) { - // all set.. + if (fData == nullptr) { + delete forwards; + delete backwards; + if (U_SUCCESS(status)) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } } SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { @@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { int32_t bestValue = -1; // loops while 'n' points to an exception. utext_setNativeIndex(fText.getAlias(), n); // from n.. - fData->fBackwardsTrie->reset(); - UChar32 uch; //if(debug2) u_printf(" n@ %d\n", n); // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") - if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here?? + if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here?? // TODO only do this the 1st time? //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); } else { //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); - uch = utext_next32(fText.getAlias()); + utext_next32(fText.getAlias()); //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); } - UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; - - while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. - USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie - if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far - bestPosn = utext_getNativeIndex(fText.getAlias()); - bestValue = fData->fBackwardsTrie->getValue(); - } - //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); + { + // Do not modify the shared trie! 
+ UCharsTrie iter(fData->getBackwardsTrie()); + UChar32 uch; + while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards + UStringTrieResult r = iter.nextForCodePoint(uch); + if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far + bestPosn = utext_getNativeIndex(fText.getAlias()); + bestValue = iter.getValue(); + } + if(!USTRINGTRIE_HAS_NEXT(r)) { + break; + } + //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); + } } - if(USTRINGTRIE_MATCHES(r)) { // exact match? - //if(debug2) u_printf("revfBackwardsTrie->getValue(); - bestPosn = utext_getNativeIndex(fText.getAlias()); - //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - } + //if(bestValue >= 0) { + //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + //} if(bestPosn>=0) { //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? - //int32_t bestValue = fBackwardsTrie->getValue(); + //int32_t bestValue = iter.getValue(); ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue); if(bestValue == kMATCH) { // exact match! //if(debug2) u_printf(" exact backward match\n"); return kExceptionHere; // See if the next is another exception. } else if(bestValue == kPARTIAL - && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie + && fData->hasForwardsPartialTrie()) { // make sure there's a forward trie //if(debug2) u_printf(" partial backward match\n"); // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie // to see if it matches something going forward. 
- fData->fForwardsPartialTrie->reset(); UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .. //if(debug2) u_printf("Retrying at %d\n", bestPosn); + // Do not modify the shared trie! + UCharsTrie iter(fData->getForwardsPartialTrie()); + UChar32 uch; while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && - USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) { + USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) { //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias())); } if(USTRINGTRIE_MATCHES(rfwd)) { @@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { int32_t SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { if(n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions + !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions return n; } // OK, do we need to break here? @@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { int32_t SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { if(n == 0 || n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions + !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions return n; } // OK, do we need to break here? 
@@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) { UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { if (!fDelegate->isBoundary(offset)) return false; // no break to suppress - if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions + if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions UErrorCode status = U_ZERO_ERROR; resetState(status); diff --git a/deps/icu-small/source/common/hash.h b/deps/icu-small/source/common/hash.h index fa1e4ee9affc7b..0b0f349999ce82 100644 --- a/deps/icu-small/source/common/hash.h +++ b/deps/icu-small/source/common/hash.h @@ -85,16 +85,22 @@ class U_COMMON_API Hashtable : public UMemory { inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status); + inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status); + inline void* get(const UnicodeString& key) const; inline int32_t geti(const UnicodeString& key) const; + inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const; + inline void* remove(const UnicodeString& key); inline int32_t removei(const UnicodeString& key); inline void removeAll(void); + inline UBool containsKey(const UnicodeString& key) const; + inline const UHashElement* find(const UnicodeString& key) const; /** @@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo return uhash_puti(hash, new UnicodeString(key), value, &status); } +inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value, + UErrorCode& status) { + return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status); +} + inline void* Hashtable::get(const UnicodeString& key) const { return uhash_get(hash, &key); } @@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const { return uhash_geti(hash, &key); } +inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const { + return 
uhash_getiAndFound(hash, &key, &found); +} + inline void* Hashtable::remove(const UnicodeString& key) { return uhash_remove(hash, &key); } @@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) { return uhash_removei(hash, &key); } +inline UBool Hashtable::containsKey(const UnicodeString& key) const { + return uhash_containsKey(hash, &key); +} + inline const UHashElement* Hashtable::find(const UnicodeString& key) const { return uhash_find(hash, &key); } diff --git a/deps/icu-small/source/common/localematcher.cpp b/deps/icu-small/source/common/localematcher.cpp index 5795cbf87e633a..132aee290e81a8 100644 --- a/deps/icu-small/source/common/localematcher.cpp +++ b/deps/icu-small/source/common/localematcher.cpp @@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) { int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { return suppLength; } - int32_t index = uhash_geti(supportedLsrToIndex, &lsr); - if (index == 0) { - uhash_puti(supportedLsrToIndex, const_cast(&lsr), i + 1, &errorCode); + if (!uhash_containsKey(supportedLsrToIndex, &lsr)) { + uhash_putiAllowZero(supportedLsrToIndex, const_cast(&lsr), i, &errorCode); if (U_SUCCESS(errorCode)) { supportedLSRs[suppLength] = &lsr; supportedIndexes[suppLength++] = i; @@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai int32_t bestSupportedLsrIndex = -1; for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) { // Quick check for exact maximized LSR. - // Returns suppIndex+1 where 0 means not found. 
if (supportedLsrToIndex != nullptr) { desiredLSR.setHashCode(); - int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR); - if (index != 0) { - int32_t suppIndex = index - 1; + UBool found = false; + int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found); + if (found) { if (remainingIter != nullptr) { remainingIter->rememberCurrent(desiredIndex, errorCode); } diff --git a/deps/icu-small/source/common/localeprioritylist.cpp b/deps/icu-small/source/common/localeprioritylist.cpp index 8916b121be3057..4455eedb75e67c 100644 --- a/deps/icu-small/source/common/localeprioritylist.cpp +++ b/deps/icu-small/source/common/localeprioritylist.cpp @@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e if (U_FAILURE(errorCode)) { return false; } } LocalPointer clone; - int32_t index = uhash_geti(map, &locale); - if (index != 0) { + UBool found = false; + int32_t index = uhash_getiAndFound(map, &locale, &found); + if (found) { // Duplicate: Remove the old item and append it anew. - LocaleAndWeight &lw = list->array[index - 1]; + LocaleAndWeight &lw = list->array[index]; clone.adoptInstead(lw.locale); lw.locale = nullptr; lw.weight = 0; ++numRemoved; } if (weight <= 0) { // do not add q=0 - if (index != 0) { + if (found) { // Not strictly necessary but cleaner. 
uhash_removei(map, &locale); } @@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e return false; } } - uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode); + uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode); if (U_FAILURE(errorCode)) { return false; } LocaleAndWeight &lw = list->array[listLength]; lw.locale = clone.orphan(); diff --git a/deps/icu-small/source/common/locdispnames.cpp b/deps/icu-small/source/common/locdispnames.cpp index a60dd0c9c1ffa7..3166c76bf5a7fd 100644 --- a/deps/icu-small/source/common/locdispnames.cpp +++ b/deps/icu-small/source/common/locdispnames.cpp @@ -698,7 +698,7 @@ uloc_getDisplayName(const char *locale, } /* end switch */ if (len>0) { - /* we addeed a component, so add separator and write it if there's room. */ + /* we added a component, so add separator and write it if there's room. */ if(len+sepLen<=cap) { const UChar * plimit = p + len; for (; p < plimit; p++) { diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index 874e4a70556f31..0d506293a99eca 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -254,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) Locale::~Locale() { - if (baseName != fullName) { + if ((baseName != fullName) && (baseName != fullNameBuffer)) { uprv_free(baseName); } baseName = NULL; @@ -466,7 +466,7 @@ Locale& Locale::operator=(const Locale& other) { } Locale& Locale::operator=(Locale&& other) U_NOEXCEPT { - if (baseName != fullName) uprv_free(baseName); + if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName); if (fullName != fullNameBuffer) uprv_free(fullName); if (other.fullName == other.fullNameBuffer) { @@ -524,7 +524,7 @@ static const char* const KNOWN_CANONICALIZED[] = { "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA", "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", 
"mn_MN", "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP", - "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", + "nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si", "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr", "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta", @@ -627,6 +627,17 @@ class AliasDataBuilder { LocalMemory& types, LocalMemory& replacementIndexes, int32_t &length, UErrorCode &status); + + // Read the subdivisionAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement variant. + void readSubdivisionAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, UErrorCode &status); }; /** @@ -647,6 +658,7 @@ class AliasData : public UMemory { const CharStringMap& scriptMap() const { return script; } const CharStringMap& territoryMap() const { return territory; } const CharStringMap& variantMap() const { return variant; } + const CharStringMap& subdivisionMap() const { return subdivision; } static void U_CALLCONV loadData(UErrorCode &status); static UBool U_CALLCONV cleanup(); @@ -658,11 +670,13 @@ class AliasData : public UMemory { CharStringMap scriptMap, CharStringMap territoryMap, CharStringMap variantMap, + CharStringMap subdivisionMap, CharString* strings) : language(std::move(languageMap)), script(std::move(scriptMap)), territory(std::move(territoryMap)), variant(std::move(variantMap)), + subdivision(std::move(subdivisionMap)), strings(strings) { } @@ -676,6 +690,7 @@ class AliasData : public UMemory { CharStringMap script; CharStringMap territory; CharStringMap variant; + CharStringMap subdivision; 
CharString* strings; friend class AliasDataBuilder; @@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias( status); } +/** + * Read the subdivisionAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement regions. + */ +void +AliasDataBuilder::readSubdivisionAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) { }, + status); +} + /** * Initializes the alias data from the ICU resource bundles. The alias data * contains alias of language, country, script and variants. @@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) { ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status)); LocalUResourceBundlePointer variantAlias( ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status)); + LocalUResourceBundlePointer subdivisionAlias( + ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status)); if (U_FAILURE(status)) { return nullptr; } int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0, - variantLength = 0; + variantLength = 0, subdivisionLength = 0; // Read the languageAlias into languageTypes, languageReplacementIndexes // and strings @@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) { variantReplacementIndexes, variantLength, status); + // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes + // and strings + LocalMemory subdivisionTypes; + LocalMemory subdivisionReplacementIndexes; + readSubdivisionAlias(subdivisionAlias.getAlias(), + 
&strings, + subdivisionTypes, + subdivisionReplacementIndexes, + subdivisionLength, status); + if (U_FAILURE(status)) { return nullptr; } @@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) { status); } + // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes. + CharStringMap subdivisionMap(2, status); + for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) { + subdivisionMap.put(subdivisionTypes[i], + strings.get(subdivisionReplacementIndexes[i]), + status); + } + if (U_FAILURE(status)) { return nullptr; } @@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) { std::move(scriptMap), std::move(territoryMap), std::move(variantMap), + std::move(subdivisionMap), strings.orphanCharStrings()); if (data == nullptr) { @@ -1105,6 +1169,14 @@ class AliasReplacer { // Replace by using variantAlias. bool replaceVariant(UErrorCode& status); + + // Replace by using subdivisionAlias. + bool replaceSubdivision(StringPiece subdivision, + CharString& output, UErrorCode& status); + + // Replace transformed extensions. + bool replaceTransformedExtensions( + CharString& transformedExtensions, CharString& output, UErrorCode& status); }; CharString& @@ -1294,7 +1366,6 @@ AliasReplacer::replaceLanguage( } } if (replacedExtensions != nullptr) { - // TODO(ICU-21292) // DO NOTHING // UTS35 does not specifiy what should we do if we have extensions in the // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have @@ -1435,6 +1506,106 @@ AliasReplacer::replaceVariant(UErrorCode& status) return false; } +bool +AliasReplacer::replaceSubdivision( + StringPiece subdivision, CharString& output, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + const char *replacement = data->subdivisionMap().get(subdivision.data()); + if (replacement != nullptr) { + const char* firstSpace = uprv_strchr(replacement, ' '); + // Found replacement data for this subdivision. 
+ size_t len = (firstSpace != nullptr) ? + (firstSpace - replacement) : uprv_strlen(replacement); + if (2 <= len && len <= 8) { + output.append(replacement, (int32_t)len, status); + if (2 == len) { + // Add 'zzzz' based on changes to UTS #35 for CLDR-14312. + output.append("zzzz", 4, status); + } + } + return true; + } + return false; +} + +bool +AliasReplacer::replaceTransformedExtensions( + CharString& transformedExtensions, CharString& output, UErrorCode& status) +{ + // The content of the transformedExtensions will be modified in this + // function to NULL-terminating (tkey-tvalue) pairs. + if (U_FAILURE(status)) { + return false; + } + int32_t len = transformedExtensions.length(); + const char* str = transformedExtensions.data(); + const char* tkey = ultag_getTKeyStart(str); + int32_t tlangLen = (tkey == str) ? 0 : + ((tkey == nullptr) ? len : static_cast((tkey - str - 1))); + CharStringByteSink sink(&output); + if (tlangLen > 0) { + Locale tlang = LocaleBuilder() + .setLanguageTag(StringPiece(str, tlangLen)) + .build(status); + tlang.canonicalize(status); + tlang.toLanguageTag(sink, status); + if (U_FAILURE(status)) { + return false; + } + T_CString_toLowerCase(output.data()); + } + if (tkey != nullptr) { + // We need to sort the tfields by tkey + UVector tfields(status); + if (U_FAILURE(status)) { + return false; + } + do { + const char* tvalue = uprv_strchr(tkey, '-'); + if (tvalue == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + const char* nextTKey = ultag_getTKeyStart(tvalue); + if (nextTKey != nullptr) { + *((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue + } + tfields.insertElementAt((void*)tkey, tfields.size(), status); + if (U_FAILURE(status)) { + return false; + } + tkey = nextTKey; + } while (tkey != nullptr); + tfields.sort([](UElement e1, UElement e2) -> int8_t { + // uprv_strcmp return int and in some platform, such as arm64-v8a, + // it may return positive values > 127 which cause the casted value + // of int8_t negative. 
+ int res = uprv_strcmp( + (const char*)e1.pointer, (const char*)e2.pointer); + return (res == 0) ? 0 : ((res > 0) ? 1 : -1); + }, status); + for (int32_t i = 0; i < tfields.size(); i++) { + if (output.length() > 0) { + output.append('-', status); + } + const char* tfield = (const char*) tfields.elementAt(i); + const char* tvalue = uprv_strchr(tfield, '-'); + // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue. + U_ASSERT(tvalue != nullptr); + *((char*)tvalue++) = '\0'; // NULL terminate tkey + output.append(tfield, status).append('-', status); + const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr); + output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status); + } + } + if (U_FAILURE(status)) { + return false; + } + return true; +} + CharString& AliasReplacer::outputToString( CharString& out, UErrorCode status) @@ -1453,8 +1624,12 @@ AliasReplacer::outputToString( out.append(SEP_CHAR, status); } variants.sort([](UElement e1, UElement e2) -> int8_t { - return uprv_strcmp( + // uprv_strcmp return int and in some platform, such as arm64-v8a, + // it may return positive values > 127 which cause the casted value + // of int8_t negative. + int res = uprv_strcmp( (const char*)e1.pointer, (const char*)e2.pointer); + return (res == 0) ? 0 : ((res > 0) ? 
1 : -1); }, status); int32_t variantsStart = out.length(); for (int32_t i = 0; i < variants.size(); i++) { @@ -1497,7 +1672,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status region = nullptr; } const char* variantsStr = locale.getVariant(); - const char* extensionsStr = locale_getKeywordsStart(locale.getName()); CharString variantsBuff(variantsStr, -1, status); if (!variantsBuff.isEmpty()) { if (U_FAILURE(status)) { return false; } @@ -1516,8 +1690,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status // Sort the variants variants.sort([](UElement e1, UElement e2) -> int8_t { - return uprv_strcmp( + // uprv_strcmp return int and in some platform, such as arm64-v8a, + // it may return positive values > 127 which cause the casted value + // of int8_t negative. + int res = uprv_strcmp( (const char*)e1.pointer, (const char*)e2.pointer); + return (res == 0) ? 0 : ((res > 0) ? 1 : -1); }, status); // A changed count to assert when loop too many times. @@ -1561,11 +1739,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status if (U_FAILURE(status)) { return false; } // Nothing changed and we know the order of the vaiants are not change // because we have no variant or only one. 
- if (changed == 0 && variants.size() <= 1) { + const char* extensionsStr = locale_getKeywordsStart(locale.getName()); + if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) { return false; } outputToString(out, status); + if (U_FAILURE(status)) { + return false; + } if (extensionsStr != nullptr) { + changed = 0; + Locale temp(locale); + LocalPointer iter(locale.createKeywords(status)); + if (U_SUCCESS(status) && !iter.isNull()) { + const char* key; + while ((key = iter->next(nullptr, status)) != nullptr) { + if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 || + uprv_strcmp("t", key) == 0) { + CharString value; + CharStringByteSink valueSink(&value); + locale.getKeywordValue(key, valueSink, status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + CharString replacement; + if (uprv_strlen(key) == 2) { + if (replaceSubdivision(value.toStringPiece(), replacement, status)) { + changed++; + temp.setKeywordValue(key, replacement.data(), status); + } + } else { + U_ASSERT(uprv_strcmp(key, "t") == 0); + if (replaceTransformedExtensions(value, replacement, status)) { + changed++; + temp.setKeywordValue(key, replacement.data(), status); + } + } + if (U_FAILURE(status)) { + return false; + } + } + } + } + if (changed != 0) { + extensionsStr = locale_getKeywordsStart(temp.getName()); + } out.append(extensionsStr, status); } if (U_FAILURE(status)) { @@ -1573,8 +1792,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status } // If the tag is not changed, return. 
if (uprv_strcmp(out.data(), locale.getName()) == 0) { - U_ASSERT(changed == 0); - U_ASSERT(variants.size() > 1); out.clear(); return false; } @@ -1636,7 +1853,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) { fIsBogus = FALSE; /* Free our current storage */ - if (baseName != fullName) { + if ((baseName != fullName) && (baseName != fullNameBuffer)) { uprv_free(baseName); } baseName = NULL; @@ -1672,6 +1889,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err); if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) { + U_ASSERT(baseName == nullptr); /*Go to heap for the fullName if necessary*/ fullName = (char *)uprv_malloc(sizeof(char)*(length + 1)); if(fullName == 0) { @@ -1825,7 +2043,7 @@ Locale::hashCode() const void Locale::setToBogus() { /* Free our current storage */ - if(baseName != fullName) { + if((baseName != fullName) && (baseName != fullNameBuffer)) { uprv_free(baseName); } baseName = NULL; diff --git a/deps/icu-small/source/common/loclikelysubtags.cpp b/deps/icu-small/source/common/loclikelysubtags.cpp index a031bfa5872642..aa592e6ea80731 100644 --- a/deps/icu-small/source/common/loclikelysubtags.cpp +++ b/deps/icu-small/source/common/loclikelysubtags.cpp @@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() { LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const { const char *name = locale.getName(); if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") - // Private use language tag x-subtag-subtag... + // Private use language tag x-subtag-subtag... which CLDR changes to + // und-x-subtag-subtag... 
return LSR(name, "", "", LSR::EXPLICIT_LSR); } return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), diff --git a/deps/icu-small/source/common/norm2allmodes.h b/deps/icu-small/source/common/norm2allmodes.h index e8bd52c6ae39d7..584835da57b6be 100644 --- a/deps/icu-small/source/common/norm2allmodes.h +++ b/deps/icu-small/source/common/norm2allmodes.h @@ -38,7 +38,7 @@ class Normalizer2WithImpl : public Normalizer2 { virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { if(U_FAILURE(errorCode)) { dest.setToBogus(); return dest; @@ -64,13 +64,13 @@ class Normalizer2WithImpl : public Normalizer2 { virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { return normalizeSecondAndAppend(first, second, true, errorCode); } virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const { + UErrorCode &errorCode) const U_OVERRIDE { return normalizeSecondAndAppend(first, second, false, errorCode); } UnicodeString & @@ -107,7 +107,7 @@ class Normalizer2WithImpl : public Normalizer2 { UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const { + getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE { UChar buffer[4]; int32_t length; const UChar *d=impl.getDecomposition(c, buffer, length); @@ -122,7 +122,7 @@ class Normalizer2WithImpl : public Normalizer2 { return true; } virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE { UChar buffer[30]; int32_t length; const UChar *d=impl.getRawDecomposition(c, buffer, length); @@ -137,18 +137,18 @@ 
class Normalizer2WithImpl : public Normalizer2 { return true; } virtual UChar32 - composePair(UChar32 a, UChar32 b) const { + composePair(UChar32 a, UChar32 b) const U_OVERRIDE { return impl.composePair(a, b); } virtual uint8_t - getCombiningClass(UChar32 c) const { + getCombiningClass(UChar32 c) const U_OVERRIDE { return impl.getCC(impl.getNorm16(c)); } // quick checks virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { if(U_FAILURE(errorCode)) { return false; } @@ -161,11 +161,11 @@ class Normalizer2WithImpl : public Normalizer2 { return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); } virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; } virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { if(U_FAILURE(errorCode)) { return 0; } @@ -194,27 +194,57 @@ class DecomposeNormalizer2 : public Normalizer2WithImpl { private: virtual void normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { impl.decompose(src, limit, &buffer, errorCode); } using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 
virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); } + + void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + const uint8_t *s = reinterpret_cast(src.data()); + impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode); + sink.Flush(); + } + virtual UBool + isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return false; + } + const uint8_t *s = reinterpret_cast(sp.data()); + const uint8_t *sLimit = s + sp.length(); + return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode); + } + virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE { return impl.decompose(src, limit, NULL, errorCode); } using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { return impl.isDecompYes(impl.getNorm16(c)) ? 
UNORM_YES : UNORM_NO; } - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { + return impl.hasDecompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { + return impl.hasDecompBoundaryAfter(c); + } + virtual UBool isInert(UChar32 c) const U_OVERRIDE { + return impl.isDecompInert(c); + } }; class ComposeNormalizer2 : public Normalizer2WithImpl { @@ -321,24 +351,30 @@ class FCDNormalizer2 : public Normalizer2WithImpl { private: virtual void normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { impl.makeFCD(src, limit, &buffer, errorCode); } using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. virtual void normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); } virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE { return impl.makeFCD(src, limit, NULL, errorCode); } using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { + return impl.hasFCDBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { + return impl.hasFCDBoundaryAfter(c); + } + virtual UBool isInert(UChar32 c) const U_OVERRIDE { + return impl.isFCDInert(c); + } }; struct Norm2AllModes : public UMemory { diff --git a/deps/icu-small/source/common/normalizer2impl.cpp b/deps/icu-small/source/common/normalizer2impl.cpp index cbf6b4d980450a..c0ad5c69f3e07d 100644 --- a/deps/icu-small/source/common/normalizer2impl.cpp +++ b/deps/icu-small/source/common/normalizer2impl.cpp @@ -731,9 +731,131 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16, return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode); } +// Dual functionality: +// sink != nullptr: normalize +// sink == nullptr: isNormalized/spanQuickCheckYes +const uint8_t * +Normalizer2Impl::decomposeUTF8(uint32_t options, + const uint8_t *src, const uint8_t *limit, + ByteSink *sink, Edits *edits, UErrorCode &errorCode) const { + U_ASSERT(limit != nullptr); + UnicodeString s16; + uint8_t minNoLead = leadByteForCP(minDecompNoCP); + + const uint8_t *prevBoundary = src; + // only for quick check + uint8_t prevCC = 0; + + for (;;) { + // Fast path: Scan over a sequence of characters below the minimum "no" code point, + // or with (decompYes && ccc==0) properties. 
+ const uint8_t *fastStart = src; + const uint8_t *prevSrc; + uint16_t norm16 = 0; + + for (;;) { + if (src == limit) { + if (prevBoundary != limit && sink != nullptr) { + ByteSinkUtil::appendUnchanged(prevBoundary, limit, + *sink, options, edits, errorCode); + } + return src; + } + if (*src < minNoLead) { + ++src; + } else { + prevSrc = src; + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); + if (!isMostDecompYesAndZeroCC(norm16)) { + break; + } + } + } + // isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo, + // and the current character at [prevSrc..src[ is not a common case with cc=0 + // (MIN_NORMAL_MAYBE_YES or JAMO_VT). + // It could still be a maybeYes with cc=0. + if (prevSrc != fastStart) { + // The fast path looped over yes/0 characters before the current one. + if (sink != nullptr && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + prevBoundary = prevSrc; + prevCC = 0; + } + + // Medium-fast path: Quick check. + if (isMaybeOrNonZeroCC(norm16)) { + // Does not decompose. + uint8_t cc = getCCFromYesOrMaybe(norm16); + if (prevCC <= cc || cc == 0) { + prevCC = cc; + if (cc <= 1) { + if (sink != nullptr && + !ByteSinkUtil::appendUnchanged(prevBoundary, src, + *sink, options, edits, errorCode)) { + break; + } + prevBoundary = src; + } + continue; + } + } + if (sink == nullptr) { + return prevBoundary; // quick check: "no" or cc out of order + } + + // Slow path + // Decompose up to and including the current character. + if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) { + if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + prevBoundary = prevSrc; + } + ReorderingBuffer buffer(*this, s16, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE /* onlyContiguous */, + buffer, errorCode); + // Decompose until the next boundary. 
+ if (buffer.getLastCC() > 1) { + src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE /* onlyContiguous */, + buffer, errorCode); + } + if (U_FAILURE(errorCode)) { + break; + } + if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + break; + } + // We already know there was a change if the original character decomposed; + // otherwise compare. + if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) { + if (!ByteSinkUtil::appendUnchanged(prevBoundary, src, + *sink, options, edits, errorCode)) { + break; + } + } else { + if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(), + *sink, edits, errorCode)) { + break; + } + } + prevBoundary = src; + prevCC = 0; + } + return src; +} + const uint8_t * Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, + StopAt stopAt, UBool onlyContiguous, ReorderingBuffer &buffer, UErrorCode &errorCode) const { if (U_FAILURE(errorCode)) { return nullptr; @@ -746,21 +868,28 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, UChar32 c = U_SENTINEL; if (norm16 >= limitNoNo) { if (isMaybeOrNonZeroCC(norm16)) { - // No boundaries around this character. + // No comp boundaries around this character. + uint8_t cc = getCCFromYesOrMaybe(norm16); + if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) { + return prevSrc; + } c = codePointFromValidUTF8(prevSrc, src); - if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) { + if (!buffer.append(c, cc, errorCode)) { return nullptr; } + if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) { + return src; + } continue; } // Maps to an isCompYesAndZeroCC. 
- if (stopAtCompBoundary) { + if (stopAt != STOP_AT_LIMIT) { return prevSrc; } c = codePointFromValidUTF8(prevSrc, src); c = mapAlgorithmic(c, norm16); norm16 = getRawNorm16(c); - } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) { + } else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) { return prevSrc; } // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8. @@ -768,7 +897,8 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, // its norm16==INERT is normalization-inert, // so it gets copied unchanged in the fast path, // and we stop the slow path where invalid UTF-8 begins. - U_ASSERT(norm16 != INERT); + // c >= 0 is the result of an algorithmic mapping. + U_ASSERT(c >= 0 || norm16 != INERT); if (norm16 < minYesNo) { if (c < 0) { c = codePointFromValidUTF8(prevSrc, src); @@ -798,11 +928,15 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, } else { leadCC = 0; } + if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) { + return prevSrc; + } if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) { return nullptr; } } - if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) || + (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) { return src; } } @@ -1954,10 +2088,10 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, break; } // We know there is not a boundary here. - decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous, + decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous, buffer, errorCode); // Decompose until the next boundary. 
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous, + src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous, buffer, errorCode); if (U_FAILURE(errorCode)) { break; diff --git a/deps/icu-small/source/common/normalizer2impl.h b/deps/icu-small/source/common/normalizer2impl.h index 4218a30a3452df..bdb6767a925c90 100644 --- a/deps/icu-small/source/common/normalizer2impl.h +++ b/deps/icu-small/source/common/normalizer2impl.h @@ -491,6 +491,12 @@ class U_COMMON_API Normalizer2Impl : public UObject { UnicodeString &safeMiddle, ReorderingBuffer &buffer, UErrorCode &errorCode) const; + + /** sink==nullptr: isNormalized()/spanQuickCheckYes() */ + const uint8_t *decomposeUTF8(uint32_t options, + const uint8_t *src, const uint8_t *limit, + ByteSink *sink, Edits *edits, UErrorCode &errorCode) const; + UBool compose(const UChar *src, const UChar *limit, UBool onlyContiguous, UBool doCompose, @@ -649,6 +655,9 @@ class U_COMMON_API Normalizer2Impl : public UObject { UChar32 minNeedDataCP, ReorderingBuffer *buffer, UErrorCode &errorCode) const; + + enum StopAt { STOP_AT_LIMIT, STOP_AT_DECOMP_BOUNDARY, STOP_AT_COMP_BOUNDARY }; + const UChar *decomposeShort(const UChar *src, const UChar *limit, UBool stopAtCompBoundary, UBool onlyContiguous, ReorderingBuffer &buffer, UErrorCode &errorCode) const; @@ -656,7 +665,7 @@ class U_COMMON_API Normalizer2Impl : public UObject { ReorderingBuffer &buffer, UErrorCode &errorCode) const; const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, + StopAt stopAt, UBool onlyContiguous, ReorderingBuffer &buffer, UErrorCode &errorCode) const; static int32_t combine(const uint16_t *list, UChar32 trail); diff --git a/deps/icu-small/source/common/pluralmap.h b/deps/icu-small/source/common/pluralmap.h index d898ac4671f797..2a14a07af1fcdf 100644 --- a/deps/icu-small/source/common/pluralmap.h +++ b/deps/icu-small/source/common/pluralmap.h @@ -24,7 
+24,7 @@ class U_COMMON_API PluralMapBase : public UMemory { public: /** * The names of all the plural categories. NONE is not an actual plural - * category, but rather represents the absense of a plural category. + * category, but rather represents the absence of a plural category. */ enum Category { NONE = -1, diff --git a/deps/icu-small/source/common/putil.cpp b/deps/icu-small/source/common/putil.cpp index 3ed6a05d22d839..ffcbbcce59b401 100644 --- a/deps/icu-small/source/common/putil.cpp +++ b/deps/icu-small/source/common/putil.cpp @@ -1139,7 +1139,7 @@ uprv_tzname(int n) #endif if (tzid != NULL && isValidOlsonID(tzid) #if U_PLATFORM == U_PF_SOLARIS - /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ + /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */ && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 #endif ) { @@ -1361,7 +1361,7 @@ uprv_pathIsAbsolute(const char *path) /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR (needed for some Darwin ICU build environments) */ -#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR +#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" # endif diff --git a/deps/icu-small/source/common/putilimp.h b/deps/icu-small/source/common/putilimp.h index a325c6c359ad26..5b95a68418c428 100644 --- a/deps/icu-small/source/common/putilimp.h +++ b/deps/icu-small/source/common/putilimp.h @@ -527,7 +527,7 @@ U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base); * on the destination pointer and capacity cannot overflow. * * The pinned capacity must fulfill the following conditions (for positive capacities): - * - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.) + * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.) 
* - (dest + capacity) >= dest * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff * diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index 9b7e70c3cf419f..b821ca44639369 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -812,7 +812,7 @@ int32_t RuleBasedBreakIterator::handleNext() { } #endif - // handleNext alway sets the break tag value. + // handleNext always sets the break tag value. // Set the default for it. fRuleStatusIndex = 0; diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 63ff3001c7034e..44f19d86973d75 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -258,7 +258,7 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode previous(status); } else { // seek() leaves the BreakCache positioned at the preceding boundary - // if the requested position is between two bounaries. + // if the requested position is between two boundaries. // current() pushes the BreakCache position out to the BreakIterator itself. U_ASSERT(startPos > fTextIdx); current(); diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index 947a07304fd8b1..7838475290b935 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -284,7 +284,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) case doEndAssign: { - // We have reached the end of an assignement statement. + // We have reached the end of an assignment statement. // Current scan char is the ';' that terminates the assignment. // Terminate expression, leaves expression parse tree rooted in TOS node. 
@@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() { return (UChar32)-1; } ch = fRB->fRules.char32At(fNextIndex); + if (U_IS_SURROGATE(ch)) { + error(U_ILLEGAL_CHAR_FOUND); + return U_SENTINEL; + } fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1); if (ch == chCR || diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index cbd8f315c252d8..4bc8097886929e 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -151,7 +151,7 @@ void RBBITableBuilder::buildForwardTable() { // // calculate the functions nullable, firstpos, lastpos and followpos on // nodes in the parse tree. - // See the alogrithm description in Aho. + // See the algorithm description in Aho. // Understanding how this works by looking at the code alone will be // nearly impossible. // diff --git a/deps/icu-small/source/common/resource.h b/deps/icu-small/source/common/resource.h index 3795694412a058..48f5b9fa6ec7cc 100644 --- a/deps/icu-small/source/common/resource.h +++ b/deps/icu-small/source/common/resource.h @@ -274,8 +274,10 @@ class U_COMMON_API ResourceSink : public UObject { * * @param key The key string of the enumeration-start resource. * Empty if the enumeration starts at the top level of the bundle. - * @param value Call getArray() or getTable() as appropriate. - * Then reuse for output values from Array and Table getters. + * @param value Call getArray() or getTable() as appropriate. Then reuse for + * output values from Array and Table getters. Note: ResourceTable and + * ResourceArray instances must outlive the ResourceValue instance for + * ResourceTracer to be happy. * @param noFallback true if the bundle has no parent; * that is, its top-level table has the nofallback attribute, * or it is the root bundle of a locale tree. 
diff --git a/deps/icu-small/source/common/restrace.cpp b/deps/icu-small/source/common/restrace.cpp index 5c6498850e2f8f..1f83372d682a7f 100644 --- a/deps/icu-small/source/common/restrace.cpp +++ b/deps/icu-small/source/common/restrace.cpp @@ -54,6 +54,9 @@ void ResourceTracer::traceOpen() const { CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const { if (fResB) { + // Note: if you get a segfault around here, check that ResourceTable and + // ResourceArray instances outlive ResourceValue instances referring to + // their contents: output.append(fResB->fData->fPath, status); output.append('/', status); output.append(fResB->fData->fName, status); diff --git a/deps/icu-small/source/common/servnotf.h b/deps/icu-small/source/common/servnotf.h index 7918a672473b10..340496e85c61fd 100644 --- a/deps/icu-small/source/common/servnotf.h +++ b/deps/icu-small/source/common/servnotf.h @@ -82,7 +82,7 @@ private: UVector* listeners; /** * Add a listener to be notified when notifyChanged is called. * The listener must not be null. AcceptsListener must return - * true for the listener. Attempts to concurrently + * true for the listener. Attempts to concurrently * register the identical listener more than once will be * silently ignored. */ @@ -90,7 +90,7 @@ private: UVector* listeners; /** * Stop notifying this listener. The listener must - * not be null. Attemps to remove a listener that is + * not be null. Attempts to remove a listener that is * not registered will be silently ignored. 
*/ virtual void removeListener(const EventListener* l, UErrorCode& status); diff --git a/deps/icu-small/source/common/ubrk.cpp b/deps/icu-small/source/common/ubrk.cpp index f8bdf5a6b65822..bb5bdd1b5012fb 100644 --- a/deps/icu-small/source/common/ubrk.cpp +++ b/deps/icu-small/source/common/ubrk.cpp @@ -174,6 +174,18 @@ ubrk_safeClone( return (UBreakIterator *)newBI; } +U_CAPI UBreakIterator * U_EXPORT2 +ubrk_clone(const UBreakIterator *bi, UErrorCode *status) { + if (U_FAILURE(*status)) { + return nullptr; + } + BreakIterator *newBI = ((BreakIterator *)bi)->clone(); + if (newBI == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return (UBreakIterator *)newBI; +} U_CAPI void U_EXPORT2 diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index c2180629fb436a..7f779441975d23 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -681,7 +681,7 @@ ucase_isCaseSensitive(UChar32 c) { * - In [CoreProps], C has one of the properties Uppercase, or Lowercase * - Given D = NFD(C), then it is not the case that: * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) - * (This third criterium does not add any characters to the list + * (This third criterion does not add any characters to the list * for Unicode 3.2. Ignored.) * * D2. A character C is defined to be case-ignorable diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp index f02ae530ccf6d8..f43f9f024a1300 100644 --- a/deps/icu-small/source/common/uchar.cpp +++ b/deps/icu-small/source/common/uchar.cpp @@ -194,7 +194,7 @@ u_isISOControl(UChar32 c) { /* Some control characters that are used as space. */ #define IS_THAT_CONTROL_SPACE(c) \ - (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) + (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==0x85)) /* Java has decided that U+0085 New Line is not whitespace any more. 
*/ #define IS_THAT_ASCII_CONTROL_SPACE(c) \ @@ -677,14 +677,14 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { sa->add(sa->set, CR+1); /* range TAB..CR */ sa->add(sa->set, 0x1c); sa->add(sa->set, 0x1f+1); - USET_ADD_CP_AND_NEXT(sa, NL); + USET_ADD_CP_AND_NEXT(sa, 0x85); // NEXT LINE (NEL) /* add for u_isIDIgnorable() what was not added above */ - sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ + sa->add(sa->set, 0x7f); /* range DEL..NBSP-1, NBSP added below */ sa->add(sa->set, HAIRSP); sa->add(sa->set, RLM+1); - sa->add(sa->set, INHSWAP); - sa->add(sa->set, NOMDIG+1); + sa->add(sa->set, 0x206a); // INHIBIT SYMMETRIC SWAPPING + sa->add(sa->set, 0x206f+1); // NOMINAL DIGIT SHAPES USET_ADD_CP_AND_NEXT(sa, ZWNBSP); /* add no-break spaces for u_isWhitespace() what was not added above */ @@ -693,23 +693,25 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { USET_ADD_CP_AND_NEXT(sa, NNBSP); /* add for u_digit() */ - sa->add(sa->set, U_a); - sa->add(sa->set, U_z+1); - sa->add(sa->set, U_A); - sa->add(sa->set, U_Z+1); - sa->add(sa->set, U_FW_a); - sa->add(sa->set, U_FW_z+1); - sa->add(sa->set, U_FW_A); - sa->add(sa->set, U_FW_Z+1); + sa->add(sa->set, u'a'); + sa->add(sa->set, u'z'+1); + sa->add(sa->set, u'A'); + sa->add(sa->set, u'Z'+1); + // fullwidth + sa->add(sa->set, u'a'); + sa->add(sa->set, u'z'+1); + sa->add(sa->set, u'A'); + sa->add(sa->set, u'Z'+1); /* add for u_isxdigit() */ - sa->add(sa->set, U_f+1); - sa->add(sa->set, U_F+1); - sa->add(sa->set, U_FW_f+1); - sa->add(sa->set, U_FW_F+1); + sa->add(sa->set, u'f'+1); + sa->add(sa->set, u'F'+1); + // fullwidth + sa->add(sa->set, u'f'+1); + sa->add(sa->set, u'F'+1); /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ - sa->add(sa->set, WJ); /* range WJ..NOMDIG */ + sa->add(sa->set, 0x2060); /* range 2060..206f */ sa->add(sa->set, 0xfff0); sa->add(sa->set, 0xfffb+1); sa->add(sa->set, 0xe0000); diff --git 
a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp index 89db9e81ddea06..c274ebe41982f2 100644 --- a/deps/icu-small/source/common/ucnv2022.cpp +++ b/deps/icu-small/source/common/ucnv2022.cpp @@ -820,7 +820,7 @@ getKey_2022(char c,int32_t* key,int32_t* offset){ return INVALID_2022; } -/*runs through a state machine to determine the escape sequence - codepage correspondance +/*runs through a state machine to determine the escape sequence - codepage correspondence */ static void changeState_2022(UConverter* _this, @@ -1424,7 +1424,7 @@ toUnicodeCallback(UConverter *cnv, * KSC5601 : alias to ibm-949 mapping table * GB2312 : alias to ibm-1386 mapping table * ISO-8859-1 : Algorithmic implemented as LATIN1 case -* ISO-8859-7 : alisas to ibm-9409 mapping table +* ISO-8859-7 : alias to ibm-9409 mapping table */ /* preference order of JP charsets */ @@ -2324,7 +2324,7 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, /*************************************************************** * Rules for ISO-2022-KR encoding * i) The KSC5601 designator sequence should appear only once in a file, -* at the begining of a line before any KSC5601 characters. This usually +* at the beginning of a line before any KSC5601 characters. 
This usually * means that it appears by itself on the first line of the file * ii) There are only 2 shifting sequences SO to shift into double byte mode * and SI to shift into single byte mode diff --git a/deps/icu-small/source/common/ucnv_bld.cpp b/deps/icu-small/source/common/ucnv_bld.cpp index 0e198892f1bdfb..d08eec73696f94 100644 --- a/deps/icu-small/source/common/ucnv_bld.cpp +++ b/deps/icu-small/source/common/ucnv_bld.cpp @@ -427,7 +427,7 @@ getAlgorithmicTypeFromName(const char *realName) #define UCNV_CACHE_LOAD_FACTOR 2 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ -/* Will always be called with the cnvCacheMutex alrady being held */ +/* Will always be called with the cnvCacheMutex already being held */ /* by the calling function. */ /* Stores the shared data in the SHARED_DATA_HASHTABLE * @param data The shared data diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp index 63794d2334f62b..7d9ac46506f8fe 100644 --- a/deps/icu-small/source/common/ucnv_err.cpp +++ b/deps/icu-small/source/common/ucnv_err.cpp @@ -321,7 +321,7 @@ UCNV_FROM_U_CALLBACK_ESCAPE ( case UCNV_PRV_ESCAPE_CSS2: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); - /* Always add space character, becase the next character might be whitespace, + /* Always add space character, because the next character might be whitespace, which would erroneously be considered the termination of the escape sequence. 
*/ valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; break; diff --git a/deps/icu-small/source/common/ucnv_lmb.cpp b/deps/icu-small/source/common/ucnv_lmb.cpp index 5e7cfde353d7e3..bead464afd4a14 100644 --- a/deps/icu-small/source/common/ucnv_lmb.cpp +++ b/deps/icu-small/source/common/ucnv_lmb.cpp @@ -81,7 +81,7 @@ [G] D1 [D2] That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 - data bytes. The maximum size of a LMBCS chjaracter is 3 bytes: + data bytes. The maximum size of a LMBCS character is 3 bytes: */ #define ULMBCS_CHARSIZE_MAX 3 /* @@ -164,7 +164,7 @@ beginning of internal 'system' range names: */ /* Then we needed a place to put all the other ansi control characters that must be moved to different values because LMBCS reserves those values for other purposes. To represent the control characters, we start -with a first byte of 0xF & add the control chaarcter value as the +with a first byte of 0xF & add the control character value as the second byte */ #define ULMBCS_GRP_CTRL 0x0F diff --git a/deps/icu-small/source/common/ucnv_u7.cpp b/deps/icu-small/source/common/ucnv_u7.cpp index ec7befe9fc9a78..a5a9485b73f36b 100644 --- a/deps/icu-small/source/common/ucnv_u7.cpp +++ b/deps/icu-small/source/common/ucnv_u7.cpp @@ -814,7 +814,7 @@ const UConverterSharedData _UTF7Data= * the use of "~" in some servers as a home directory indicator. * * 5) UTF-7 permits multiple alternate forms to represent the same - * string; in particular, printable US-ASCII chararacters can be + * string; in particular, printable US-ASCII characters can be * represented in encoded form. 
* * In modified UTF-7, printable US-ASCII characters except for "&" diff --git a/deps/icu-small/source/common/ucnvisci.cpp b/deps/icu-small/source/common/ucnvisci.cpp index c1ab06e137ee22..4d2815a3a652b3 100644 --- a/deps/icu-small/source/common/ucnvisci.cpp +++ b/deps/icu-small/source/common/ucnvisci.cpp @@ -992,7 +992,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { if (sourceChar == PNJ_TIPPI) { - /* Make sure Tippi is converterd to Bindi. */ + /* Make sure Tippi is converted to Bindi. */ sourceChar = PNJ_BINDI; } else if (sourceChar == PNJ_ADHAK) { /* This is for consonant cluster handling. */ @@ -1147,7 +1147,7 @@ static const uint16_t lookupTable[][2]={ /* is the code point valid in current script? */ \ if(sourceChar> ASCII_END && \ (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ - /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \ if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ targetUniChar!=VOCALLIC_RR){ \ targetUniChar=missingCharMarker; \ @@ -1272,7 +1272,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo goto CALLBACK; } else if (*contextCharToUnicode==ISCII_INV) { if (sourceChar==ISCII_HALANT) { - targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ + targetUniChar = 0x0020; /* replace with space according to Indic FAQ */ } else { targetUniChar = ZWJ; } diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index 8d213dfcfcb516..d38ce3d604d15d 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -844,7 +844,7 @@ typedef struct { #endif -// Comparason function used in quick sort. +// Comparison function used in quick sort. 
static int U_CALLCONV currencyNameComparator(const void* a, const void* b) { const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a; const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b; @@ -1530,7 +1530,7 @@ uprv_parseCurrency(const char* locale, int32_t max = 0; int32_t matchIndex = -1; - // case in-sensitive comparision against currency names + // case in-sensitive comparison against currency names searchCurrencyName(currencyNames, total_currency_name_count, upperText, textLen, partialMatchLen, &max, &matchIndex); diff --git a/deps/icu-small/source/common/uhash.cpp b/deps/icu-small/source/common/uhash.cpp index 86311ceb0b25d1..67c7c363540307 100644 --- a/deps/icu-small/source/common/uhash.cpp +++ b/deps/icu-small/source/common/uhash.cpp @@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = { * or a pointer. If a hint bit is zero, then the associated * token is assumed to be an integer. */ +#define HINT_BOTH_INTEGERS (0) #define HINT_KEY_POINTER (1) #define HINT_VALUE_POINTER (2) +#define HINT_ALLOW_ZERO (4) /******************************************************************** * PRIVATE Implementation @@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash, goto err; } U_ASSERT(hash != NULL); - /* Cannot always check pointer here or iSeries sees NULL every time. */ - if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { + if ((hint & HINT_VALUE_POINTER) ? + value.pointer == NULL : + value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) { /* Disallow storage of NULL values, since NULL is returned by * get() to indicate an absent key. Storing NULL == removing. 
*/ @@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash, return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; } +U_CAPI int32_t U_EXPORT2 +uhash_getiAndFound(const UHashtable *hash, + const void *key, + UBool *found) { + UHashTok keyholder; + keyholder.pointer = (void *)key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + *found = !IS_EMPTY_OR_DELETED(e->hashcode); + return e->value.integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_igetiAndFound(const UHashtable *hash, + int32_t key, + UBool *found) { + UHashTok keyholder; + keyholder.integer = key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + *found = !IS_EMPTY_OR_DELETED(e->hashcode); + return e->value.integer; +} + U_CAPI void* U_EXPORT2 uhash_put(UHashtable *hash, void* key, @@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash, keyholder.integer = key; valueholder.integer = value; return _uhash_put(hash, keyholder, valueholder, - 0, /* neither is a ptr */ + HINT_BOTH_INTEGERS, + status).integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_putiAllowZero(UHashtable *hash, + void *key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER | HINT_ALLOW_ZERO, + status).integer; +} + + +U_CAPI int32_t U_EXPORT2 +uhash_iputiAllowZero(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO, status).integer; } @@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) { U_ASSERT(hash->count == 0); } +U_CAPI UBool U_EXPORT2 +uhash_containsKey(const UHashtable *hash, const void *key) { + UHashTok keyholder; + keyholder.pointer = (void *)key; + const UHashElement *e = _uhash_find(hash, 
keyholder, hash->keyHasher(keyholder)); + return !IS_EMPTY_OR_DELETED(e->hashcode); +} + +/** + * Returns true if the UHashtable contains an item with this integer key. + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_icontainsKey(const UHashtable *hash, int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + return !IS_EMPTY_OR_DELETED(e->hashcode); +} + U_CAPI const UHashElement* U_EXPORT2 uhash_find(const UHashtable *hash, const void* key) { UHashTok keyholder; diff --git a/deps/icu-small/source/common/uhash.h b/deps/icu-small/source/common/uhash.h index b59d2711bb29d0..af75999860dbb4 100644 --- a/deps/icu-small/source/common/uhash.h +++ b/deps/icu-small/source/common/uhash.h @@ -23,7 +23,7 @@ /** * UHashtable stores key-value pairs and does moderately fast lookup * based on keys. It provides a good tradeoff between access time and - * storage space. As elements are added to it, it grows to accomodate + * storage space. As elements are added to it, it grows to accommodate * them. By default, the table never shrinks, even if all elements * are removed from it. * @@ -54,6 +54,13 @@ * uhash_remove() on that key. This keeps uhash_get(), uhash_count(), * and uhash_nextElement() consistent with one another. * + * Keys and values can be integers. + * Functions that work with an integer key have an "i" prefix. + * Functions that work with an integer value have an "i" suffix. + * As with putting a NULL value pointer, putting a zero value integer removes the item. + * Except, there are pairs of functions that allow setting zero values + * and fetching (value, found) pairs. + * * To see everything in a hashtable, use uhash_nextElement() to * iterate through its contents. Each call to this function returns a * UHashElement pointer. 
A hash element contains a key, value, and @@ -405,6 +412,44 @@ uhash_iputi(UHashtable *hash, int32_t value, UErrorCode *status); +/** + * Put a (key=pointer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. + * Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero. + * + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_getiAndFound + */ +U_CAPI int32_t U_EXPORT2 +uhash_putiAllowZero(UHashtable *hash, + void *key, + int32_t value, + UErrorCode *status); + +/** + * Put a (key=integer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. + * Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero. + * + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_igetiAndFound + */ +U_CAPI int32_t U_EXPORT2 +uhash_iputiAllowZero(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status); + /** * Retrieve a pointer value from a UHashtable using a pointer key, * as previously stored by uhash_put(). @@ -448,6 +493,34 @@ U_CAPI int32_t U_EXPORT2 uhash_igeti(const UHashtable *hash, int32_t key); +/** + * Retrieves an integer value from a UHashtable using a pointer key, + * as previously stored by uhash_putiAllowZero() or uhash_puti(). + * + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @param found A pointer to a boolean which will be set for whether the key was found. 
+ * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_getiAndFound(const UHashtable *hash, + const void *key, + UBool *found); + +/** + * Retrieves an integer value from a UHashtable using an integer key, + * as previously stored by uhash_iputiAllowZero() or uhash_iputi(). + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @param found A pointer to a boolean which will be set for whether the key was found. + * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_igetiAndFound(const UHashtable *hash, + int32_t key, + UBool *found); + /** * Remove an item from a UHashtable stored by uhash_put(). * @param hash The target UHashtable. @@ -495,6 +568,26 @@ uhash_iremovei(UHashtable *hash, U_CAPI void U_EXPORT2 uhash_removeAll(UHashtable *hash); +/** + * Returns true if the UHashtable contains an item with this pointer key. + * + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_containsKey(const UHashtable *hash, const void *key); + +/** + * Returns true if the UHashtable contains an item with this integer key. + * + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return true if the key is found. + */ +U_CAPI UBool U_EXPORT2 +uhash_icontainsKey(const UHashtable *hash, int32_t key); + /** * Locate an element of a UHashtable. The caller must not modify the * returned object. 
The primary use of this function is to obtain the diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index ebfbb506508c1b..d96e79b8fdd805 100644 --- a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -143,7 +143,7 @@ static const char * const LANGUAGES[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", - "ml", "mn", "mnc", "mni", "mo", + "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "my", "mye", "myv", "mzn", @@ -166,9 +166,9 @@ static const char * const LANGUAGES[] = { "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux", - "sv", "sw", "swb", "swc", "syc", "syr", "szl", + "sv", "sw", "swb", "syc", "syr", "szl", "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", - "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", + "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", @@ -181,7 +181,7 @@ static const char * const LANGUAGES[] = { "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", "zun", "zxx", "zza", NULL, - "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ + "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */ NULL }; @@ -260,7 +260,7 @@ static const char * const LANGUAGES_3[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", - "mal", "mon", "mnc", "mni", "mol", + "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "mya", "mye", "myv", "mzn", @@ 
-283,9 +283,9 @@ static const char * const LANGUAGES_3[] = { "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", - "swe", "swa", "swb", "swc", "syc", "syr", "szl", + "swe", "swa", "swb", "syc", "syr", "szl", "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", - "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl", + "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", @@ -298,8 +298,8 @@ static const char * const LANGUAGES_3[] = { "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", "zun", "zxx", "zza", NULL, -/* "in", "iw", "ji", "jw", "sh", */ - "ind", "heb", "yid", "jaw", "srp", +/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */ + "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl", NULL }; @@ -334,13 +334,13 @@ static const char * const COUNTRIES[] = { "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", - "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", - "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", + "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", + "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", - "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", + "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", @@ -357,7 +357,7 @@ static const char * const COUNTRIES[] = { "TK", "TL", "TM", "TN", "TO", 
"TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", - "WS", "YE", "YT", "ZA", "ZM", "ZW", + "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", NULL, "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ NULL @@ -397,10 +397,10 @@ static const char * const COUNTRIES_3[] = { "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", -/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ - "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", -/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ - "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", +/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */ + "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK", +/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */ + "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI", /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ @@ -409,8 +409,8 @@ static const char * const COUNTRIES_3[] = { "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", -/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ - "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", +/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ + "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ @@ -443,8 +443,8 @@ static const char * const COUNTRIES_3[] = { "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", /* 
"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", -/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ - "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", +/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */ + "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE", NULL, /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", diff --git a/deps/icu-small/source/common/uloc_keytype.cpp b/deps/icu-small/source/common/uloc_keytype.cpp index 25f35b5ced42d5..f45aeb53d2a6ba 100644 --- a/deps/icu-small/source/common/uloc_keytype.cpp +++ b/deps/icu-small/source/common/uloc_keytype.cpp @@ -271,7 +271,7 @@ initFromResourceBundle(UErrorCode& sts) { if (U_FAILURE(sts)) { break; } - // check if this is an alias of canoncal legacy type + // check if this is an alias of canonical legacy type if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) { const char* from = ures_getKey(typeAliasDataEntry.getAlias()); if (isTZ) { diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp index d2d938753182d8..7cc4511175b081 100644 --- a/deps/icu-small/source/common/uloc_tag.cpp +++ b/deps/icu-small/source/common/uloc_tag.cpp @@ -129,7 +129,6 @@ static const char* const LEGACY[] = { // Legacy tags with no preferred value in the IANA // registry. Kept for now for the backward compatibility // because ICU has mapped them this way. 
- "cel-gaulish", "xtg-x-cel-gaulish", "i-default", "en-x-i-default", "i-enochian", "und-x-i-enochian", "i-mingo", "see-x-i-mingo", @@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len) return FALSE; } +U_CAPI const char * U_EXPORT2 +ultag_getTKeyStart(const char *localeID) { + const char *result = localeID; + const char *sep; + while((sep = uprv_strchr(result, SEP)) != nullptr) { + if (_isTKey(result, static_cast<int32_t>(sep - result))) { + return result; + } + result = ++sep; + } + if (_isTKey(result, -1)) { + return result; + } + return nullptr; +} + static UBool _isTValue(const char* s, int32_t len) { @@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len) const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here. const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } switch (state) { case kStart: - if (ultag_isLanguageSubtag(s, len)) { + if (ultag_isLanguageSubtag(s, len) && len != 4) { state = kGotLanguage; return TRUE; } @@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) return; } - /* Determine if variants already exists */ - if (ultag_getVariantsSize(langtag)) { - posixVariant = TRUE; - } - n = ultag_getExtensionsSize(langtag); /* resolve locale keywords and reordering keys */ @@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) key = ultag_getExtensionKey(langtag, i); type = ultag_getExtensionValue(langtag, i); if (*key == LDMLEXT) { + /* Determine if variants already exists */ + if (ultag_getVariantsSize(langtag)) { + posixVariant = TRUE; + } + _appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status); if (U_FAILURE(*status)) { break; @@ -2028,7 +2047,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } -
uprv_memcpy(tagBuf, tag, tagLen); + + if (tagLen > 0) { + uprv_memcpy(tagBuf, tag, tagLen); + } *(tagBuf + tagLen) = 0; /* create a ULanguageTag */ @@ -2692,8 +2714,7 @@ ulocimp_toLanguageTag(const char* localeID, if (U_SUCCESS(tmpStatus)) { if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) { /* return private use only tag */ - static const char PREFIX[] = { PRIVATEUSE, SEP }; - sink.Append(PREFIX, sizeof(PREFIX)); + sink.Append("und-x-", 6); sink.Append(buf.data(), buf.length()); done = TRUE; } else if (strict) { diff --git a/deps/icu-small/source/common/ulocimp.h b/deps/icu-small/source/common/ulocimp.h index a686759f32e3e1..3875664b2d4132 100644 --- a/deps/icu-small/source/common/ulocimp.h +++ b/deps/icu-small/source/common/ulocimp.h @@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len); U_CFUNC UBool ultag_isVariantSubtags(const char* s, int32_t len); +U_CAPI const char * U_EXPORT2 +ultag_getTKeyStart(const char *localeID); + U_CFUNC const char* ulocimp_toBcpKey(const char* key); diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h index 73d498397c744c..5f116910f46ed6 100644 --- a/deps/icu-small/source/common/unicode/bytestream.h +++ b/deps/icu-small/source/common/unicode/bytestream.h @@ -71,7 +71,6 @@ class U_COMMON_API ByteSink : public UMemory { */ virtual void Append(const char* bytes, int32_t n) = 0; -#ifndef U_HIDE_DRAFT_API /** * Appends n bytes to this. Same as Append(). 
* Call AppendU8() with u8"string literals" which are const char * in C++11 @@ -81,7 +80,7 @@ class U_COMMON_API ByteSink : public UMemory { * * @param bytes the pointer to the bytes * @param n the number of bytes; must be non-negative - * @draft ICU 67 + * @stable ICU 67 */ inline void AppendU8(const char* bytes, int32_t n) { Append(bytes, n); @@ -97,13 +96,12 @@ class U_COMMON_API ByteSink : public UMemory { * * @param bytes the pointer to the bytes * @param n the number of bytes; must be non-negative - * @draft ICU 67 + * @stable ICU 67 */ inline void AppendU8(const char8_t* bytes, int32_t n) { Append(reinterpret_cast<const char*>(bytes), n); } #endif -#endif // U_HIDE_DRAFT_API /** * Returns a writable buffer for appending and writes the buffer's capacity to diff --git a/deps/icu-small/source/common/unicode/bytestrie.h b/deps/icu-small/source/common/unicode/bytestrie.h index 85f802df420262..271a81d1b4d7d4 100644 --- a/deps/icu-small/source/common/unicode/bytestrie.h +++ b/deps/icu-small/source/common/unicode/bytestrie.h @@ -30,6 +30,8 @@ #include "unicode/uobject.h" #include "unicode/ustringtrie.h" +class BytesTrieTest; + U_NAMESPACE_BEGIN class ByteSink; @@ -378,6 +380,7 @@ class U_COMMON_API BytesTrie : public UMemory { private: friend class BytesTrieBuilder; + friend class ::BytesTrieTest; /** * Constructs a BytesTrie reader instance.
diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index eafe5a28c3ef05..1861fb8b3f9a89 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -30,6 +30,8 @@ #include "unicode/stringpiece.h" #include "unicode/stringtriebuilder.h" +class BytesTrieTest; + U_NAMESPACE_BEGIN class BytesTrieElement; @@ -125,6 +127,8 @@ class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { BytesTrieBuilder &clear(); private: + friend class ::BytesTrieTest; + BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator @@ -168,6 +172,7 @@ class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); virtual int32_t writeDeltaTo(int32_t jumpTarget); + static int32_t internalEncodeDelta(int32_t i, char intBytes[]); CharString *strings; // Pointer not object so we need not #include internal charstr.h. 
BytesTrieElement *elements; diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h index 14491494c5ca7a..1b88eca92b9e9c 100644 --- a/deps/icu-small/source/common/unicode/docmain.h +++ b/deps/icu-small/source/common/unicode/docmain.h @@ -15,7 +15,7 @@ * \file * \brief (Non API- contains Doxygen definitions) * - * This file contains documentation for Doxygen and doesnot have + * This file contains documentation for Doxygen and does not have * any significance with respect to C or C++ API */ @@ -74,7 +74,7 @@ * * * Strings and Character Iteration - * ustring.h, utf8.h, utf16.h, UText, UCharIterator + * ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink * icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink * * @@ -128,9 +128,9 @@ * icu::Normalizer2 * * - * Calendars + * Calendars and Time Zones * ucal.h - * icu::Calendar + * icu::Calendar, icu::TimeZone * * * Date and Time Formatting diff --git a/deps/icu-small/source/common/unicode/icuplug.h b/deps/icu-small/source/common/unicode/icuplug.h index 2bd51ff8dfc050..f817679acf016c 100644 --- a/deps/icu-small/source/common/unicode/icuplug.h +++ b/deps/icu-small/source/common/unicode/icuplug.h @@ -117,14 +117,13 @@ /* === Basic types === */ #ifndef U_HIDE_INTERNAL_API +struct UPlugData; /** * @{ - * Opaque structure passed to/from a plugin. - * use the APIs to access it. + * Typedef for opaque structure passed to/from a plugin. + * Use the APIs to access it. 
* @internal ICU 4.4 Technology Preview */ - -struct UPlugData; typedef struct UPlugData UPlugData; /** @} */ diff --git a/deps/icu-small/source/common/unicode/localematcher.h b/deps/icu-small/source/common/unicode/localematcher.h index 63a68b0b7fb3d3..0cd068ef32805b 100644 --- a/deps/icu-small/source/common/unicode/localematcher.h +++ b/deps/icu-small/source/common/unicode/localematcher.h @@ -91,8 +91,6 @@ enum ULocMatchDemotion { typedef enum ULocMatchDemotion ULocMatchDemotion; #endif -#ifndef U_FORCE_HIDE_DRAFT_API - /** * Builder option for whether to include or ignore one-way (fallback) match data. * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries. @@ -108,20 +106,20 @@ typedef enum ULocMatchDemotion ULocMatchDemotion; * but not if it is merely a fallback. * * @see LocaleMatcher::Builder#setDirection(ULocMatchDirection) - * @draft ICU 67 + * @stable ICU 67 */ enum ULocMatchDirection { /** * Locale matching includes one-way matches such as Breton→French. (default) * - * @draft ICU 67 + * @stable ICU 67 */ ULOCMATCH_DIRECTION_WITH_ONE_WAY, /** * Locale matching limited to two-way matches including e.g. Danish↔Norwegian * but ignoring one-way matches. * - * @draft ICU 67 + * @stable ICU 67 */ ULOCMATCH_DIRECTION_ONLY_TWO_WAY }; @@ -129,8 +127,6 @@ enum ULocMatchDirection { typedef enum ULocMatchDirection ULocMatchDirection; #endif -#endif // U_FORCE_HIDE_DRAFT_API - struct UHashtable; U_NAMESPACE_BEGIN @@ -463,14 +459,13 @@ class U_COMMON_API LocaleMatcher : public UMemory { */ Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion); -#ifndef U_HIDE_DRAFT_API /** * Option for whether to include or ignore one-way (fallback) match data. * By default, they are included. * * @param direction the match direction to set. 
* @return this Builder object - * @draft ICU 67 + * @stable ICU 67 */ Builder &setDirection(ULocMatchDirection direction) { if (U_SUCCESS(errorCode_)) { @@ -478,7 +473,6 @@ class U_COMMON_API LocaleMatcher : public UMemory { } return *this; } -#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DRAFT_API /** @@ -704,7 +698,7 @@ class U_COMMON_API LocaleMatcher : public UMemory { LSR *lsrs; int32_t supportedLocalesLength; // These are in preference order: 1. Default locale 2. paradigm locales 3. others. - UHashtable *supportedLsrToIndex; // Map stores index+1 because 0 is "not found" + UHashtable *supportedLsrToIndex; // Map // Array versions of the supportedLsrToIndex keys and values. // The distance lookup loops over the supportedLSRs and returns the index of the best match. const LSR **supportedLSRs; diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index 877c8014b0b3a7..b535e77e181015 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -571,15 +571,13 @@ class U_COMMON_API Locale : public UObject { */ void minimizeSubtags(UErrorCode& status); -#ifndef U_HIDE_DRAFT_API /** * Canonicalize the locale ID of this object according to CLDR. * @param status the status code - * @draft ICU 67 + * @stable ICU 67 * @see createCanonical */ void canonicalize(UErrorCode& status); -#endif // U_HIDE_DRAFT_API /** * Gets the list of keywords for the specified locale. diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h index 5eb1d95cafb3c2..2d355250c29fde 100644 --- a/deps/icu-small/source/common/unicode/normalizer2.h +++ b/deps/icu-small/source/common/unicode/normalizer2.h @@ -225,10 +225,8 @@ class U_COMMON_API Normalizer2 : public UObject { * Normalizes a UTF-8 string and optionally records how source substrings * relate to changed and unchanged result substrings. 
* - * Currently implemented completely only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * Otherwise currently converts to & from UTF-16 and does not support edits. + * Implemented completely for all built-in modes except for FCD. + * The base class implementation converts to & from UTF-16 and does not support edits. * * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src Source UTF-8 string. @@ -381,11 +379,9 @@ class U_COMMON_API Normalizer2 : public UObject { * resolves to "yes" or "no" to provide a definitive result, * at the cost of doing more work in those cases. * - * This works for all normalization modes, - * but it is currently optimized for UTF-8 only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * For other modes it currently converts to UTF-16 and calls isNormalized(). + * This works for all normalization modes. + * It is optimized for UTF-8 for all built-in modes except for FCD. + * The base class implementation converts to UTF-16 and calls isNormalized(). * * @param s UTF-8 input string * @param errorCode Standard ICU error code. Its input value must @@ -543,10 +539,8 @@ class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { * Normalizes a UTF-8 string and optionally records how source substrings * relate to changed and unchanged result substrings. * - * Currently implemented completely only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * Otherwise currently converts to & from UTF-16 and does not support edits. + * Implemented completely for most built-in modes except for FCD. + * The base class implementation converts to & from UTF-16 and does not support edits. * * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. 
* @param src Source UTF-8 string. @@ -676,11 +670,9 @@ class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { * resolves to "yes" or "no" to provide a definitive result, * at the cost of doing more work in those cases. * - * This works for all normalization modes, - * but it is currently optimized for UTF-8 only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * For other modes it currently converts to UTF-16 and calls isNormalized(). + * This works for all normalization modes. + * It is optimized for UTF-8 for all built-in modes except for FCD. + * The base class implementation converts to UTF-16 and calls isNormalized(). * * @param s UTF-8 input string * @param errorCode Standard ICU error code. Its input value must diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index d6f792ba345b7e..0dc91234006650 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -880,6 +880,6 @@ namespace std { #else # define U_CALLCONV_FPTR #endif -/* @} */ +/** @} */ #endif // _PLATFORM_H diff --git a/deps/icu-small/source/common/unicode/stringpiece.h b/deps/icu-small/source/common/unicode/stringpiece.h index 58053cfd5c3108..c145915fa953b9 100644 --- a/deps/icu-small/source/common/unicode/stringpiece.h +++ b/deps/icu-small/source/common/unicode/stringpiece.h @@ -75,12 +75,11 @@ class U_COMMON_API StringPiece : public UMemory { * @stable ICU 4.2 */ StringPiece(const char* str); -#ifndef U_HIDE_DRAFT_API #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) /** * Constructs from a NUL-terminated const char8_t * pointer. 
* @param str a NUL-terminated const char8_t * pointer - * @draft ICU 67 + * @stable ICU 67 */ StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} #endif @@ -88,10 +87,9 @@ class U_COMMON_API StringPiece : public UMemory { * Constructs an empty StringPiece. * Needed for type disambiguation from multiple other overloads. * @param p nullptr - * @draft ICU 67 + * @stable ICU 67 */ StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} -#endif // U_HIDE_DRAFT_API /** * Constructs from a std::string. @@ -99,17 +97,15 @@ class U_COMMON_API StringPiece : public UMemory { */ StringPiece(const std::string& str) : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } -#ifndef U_HIDE_DRAFT_API #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) /** * Constructs from a std::u8string. - * @draft ICU 67 + * @stable ICU 67 */ StringPiece(const std::u8string& str) : ptr_(reinterpret_cast<const char*>(str.data())), length_(static_cast<int32_t>(str.size())) { } #endif -#endif // U_HIDE_DRAFT_API /** * Constructs from some other implementation of a string piece class, from any @@ -152,18 +148,16 @@ class U_COMMON_API StringPiece : public UMemory { * @stable ICU 4.2 */ StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } -#ifndef U_HIDE_DRAFT_API #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) /** * Constructs from a const char8_t * pointer and a specified length. * @param str a const char8_t * pointer (need not be terminated) * @param len the length of the string; must be non-negative - * @draft ICU 67 + * @stable ICU 67 */ StringPiece(const char8_t* str, int32_t len) : StringPiece(reinterpret_cast<const char*>(str), len) {} #endif -#endif // U_HIDE_DRAFT_API /** * Substring of another StringPiece. @@ -233,13 +227,12 @@ class U_COMMON_API StringPiece : public UMemory { */ void set(const char* str); -#ifndef U_HIDE_DRAFT_API #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) /** * Resets the stringpiece to refer to new data.
* @param xdata pointer the new string data. Need not be NUL-terminated. * @param len the length of the new data - * @draft ICU 67 + * @stable ICU 67 */ inline void set(const char8_t* xdata, int32_t len) { set(reinterpret_cast<const char*>(xdata), len); @@ -248,13 +241,12 @@ class U_COMMON_API StringPiece : public UMemory { /** * Resets the stringpiece to refer to new data. * @param str a pointer to a NUL-terminated string. - * @draft ICU 67 + * @stable ICU 67 */ inline void set(const char8_t* str) { set(reinterpret_cast<const char*>(str)); } #endif -#endif // U_HIDE_DRAFT_API /** * Removes the first n string units. @@ -286,13 +278,12 @@ class U_COMMON_API StringPiece : public UMemory { } } -#ifndef U_HIDE_DRAFT_API /** * Searches the StringPiece for the given search string (needle); * @param needle The string for which to search. * @param offset Where to start searching within this string (haystack). * @return The offset of needle in haystack, or -1 if not found. - * @draft ICU 67 + * @stable ICU 67 */ int32_t find(StringPiece needle, int32_t offset); @@ -301,10 +292,9 @@ class U_COMMON_API StringPiece : public UMemory { * similar to std::string::compare(). * @param other The string to compare to. * @return below zero if this < other; above zero if this > other; 0 if this == other. - * @draft ICU 67 + * @stable ICU 67 */ int32_t compare(StringPiece other); -#endif // U_HIDE_DRAFT_API /** * Maximum integer, used as a default value for substring methods.
diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index 37189a85984b68..1249b0b160d362 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -296,6 +296,8 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar * text, int32_t textLength, UErrorCode * status); +#ifndef U_HIDE_DEPRECATED_API + /** * Thread safe cloning operation * @param bi iterator to be cloned @@ -312,7 +314,7 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, * @param status to indicate whether the operation went on smoothly or there were errors * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. * @return pointer to the new clone - * @stable ICU 2.0 + * @deprecated ICU 69 Use ubrk_clone() instead. */ U_CAPI UBreakIterator * U_EXPORT2 ubrk_safeClone( @@ -321,6 +323,23 @@ ubrk_safeClone( int32_t *pBufferSize, UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_DRAFT_API + +/** + * Thread safe cloning operation. + * @param bi iterator to be cloned + * @param status to indicate whether the operation went on smoothly or there were errors + * @return pointer to the new clone + * @draft ICU 69 + */ +U_CAPI UBreakIterator * U_EXPORT2 +ubrk_clone(const UBreakIterator *bi, + UErrorCode *status); + +#endif // U_HIDE_DRAFT_API + #ifndef U_HIDE_DEPRECATED_API /** diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 58f271cfb5adf2..5d784990f2cdce 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -1699,10 +1699,10 @@ ucnv_countAvailable(void); /** * Gets the canonical converter name of the specified converter from a list of - * all available converters contaied in the alias file. All converters + * all available converters contained in the alias file. 
All converters * in this list can be opened. * - * @param n the index to a converter available on the system (in the range [0..ucnv_countAvaiable()]) + * @param n the index to a converter available on the system (in the range [0..ucnv_countAvailable()]) * @return a pointer a string (library owned), or NULL if the index is out of bounds. * @see ucnv_countAvailable * @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/ucnvsel.h b/deps/icu-small/source/common/unicode/ucnvsel.h index b84bc86bed2659..3ba41bd64c1750 100644 --- a/deps/icu-small/source/common/unicode/ucnvsel.h +++ b/deps/icu-small/source/common/unicode/ucnvsel.h @@ -45,11 +45,11 @@ * from the serialized form. */ +struct UConverterSelector; /** * @{ - * The selector data structure + * Typedef for selector data structure. */ -struct UConverterSelector; typedef struct UConverterSelector UConverterSelector; /** @} */ diff --git a/deps/icu-small/source/common/unicode/unifilt.h b/deps/icu-small/source/common/unicode/unifilt.h index 1a77089233bf10..b0789d2e88fb91 100644 --- a/deps/icu-small/source/common/unicode/unifilt.h +++ b/deps/icu-small/source/common/unicode/unifilt.h @@ -40,8 +40,8 @@ U_NAMESPACE_BEGIN * * UnicodeFilter defines a protocol for selecting a * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. - * Currently, filters are used in conjunction with classes like {@link - * Transliterator} to only process selected characters through a + * Currently, filters are used in conjunction with classes like + * {@link Transliterator} to only process selected characters through a * transformation. * *

Note: UnicodeFilter currently stubs out two pure virtual methods diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 4179507af18e6f..7344a29a89b017 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -178,8 +178,6 @@ class RuleCharacterIterator; * Unicode property * * - *

Warning: you cannot add an empty string ("") to a UnicodeSet.

- * *

Formal syntax

* * \htmlonly
\endhtmlonly @@ -601,7 +599,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Make this object represent the range `start - end`. - * If `end > start` then this object is set to an empty range. + * If `start > end` then this object is set to an empty range. * A frozen set will not be modified. * * @param start first character in the set, inclusive @@ -1077,7 +1075,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Adds the specified range to this set if it is not already * present. If this set already contains the specified range, - * the call leaves this set unchanged. If end > start + * the call leaves this set unchanged. If start > end * then an empty range is added, leaving the set unchanged. * This is equivalent to a boolean logic OR, or a set UNION. * A frozen set will not be modified. @@ -1095,6 +1093,9 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * present. If this set already contains the specified character, * the call leaves this set unchanged. * A frozen set will not be modified. + * + * @param c the character (code point) + * @return this object, for chaining * @stable ICU 2.0 */ UnicodeSet& add(UChar32 c); @@ -1104,8 +1105,8 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * present. If this set already contains the multicharacter, * the call leaves this set unchanged. * Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. * A frozen set will not be modified. + * * @param s the source string * @return this object, for chaining * @stable ICU 2.4 @@ -1124,8 +1125,8 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { public: /** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already any particular character, it has no effect on that character. + * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"} + * If this set already contains any particular character, it has no effect on that character. * A frozen set will not be modified. * @param s the source string * @return this object, for chaining @@ -1135,7 +1136,6 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. * A frozen set will not be modified. * @param s the source string * @return this object, for chaining @@ -1145,7 +1145,6 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. * A frozen set will not be modified. * @param s the source string * @return this object, for chaining @@ -1155,7 +1154,6 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. * A frozen set will not be modified. * @param s the source string * @return this object, for chaining @@ -1165,7 +1163,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Makes a set from a multicharacter string. Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * * @param s the source string * @return a newly created set containing the given string. * The caller owns the return object and is responsible for deleting it. @@ -1185,15 +1183,13 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Retain only the elements in this set that are contained in the - * specified range. If end > start then an empty range is + * specified range. If start > end then an empty range is * retained, leaving the set empty. This is equivalent to * a boolean logic AND, or a set INTERSECTION. * A frozen set will not be modified. * - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range * @stable ICU 2.0 */ virtual UnicodeSet& retain(UChar32 start, UChar32 end); @@ -1202,14 +1198,31 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Retain the specified character from this set if it is present. * A frozen set will not be modified. + * + * @param c the character (code point) + * @return this object, for chaining * @stable ICU 2.0 */ UnicodeSet& retain(UChar32 c); +#ifndef U_HIDE_DRAFT_API + /** + * Retains only the specified string from this set if it is present. + * Upon return this set will be empty if it did not contain s, or + * will only contain s if it did contain s. + * A frozen set will not be modified. + * + * @param s the source string + * @return this object, for chaining + * @draft ICU 69 + */ + UnicodeSet& retain(const UnicodeString &s); +#endif // U_HIDE_DRAFT_API + /** * Removes the specified range from this set if it is present. * The set will not contain the specified range once the call - * returns. If end > start then an empty range is + * returns. 
If start > end then an empty range is * removed, leaving the set unchanged. * A frozen set will not be modified. * @@ -1226,6 +1239,9 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * The set will not contain the specified range once the call * returns. * A frozen set will not be modified. + * + * @param c the character (code point) + * @return this object, for chaining * @stable ICU 2.0 */ UnicodeSet& remove(UChar32 c); @@ -1253,15 +1269,13 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { /** * Complements the specified range in this set. Any character in * the range will be removed if it is in this set, or will be - * added if it is not in this set. If end > start + * added if it is not in this set. If start > end * then an empty range is complemented, leaving the set unchanged. * This is equivalent to a boolean logic XOR. * A frozen set will not be modified. * - * @param start first character, inclusive, of range to be removed - * from this set. - * @param end last character, inclusive, of range to be removed - * from this set. + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range * @stable ICU 2.0 */ virtual UnicodeSet& complement(UChar32 start, UChar32 end); @@ -1271,16 +1285,18 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * will be removed if it is in this set, or will be added if it is * not in this set. * A frozen set will not be modified. + * + * @param c the character (code point) + * @return this object, for chaining * @stable ICU 2.0 */ UnicodeSet& complement(UChar32 c); /** * Complement the specified string in this set. - * The set will not contain the specified string once the call - * returns. - *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * The string will be removed if it is in this set, or will be added if it is not in this set. * A frozen set will not be modified. + * * @param s the string to complement * @return this object, for chaining * @stable ICU 2.4 diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index 456389f265fea0..85bd96495150d3 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -44,9 +44,10 @@ struct UConverter; // unicode/ucnv.h #ifndef USTRING_H /** * \ingroup ustring_ustrlen + * @param s Pointer to sequence of UChars. + * @return Length of sequence. */ -U_CAPI int32_t U_EXPORT2 -u_strlen(const UChar *s); +U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s); #endif U_NAMESPACE_BEGIN @@ -2766,7 +2767,6 @@ class U_COMMON_API UnicodeString : public Replaceable * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * @param options Options bit set, see ucasemap_open(). * @return A reference to this. * @stable ICU 3.8 */ @@ -3614,7 +3614,7 @@ class U_COMMON_API UnicodeString : public Replaceable // turn a bogus string into an empty one void unBogus(); - // implements assigment operator, copy constructor, and fastCopyFrom() + // implements assignment operator, copy constructor, and fastCopyFrom() UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false); // Copies just the fields without memory management.
diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index fe59fdd893d940..737f4b308ef83f 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -482,6 +482,7 @@ #define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open) #define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform) #define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) +#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone) #define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) #define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) #define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) @@ -534,6 +535,7 @@ #define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) #define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) #define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) +#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal) #define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) #define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) #define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) @@ -962,6 +964,7 @@ #define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) #define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) #define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) +#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey) #define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) #define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) #define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) @@ -970,6 +973,7 @@ #define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) #define 
uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) #define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti) +#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound) #define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) #define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) #define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) @@ -977,12 +981,15 @@ #define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) #define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) #define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) +#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey) #define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) #define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) +#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound) #define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) #define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize) #define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) #define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) +#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero) #define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) #define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) #define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) @@ -990,6 +997,7 @@ #define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) #define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) #define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) +#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero) #define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) #define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) #define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) @@ -1150,6 +1158,8 @@ #define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey) 
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType) #define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags) +#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase) +#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower) #define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) #define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) #define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) @@ -1672,6 +1682,9 @@ #define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) #define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) #define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) +#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints) +#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange) +#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString) #define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) #define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) #define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) @@ -1695,12 +1708,15 @@ #define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) #define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) #define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) +#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints) #define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) #define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) #define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) #define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) #define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) #define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) +#define uset_retainAllCodePoints 
U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints) +#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString) #define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) #define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) #define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) diff --git a/deps/icu-small/source/common/unicode/uset.h b/deps/icu-small/source/common/unicode/uset.h index 473cc6fbae4e0d..a32f52c678a28a 100644 --- a/deps/icu-small/source/common/unicode/uset.h +++ b/deps/icu-small/source/common/unicode/uset.h @@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2 uset_addString(USet* set, const UChar* str, int32_t strLen); /** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already any particular character, it has no effect on that character. + * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"} + * If this set already contains any particular character, it has no effect on that character. * A frozen set will not be modified. * @param set the object to which to add the character * @param str the source string @@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end); U_CAPI void U_EXPORT2 uset_removeString(USet* set, const UChar* str, int32_t strLen); +#ifndef U_HIDE_DRAFT_API +/** + * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"} + * A frozen set will not be modified. + * + * @param set the object to be modified + * @param str the string + * @param length the length of the string, or -1 if NUL-terminated + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length); +#endif // U_HIDE_DRAFT_API + /** * Removes from this set all of its elements that are contained in the * specified set. This operation effectively modifies this @@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet); * A frozen set will not be modified. 
* * @param set the object for which to retain only the specified range - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range * @stable ICU 3.2 */ U_CAPI void U_EXPORT2 uset_retain(USet* set, UChar32 start, UChar32 end); +#ifndef U_HIDE_DRAFT_API +/** + * Retains only the specified string from this set if it is present. + * Upon return this set will be empty if it did not contain s, or + * will only contain s if it did contain s. + * A frozen set will not be modified. + * + * @param set the object to be modified + * @param str the string + * @param length the length of the string, or -1 if NUL-terminated + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_retainString(USet *set, const UChar *str, int32_t length); + +/** + * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} + * A frozen set will not be modified. + * + * @param set the object to be modified + * @param str the string + * @param length the length of the string, or -1 if NUL-terminated + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length); +#endif // U_HIDE_DRAFT_API + /** * Retains only the elements in this set that are contained in the * specified set. In other words, removes from this set all of @@ -696,6 +736,49 @@ uset_compact(USet* set); U_CAPI void U_EXPORT2 uset_complement(USet* set); +#ifndef U_HIDE_DRAFT_API +/** + * Complements the specified range in this set. Any character in + * the range will be removed if it is in this set, or will be + * added if it is not in this set. If start > end + * then an empty range is complemented, leaving the set unchanged. + * This is equivalent to a boolean logic XOR. + * A frozen set will not be modified. 
+ * + * @param set the object to be modified + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_complementRange(USet *set, UChar32 start, UChar32 end); + +/** + * Complements the specified string in this set. + * The string will be removed if it is in this set, or will be added if it is not in this set. + * A frozen set will not be modified. + * + * @param set the object to be modified + * @param str the string + * @param length the length of the string, or -1 if NUL-terminated + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_complementString(USet *set, const UChar *str, int32_t length); + +/** + * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"} + * A frozen set will not be modified. + * + * @param set the object to be modified + * @param str the string + * @param length the length of the string, or -1 if NUL-terminated + * @draft ICU 69 + */ +U_CAPI void U_EXPORT2 +uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length); +#endif // U_HIDE_DRAFT_API + /** * Complements in this set all elements contained in the specified * set. Any character in the other set will be removed if it is diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h index 9a85b73e4cba80..3a4492caff0f04 100644 --- a/deps/icu-small/source/common/unicode/ushape.h +++ b/deps/icu-small/source/common/unicode/ushape.h @@ -323,7 +323,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength, #define U_SHAPE_PRESERVE_PRESENTATION 0x8000 /** Presentation form option: * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with - * their unshaped correspondants in range 0+06xx, before shaping. + * their unshaped correspondents in range 0+06xx, before shaping. 
* @stable ICU 3.6 */ #define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0 diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index bb8e3e8109cc66..3ed01911058966 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -173,24 +173,23 @@ typedef enum UTraceFunctionNumber { UTRACE_RES_DATA_LIMIT, #endif // U_HIDE_INTERNAL_API -#ifndef U_HIDE_DRAFT_API /** * The lowest break iterator location. - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_START=0x4000, /** * Indicates that a character instance of break iterator was created. * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START, /** * Indicates that a word instance of break iterator was created. * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_WORD, @@ -200,21 +199,21 @@ typedef enum UTraceFunctionNumber { * Provides one C-style string to UTraceData: the lb value ("", * "loose", "strict", or "normal"). * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_LINE, /** * Indicates that a sentence instance of break iterator was created. * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_SENTENCE, /** * Indicates that a title instance of break iterator was created. * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_TITLE, @@ -224,12 +223,10 @@ typedef enum UTraceFunctionNumber { * Provides one C-style string to UTraceData: the script code of what * the break engine cover ("Hani", "Khmr", "Laoo", "Mymr", or "Thai"). * - * @draft ICU 67 + * @stable ICU 67 */ UTRACE_UBRK_CREATE_BREAK_ENGINE, -#endif // U_HIDE_DRAFT_API - #ifndef U_HIDE_INTERNAL_API /** * One more than the highest normal break iterator trace location. 
diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index a46481a3fe610c..b09d4943c1cc98 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -60,13 +60,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 68 +#define U_ICU_VERSION_MAJOR_NUM 69 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -86,7 +86,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _68 +#define U_ICU_VERSION_SUFFIX _69 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -139,7 +139,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "68.2" +#define U_ICU_VERSION "69.1" /** * The current ICU library major version number as a string, for library name suffixes. @@ -152,13 +152,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "68" +#define U_ICU_VERSION_SHORT "69" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. 
* @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "68.2" +#define U_ICU_DATA_VERSION "69.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/uniset.cpp b/deps/icu-small/source/common/uniset.cpp index 3807b837475948..067930b73bd6b2 100644 --- a/deps/icu-small/source/common/uniset.cpp +++ b/deps/icu-small/source/common/uniset.cpp @@ -30,24 +30,6 @@ #include "bmpset.h" #include "unisetspan.h" -// Define UChar constants using hex for EBCDIC compatibility -// Used #define to reduce private static exports and memory access time. -#define SET_OPEN ((UChar)0x005B) /*[*/ -#define SET_CLOSE ((UChar)0x005D) /*]*/ -#define HYPHEN ((UChar)0x002D) /*-*/ -#define COMPLEMENT ((UChar)0x005E) /*^*/ -#define COLON ((UChar)0x003A) /*:*/ -#define BACKSLASH ((UChar)0x005C) /*\*/ -#define INTERSECTION ((UChar)0x0026) /*&*/ -#define UPPER_U ((UChar)0x0055) /*U*/ -#define LOWER_U ((UChar)0x0075) /*u*/ -#define OPEN_BRACE ((UChar)123) /*{*/ -#define CLOSE_BRACE ((UChar)125) /*}*/ -#define UPPER_P ((UChar)0x0050) /*P*/ -#define LOWER_P ((UChar)0x0070) /*p*/ -#define UPPER_N ((UChar)78) /*N*/ -#define EQUALS ((UChar)0x003D) /*=*/ - // HIGH_VALUE > all valid values. 
110000 for codepoints #define UNICODESET_HIGH 0x0110000 @@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const { * @return true if this set contains the specified string */ UBool UnicodeSet::contains(const UnicodeString& s) const { - if (s.length() == 0) return FALSE; int32_t cp = getSingleCP(s); if (cp < 0) { return stringsContains(s); @@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const { if (hasStrings()) { for (i=0; isize(); ++i) { const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); - //if (s.length() == 0) { - // // Empty strings match everything - // return TRUE; - //} - // assert(s.length() != 0); // We enforce this elsewhere + if (s.isEmpty()) { + continue; // skip the empty string + } UChar32 c = s.char32At(0); if ((c & 0xFF) == v) { return TRUE; @@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text, int32_t limit, UBool incremental) { if (offset == limit) { - // Strings, if any, have length != 0, so we don't worry - // about them here. If we ever allow zero-length strings - // we much check for them here. if (contains(U_ETHER)) { return incremental ? U_PARTIAL_MATCH : U_MATCH; } else { @@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text, for (i=0; isize(); ++i) { const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); - - //if (trial.length() == 0) { - // return U_MATCH; // null-string always matches - //} - // assert(trial.length() != 0); // We ensure this elsewhere + if (trial.isEmpty()) { + continue; // skip the empty string + } UChar c = trial.charAt(forward ? 0 : trial.length() - 1); @@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) { * present. If this set already contains the multicharacter, * the call leaves this set unchanged. * Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * * @param s the source string * @return the modified set, for chaining */ UnicodeSet& UnicodeSet::add(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; + if (isFrozen() || isBogus()) return *this; int32_t cp = getSingleCP(s); if (cp < 0) { if (!stringsContains(s)) { @@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) { /** * Adds the given string, in order, to 'strings'. The given string - * must have been checked by the caller to not be empty and to not - * already be in 'strings'. + * must have been checked by the caller to not already be in 'strings'. */ void UnicodeSet::_add(const UnicodeString& s) { if (isFrozen() || isBogus()) { @@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) { * @param string to test */ int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { - //if (s.length() < 1) { - // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); - //} - if (s.length() > 2) return -1; - if (s.length() == 1) return s.charAt(0); - - // at this point, len = 2 - UChar32 cp = s.char32At(0); - if (cp > 0xFFFF) { // is surrogate pair - return cp; + int32_t sLength = s.length(); + if (sLength == 1) return s.charAt(0); + if (sLength == 2) { + UChar32 cp = s.char32At(0); + if (cp > 0xFFFF) { // is surrogate pair + return cp; + } } return -1; } @@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) { return retain(c, c); } +UnicodeSet& UnicodeSet::retain(const UnicodeString &s) { + if (isFrozen() || isBogus()) { return *this; } + UChar32 cp = getSingleCP(s); + if (cp < 0) { + bool isIn = stringsContains(s); + // Check for getRangeCount() first to avoid somewhat-expensive size() + // when there are single code points. 
+ if (isIn && getRangeCount() == 0 && size() == 1) { + return *this; + } + clear(); + if (isIn) { + _add(s); + } + } else { + retain(cp, cp); + } + return *this; +} + /** * Removes the specified range from this set if it is present. * The set will not contain the specified range once the call @@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) { * @return the modified set, for chaining */ UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; + if (isFrozen() || isBogus()) return *this; int32_t cp = getSingleCP(s); if (cp < 0) { if (strings != nullptr && strings->removeElement((void*) &s)) { @@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) { * Complement the specified string in this set. * The set will not contain the specified string once the call * returns. - *
Warning: you cannot add an empty string ("") to a UnicodeSet. + * * @param s the string to complement * @return this object, for chaining */ UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; + if (isFrozen() || isBogus()) return *this; int32_t cp = getSingleCP(s); if (cp < 0) { if (stringsContains(s)) { @@ -2001,22 +1991,22 @@ escapeUnprintable) { } // Okay to let ':' pass through switch (c) { - case SET_OPEN: - case SET_CLOSE: - case HYPHEN: - case COMPLEMENT: - case INTERSECTION: - case BACKSLASH: - case OPEN_BRACE: - case CLOSE_BRACE: - case COLON: + case u'[': + case u']': + case u'-': + case u'^': + case u'&': + case u'\\': + case u'{': + case u'}': + case u':': case SymbolTable::SYMBOL_REF: - buf.append(BACKSLASH); + buf.append(u'\\'); break; default: // Escape whitespace if (PatternProps::isWhiteSpace(c)) { - buf.append(BACKSLASH); + buf.append(u'\\'); } break; } @@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, backslashCount = 0; } else { result.append(c); - if (c == BACKSLASH) { + if (c == u'\\') { ++backslashCount; } else { backslashCount = 0; @@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result, UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result, UBool escapeUnprintable) const { - result.append(SET_OPEN); + result.append(u'['); // // Check against the predefined categories. We implicitly build // // up ALL category sets the first time toPattern() is called. 
// for (int8_t cat=0; catsize(); ++i) { - result.append(OPEN_BRACE); + result.append(u'{'); _appendToPat(result, *(const UnicodeString*) strings->elementAt(i), escapeUnprintable); - result.append(CLOSE_BRACE); + result.append(u'}'); } } - return result.append(SET_CLOSE); + return result.append(u']'); } /** diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index 45d3dab9938b29..8c86cc18c7473f 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -47,31 +47,6 @@ U_NAMESPACE_USE -// Define UChar constants using hex for EBCDIC compatibility -// Used #define to reduce private static exports and memory access time. -#define SET_OPEN ((UChar)0x005B) /*[*/ -#define SET_CLOSE ((UChar)0x005D) /*]*/ -#define HYPHEN ((UChar)0x002D) /*-*/ -#define COMPLEMENT ((UChar)0x005E) /*^*/ -#define COLON ((UChar)0x003A) /*:*/ -#define BACKSLASH ((UChar)0x005C) /*\*/ -#define INTERSECTION ((UChar)0x0026) /*&*/ -#define UPPER_U ((UChar)0x0055) /*U*/ -#define LOWER_U ((UChar)0x0075) /*u*/ -#define OPEN_BRACE ((UChar)123) /*{*/ -#define CLOSE_BRACE ((UChar)125) /*}*/ -#define UPPER_P ((UChar)0x0050) /*P*/ -#define LOWER_P ((UChar)0x0070) /*p*/ -#define UPPER_N ((UChar)78) /*N*/ -#define EQUALS ((UChar)0x003D) /*=*/ - -//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:" -static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]" -//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p" -//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}" -//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N" -static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/ - // Special property set IDs static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF] static const char ASCII[] = "ASCII"; // [\u0000-\u007F] @@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:] #define NAME_PROP "na" #define NAME_PROP_LENGTH 
2 -/** - * Delimiter string used in patterns to close a category reference: - * ":]". Example: "[:Lu:]". - */ -//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */ - // Cached sets ------------------------------------------------------------- *** U_CDECL_BEGIN @@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) { static inline UBool isPerlOpen(const UnicodeString &pattern, int32_t pos) { UChar c; - return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P); + return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P'); } /*static inline UBool isPerlClose(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==CLOSE_BRACE; + return pattern.charAt(pos)==u'}'; }*/ static inline UBool isNameOpen(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N; + return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N'; } static inline UBool isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON; + return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':'; } /*static inline UBool isPOSIXClose(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE; + return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']'; }*/ // TODO memory debugging provided inside uniset.cpp @@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, while (mode != 2 && !chars.atEnd()) { U_ASSERT((lastItem == 0 && op == 0) || - (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) || - (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ || - op == INTERSECTION /*'&'*/))); + (lastItem == 1 && (op == 0 || op == u'-')) || + (lastItem == 2 && (op == 0 || op == u'-' || op == u'&'))); UChar32 c = 0; UBool literal = FALSE; @@ -356,27 +324,27 @@ void 
UnicodeSet::applyPattern(RuleCharacterIterator& chars, c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; - if (c == 0x5B /*'['*/ && !literal) { + if (c == u'[' && !literal) { if (mode == 1) { chars.setPos(backup); // backup setMode = 1; } else { // Handle opening '[' delimiter mode = 1; - patLocal.append((UChar) 0x5B /*'['*/); + patLocal.append(u'['); chars.getPos(backup); // prepare to backup c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; - if (c == 0x5E /*'^'*/ && !literal) { + if (c == u'^' && !literal) { invert = TRUE; - patLocal.append((UChar) 0x5E /*'^'*/); + patLocal.append(u'^'); chars.getPos(backup); // prepare to backup c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; } // Fall through to handle special leading '-'; // otherwise restart loop for nested [], \p{}, etc. - if (c == HYPHEN /*'-'*/) { + if (c == u'-') { literal = TRUE; // Fall through to handle literal '-' below } else { @@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, op = 0; } - if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) { + if (op == u'-' || op == u'&') { patLocal.append(op); } @@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, } switch (op) { - case HYPHEN: /*'-'*/ + case u'-': removeAll(*nested); break; - case INTERSECTION: /*'&'*/ + case u'&': retainAll(*nested); break; case 0: @@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, if (!literal) { switch (c) { - case 0x5D /*']'*/: + case u']': if (lastItem == 1) { add(lastChar, lastChar); _appendToPat(patLocal, lastChar, FALSE); } // Treat final trailing '-' as a literal - if (op == HYPHEN /*'-'*/) { + if (op == u'-') { add(op, op); patLocal.append(op); - } else if (op == INTERSECTION /*'&'*/) { + } else if (op == u'&') { // syntaxError(chars, "Trailing '&'"); ec = U_MALFORMED_SET; return; } - patLocal.append((UChar) 0x5D /*']'*/); + patLocal.append(u']'); mode = 2; continue; - case HYPHEN /*'-'*/: 
+ case u'-': if (op == 0) { if (lastItem != 0) { op = (UChar) c; @@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, add(c, c); c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; - if (c == 0x5D /*']'*/ && !literal) { - patLocal.append(HYPHEN_RIGHT_BRACE, 2); + if (c == u']' && !literal) { + patLocal.append(u"-]", 2); mode = 2; continue; } @@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // syntaxError(chars, "'-' not after char or set"); ec = U_MALFORMED_SET; return; - case INTERSECTION /*'&'*/: + case u'&': if (lastItem == 2 && op == 0) { op = (UChar) c; continue; @@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // syntaxError(chars, "'&' not after set"); ec = U_MALFORMED_SET; return; - case 0x5E /*'^'*/: + case u'^': // syntaxError(chars, "'^' not after '['"); ec = U_MALFORMED_SET; return; - case 0x7B /*'{'*/: + case u'{': if (op != 0) { // syntaxError(chars, "Missing operand after operator"); ec = U_MALFORMED_SET; @@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, while (!chars.atEnd()) { c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; - if (c == 0x7D /*'}'*/ && !literal) { + if (c == u'}' && !literal) { ok = TRUE; break; } buf.append(c); } - if (buf.length() < 1 || !ok) { + if (!ok) { // syntaxError(chars, "Invalid multicharacter string"); ec = U_MALFORMED_SET; return; @@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // we don't need to drop through to the further // processing add(buf); - patLocal.append((UChar) 0x7B /*'{'*/); + patLocal.append(u'{'); _appendToPat(patLocal, buf, FALSE); - patLocal.append((UChar) 0x7D /*'}'*/); + patLocal.append(u'}'); continue; case SymbolTable::SYMBOL_REF: // symbols nosymbols @@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, chars.getPos(backup); c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; - UBool 
anchor = (c == 0x5D /*']'*/ && !literal); + UBool anchor = (c == u']' && !literal); if (symbols == 0 && !anchor) { c = SymbolTable::SYMBOL_REF; chars.setPos(backup); @@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, add(U_ETHER); usePat = TRUE; patLocal.append((UChar) SymbolTable::SYMBOL_REF); - patLocal.append((UChar) 0x5D /*']'*/); + patLocal.append(u']'); mode = 2; continue; } @@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, lastChar = c; break; case 1: - if (op == HYPHEN /*'-'*/) { + if (op == u'-') { if (lastChar >= c) { // Don't allow redundant (a-a) or empty (b-a) ranges; // these are most likely typos. @@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, RuleCharacterIterator::Pos pos; chars.getPos(pos); UChar32 c = chars.next(iterOpts, literal, ec); - if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) { + if (c == u'[' || c == u'\\') { UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE, literal, ec); - result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) : - (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/); + result = (c == u'[') ? 
(d == u':') : + (d == u'N' || d == u'p' || d == u'P'); } chars.setPos(pos); return result && U_SUCCESS(ec); @@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, posix = TRUE; pos += 2; pos = ICU_Utility::skipWhitespace(pattern, pos); - if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) { + if (pos < pattern.length() && pattern.charAt(pos) == u'^') { ++pos; invert = TRUE; } } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) { UChar c = pattern.charAt(pos+1); - invert = (c == UPPER_P); - isName = (c == UPPER_N); + invert = (c == u'P'); + isName = (c == u'N'); pos += 2; pos = ICU_Utility::skipWhitespace(pattern, pos); - if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) { + if (pos == pattern.length() || pattern.charAt(pos++) != u'{') { // Syntax error; "\p" or "\P" not followed by "{" FAIL(ec); } @@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, // Look for the matching close delimiter, either :] or } int32_t close; if (posix) { - close = pattern.indexOf(POSIX_CLOSE, 2, pos); + close = pattern.indexOf(u":]", 2, pos); } else { - close = pattern.indexOf(CLOSE_BRACE, pos); + close = pattern.indexOf(u'}', pos); } if (close < 0) { // Syntax error; close delimiter missing @@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, // Look for an '=' sign. If this is present, we will parse a // medium \p{gc=Cf} or long \p{GeneralCategory=Format} // pattern. 
- int32_t equals = pattern.indexOf(EQUALS, pos); + int32_t equals = pattern.indexOf(u'=', pos); UnicodeString propName, valueName; if (equals >= 0 && equals < close && !isName) { // Equals seen; parse medium/long pattern diff --git a/deps/icu-small/source/common/unisetspan.cpp b/deps/icu-small/source/common/unisetspan.cpp index 68e44d91ee7066..fe0d74f5b28e76 100644 --- a/deps/icu-small/source/common/unisetspan.cpp +++ b/deps/icu-small/source/common/unisetspan.cpp @@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } UBool thisRelevant; spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED); if(spanLength0) { // Relevant string. if(which&UTF16) { if(which&CONTAINED) { if(which&FWD) { @@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, addToSpanNotSet(c); } } - } else { // Irrelevant string. + } else { // Irrelevant string. (Also the empty string.) if(which&UTF8) { if(which&CONTAINED) { // Only necessary for LONGEST_MATCH. uint8_t *s8=utf8+utf8Count; @@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi for(i=0; i0); // Try to match this string at pos-overlap..pos. if(overlap>=LONG_SPAN) { @@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } // Try to match this string at pos-overlap..pos. if(overlap>=LONG_SPAN) { @@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC for(i=0; i0); // Try to match this string at pos-(length16-overlap)..pos-length16. 
if(overlap>=LONG_SPAN) { @@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + if (length16==0) { + continue; // skip the empty string + } // Try to match this string at pos-(length16-overlap)..pos-length16. if(overlap>=LONG_SPAN) { @@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const { // Try to match the strings at pos. for(i=0; i0); if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) { return pos; // There is a set element at pos. } @@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const // it is easier and we only need to know whether the string is irrelevant // which is the same in either array. if(spanLengths[i]==ALL_CP_CONTAINED) { - continue; // Irrelevant string. + continue; // Irrelevant string. (Also the empty string.) } const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); const UChar *s16=string.getBuffer(); int32_t length16=string.length(); + U_ASSERT(length>0); if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) { return pos; // There is a set element at pos. } diff --git a/deps/icu-small/source/common/uprops.h b/deps/icu-small/source/common/uprops.h index 8bf929919f200c..09830bdeb99892 100644 --- a/deps/icu-small/source/common/uprops.h +++ b/deps/icu-small/source/common/uprops.h @@ -310,55 +310,12 @@ u_isgraphPOSIX(UChar32 c); U_CFUNC UBool u_isprintPOSIX(UChar32 c); -/** Turn a bit index into a bit flag. @internal */ -#define FLAG(n) ((uint32_t)1<<(n)) - -/** Flags for general categories in the order of UCharCategory. 
@internal */ -#define _Cn FLAG(U_GENERAL_OTHER_TYPES) -#define _Lu FLAG(U_UPPERCASE_LETTER) -#define _Ll FLAG(U_LOWERCASE_LETTER) -#define _Lt FLAG(U_TITLECASE_LETTER) -#define _Lm FLAG(U_MODIFIER_LETTER) -/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */ -#define _Mn FLAG(U_NON_SPACING_MARK) -#define _Me FLAG(U_ENCLOSING_MARK) -#define _Mc FLAG(U_COMBINING_SPACING_MARK) -#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER) -#define _Nl FLAG(U_LETTER_NUMBER) -#define _No FLAG(U_OTHER_NUMBER) -#define _Zs FLAG(U_SPACE_SEPARATOR) -#define _Zl FLAG(U_LINE_SEPARATOR) -#define _Zp FLAG(U_PARAGRAPH_SEPARATOR) -#define _Cc FLAG(U_CONTROL_CHAR) -#define _Cf FLAG(U_FORMAT_CHAR) -#define _Co FLAG(U_PRIVATE_USE_CHAR) -#define _Cs FLAG(U_SURROGATE) -#define _Pd FLAG(U_DASH_PUNCTUATION) -#define _Ps FLAG(U_START_PUNCTUATION) -/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */ -/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */ -#define _Po FLAG(U_OTHER_PUNCTUATION) -#define _Sm FLAG(U_MATH_SYMBOL) -#define _Sc FLAG(U_CURRENCY_SYMBOL) -#define _Sk FLAG(U_MODIFIER_SYMBOL) -#define _So FLAG(U_OTHER_SYMBOL) -#define _Pi FLAG(U_INITIAL_PUNCTUATION) -/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */ - /** Some code points. 
@internal */ enum { TAB =0x0009, LF =0x000a, FF =0x000c, CR =0x000d, - U_A =0x0041, - U_F =0x0046, - U_Z =0x005a, - U_a =0x0061, - U_f =0x0066, - U_z =0x007a, - DEL =0x007f, - NL =0x0085, NBSP =0x00a0, CGJ =0x034f, FIGURESP=0x2007, @@ -367,15 +324,6 @@ enum { ZWJ =0x200d, RLM =0x200f, NNBSP =0x202f, - WJ =0x2060, - INHSWAP =0x206a, - NOMDIG =0x206f, - U_FW_A =0xff21, - U_FW_F =0xff26, - U_FW_Z =0xff3a, - U_FW_a =0xff41, - U_FW_f =0xff46, - U_FW_z =0xff5a, ZWNBSP =0xfeff }; diff --git a/deps/icu-small/source/common/uresbund.cpp b/deps/icu-small/source/common/uresbund.cpp index 743df1f8c505ce..73c4b44037e8aa 100644 --- a/deps/icu-small/source/common/uresbund.cpp +++ b/deps/icu-small/source/common/uresbund.cpp @@ -91,6 +91,15 @@ static UBool chopLocale(char *name) { return FALSE; } +/** + * Called to check whether a name without '_' needs to be checked for a parent. + * Some code had assumed that locale IDs with '_' could not have a non-root parent. + * We may want a better way of doing this. + */ +static UBool mayHaveParent(char *name) { + return (name[0] != 0 && uprv_strstr("nb nn",name) != nullptr); +} + /** * Internal function */ @@ -529,8 +538,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1, char name[], int32_t nameCapacity, UBool usingUSRData, char usrDataPath[], UErrorCode *status) { if (U_FAILURE(*status)) { return FALSE; } - UBool hasChopped = TRUE; - while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback && + UBool checkParent = TRUE; + while (checkParent && t1->fParent == NULL && !t1->fData.noFallback && res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) { Resource parentRes = res_getResource(&t1->fData, "%%Parent"); if (parentRes != RES_BOGUS) { // An explicit parent was found. 
@@ -573,7 +582,7 @@ loadParentsExceptRoot(UResourceDataEntry *&t1, } } t1 = t2; - hasChopped = chopLocale(name); + checkParent = chopLocale(name) || mayHaveParent(name); } return TRUE; } @@ -692,7 +701,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, } } } - if (hasChopped && !isRoot) { + if ((hasChopped || mayHaveParent(name)) && !isRoot) { if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { goto finish; } @@ -716,7 +725,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, hasRealData = TRUE; isDefault = TRUE; // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path? - if (hasChopped && !isRoot) { + if ((hasChopped || mayHaveParent(name)) && !isRoot) { if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { goto finish; } @@ -1908,6 +1917,8 @@ ures_getByKeyWithFallback(const UResourceBundle *resB, } else { break; } + } else if (res == RES_BOGUS) { + break; } } while(*myPath); /* Continue until the whole path is consumed */ } @@ -3019,7 +3030,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status) U_CAPI UBool U_EXPORT2 ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){ if(res1==NULL || res2==NULL){ - return res1==res2; /* pointer comparision */ + return res1==res2; /* pointer comparison */ } if(res1->fKey==NULL|| res2->fKey==NULL){ return (res1->fKey==res2->fKey); diff --git a/deps/icu-small/source/common/uresdata.cpp b/deps/icu-small/source/common/uresdata.cpp index 4e3309f497522c..26a2fd91a6fdbd 100644 --- a/deps/icu-small/source/common/uresdata.cpp +++ b/deps/icu-small/source/common/uresdata.cpp @@ -960,14 +960,6 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch if(URES_IS_TABLE(type)) { *key = pathP; t2 = res_getTableItemByKey(pResData, t1, &indexR, key); - if(t2 == RES_BOGUS) { - /* if we fail to get the 
resource by key, maybe we got an index */ - indexR = uprv_strtol(pathP, &closeIndex, 10); - if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) { - /* if we indeed have an index, try to get the item by index */ - t2 = res_getTableItemByIndex(pResData, t1, indexR, key); - } // else t2 is already RES_BOGUS - } } else if(URES_IS_ARRAY(type)) { indexR = uprv_strtol(pathP, &closeIndex, 10); if(indexR >= 0 && *closeIndex == 0) { diff --git a/deps/icu-small/source/common/uresimp.h b/deps/icu-small/source/common/uresimp.h index 12154dcb7c68d4..cfd15a2dde10e2 100644 --- a/deps/icu-small/source/common/uresimp.h +++ b/deps/icu-small/source/common/uresimp.h @@ -270,11 +270,13 @@ ures_getByKeyWithFallback(const UResourceBundle *resB, * function can perform fallback on the sub-resources of the table. * @param resB a resource * @param inKey a key associated with the requested resource + * @param len if not NULL, used to return the length of the string * @param status: fills in the outgoing error code * could be U_MISSING_RESOURCE_ERROR if the key is not found * could be a non-failing error * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it + * @return returns a pointer to a zero-terminated UChar array which lives in a + * memory mapped/DLL file. 
*/ U_CAPI const UChar* U_EXPORT2 ures_getStringByKeyWithFallback(const UResourceBundle *resB, diff --git a/deps/icu-small/source/common/uset.cpp b/deps/icu-small/source/common/uset.cpp index 265a300b19060c..ca7e80b082cb36 100644 --- a/deps/icu-small/source/common/uset.cpp +++ b/deps/icu-small/source/common/uset.cpp @@ -116,6 +116,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) { ((UnicodeSet*) set)->UnicodeSet::remove(s); } +U_CAPI void U_EXPORT2 +uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) { + UnicodeString s(length==-1, str, length); + ((UnicodeSet*) set)->UnicodeSet::removeAll(s); +} + U_CAPI void U_EXPORT2 uset_removeAll(USet* set, const USet* remove) { ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove); @@ -126,6 +132,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) { ((UnicodeSet*) set)->UnicodeSet::retain(start, end); } +U_CAPI void U_EXPORT2 +uset_retainString(USet *set, const UChar *str, int32_t length) { + UnicodeString s(length==-1, str, length); + ((UnicodeSet*) set)->UnicodeSet::retain(s); +} + +U_CAPI void U_EXPORT2 +uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) { + UnicodeString s(length==-1, str, length); + ((UnicodeSet*) set)->UnicodeSet::retainAll(s); +} + U_CAPI void U_EXPORT2 uset_retainAll(USet* set, const USet* retain) { ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain); @@ -141,6 +159,23 @@ uset_complement(USet* set) { ((UnicodeSet*) set)->UnicodeSet::complement(); } +U_CAPI void U_EXPORT2 +uset_complementRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet*) set)->UnicodeSet::complement(start, end); +} + +U_CAPI void U_EXPORT2 +uset_complementString(USet *set, const UChar *str, int32_t length) { + UnicodeString s(length==-1, str, length); + ((UnicodeSet*) set)->UnicodeSet::complement(s); +} + +U_CAPI void U_EXPORT2 +uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) { + UnicodeString 
s(length==-1, str, length); + ((UnicodeSet*) set)->UnicodeSet::complementAll(s); +} + U_CAPI void U_EXPORT2 uset_complementAll(USet* set, const USet* complement) { ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement); diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp index f1c075a391116f..b84a5d93ea4987 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -575,7 +575,7 @@ usprep_map( const UStringPrepProfile* profile, } }else if(type==USPREP_DELETE){ - // just consume the codepoint and contine + // just consume the codepoint and continue continue; } //copy the code point into destination diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp index 0372824f21f615..9a49a67744b6f9 100644 --- a/deps/icu-small/source/common/ustr_wcs.cpp +++ b/deps/icu-small/source/common/ustr_wcs.cpp @@ -364,7 +364,7 @@ _strFromWCS( UChar *dest, } /* we have found a null so convert the - * chunk from begining of non-null char to null + * chunk from beginning of non-null char to null */ retVal = uprv_wcstombs(pCSrc,pSrc,remaining); @@ -387,7 +387,7 @@ _strFromWCS( UChar *dest, * null terminate it and convert wchar_ts to chars */ if(nulLen >= _STACK_BUFFER_CAPACITY){ - /* Should rarely occcur */ + /* Should rarely occur */ /* allocate new buffer buffer */ pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); if(pWStack==NULL){ diff --git a/deps/icu-small/source/common/utext.cpp b/deps/icu-small/source/common/utext.cpp index ac828fbefa81bb..0dfef36d264176 100644 --- a/deps/icu-small/source/common/utext.cpp +++ b/deps/icu-small/source/common/utext.cpp @@ -382,7 +382,7 @@ utext_previous32From(UText *ut, int64_t index) { // UChar32 cPrev; // The character preceding cCurr, which is what we will return. 
- // Address the chunk containg the position preceding the incoming index + // Address the chunk containing the position preceding the incoming index // A tricky edge case: // We try to test the requested native index against the chunkNativeStart to determine // whether the character preceding the one at the index is in the current chunk. @@ -894,7 +894,7 @@ struct UTF8Buf { // one for a supplementary starting in the last normal position, // and one for an entry for the buffer limit position. uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to - // correspoding offset in filled part of buf. + // corresponding offset in filled part of buf. int32_t align; }; @@ -1545,7 +1545,7 @@ utf8TextMapOffsetToNative(const UText *ut) { } // -// Map a native index to the corrsponding chunk offset +// Map a native index to the corresponding chunk offset // static int32_t U_CALLCONV utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { diff --git a/deps/icu-small/source/common/util.h b/deps/icu-small/source/common/util.h index 2a709e408a2698..74fdd75982cc5a 100644 --- a/deps/icu-small/source/common/util.h +++ b/deps/icu-small/source/common/util.h @@ -13,10 +13,10 @@ #ifndef ICU_UTIL_H #define ICU_UTIL_H -#include "unicode/utypes.h" -#include "unicode/uobject.h" +#include "charstr.h" #include "unicode/unistr.h" - +#include "unicode/uobject.h" +#include "unicode/utypes.h" //-------------------------------------------------------------------- // class ICU_Utility // i18n utility functions, scoped into the class ICU_Utility. diff --git a/deps/icu-small/source/common/utracimp.h b/deps/icu-small/source/common/utracimp.h index 84e7031da8bc17..42e92749d65f02 100644 --- a/deps/icu-small/source/common/utracimp.h +++ b/deps/icu-small/source/common/utracimp.h @@ -193,7 +193,7 @@ UPRV_BLOCK_MACRO_BEGIN { \ * Trace statement for each exit point of a function that has a UTRACE_ENTRY() * statement, and that returns a value. 
* - * @param val The function's return value, int32_t or comatible type. + * @param val The function's return value, int32_t or compatible type. * * @internal */ diff --git a/deps/icu-small/source/common/uvector.cpp b/deps/icu-small/source/common/uvector.cpp index ad3a813e3706ac..39a92e07c01911 100644 --- a/deps/icu-small/source/common/uvector.cpp +++ b/deps/icu-small/source/common/uvector.cpp @@ -312,7 +312,7 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const { } else { for (i=startIndex; i and a timezone - // behind UTC is Etc/GMT+. - int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", -utcOffsetMins / 60); + // Important note on the sign convention for zones: + // + // From https://en.wikipedia.org/wiki/Tz_database#Area + // "In order to conform with the POSIX style, those zone names beginning with "Etc/GMT" have their sign reversed + // from the standard ISO 8601 convention. In the "Etc" area, zones west of GMT have a positive sign and those + // east have a negative sign in their name (e.g "Etc/GMT-14" is 14 hours ahead of GMT)." + // + // Regarding the POSIX style, from https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html + // "The offset specifies the time value you must add to the local time to get a Coordinated Universal Time value." + // + // However, the Bias value in DYNAMIC_TIME_ZONE_INFORMATION *already* follows the POSIX convention. + // + // From https://docs.microsoft.com/en-us/windows/win32/api/timezoneapi/ns-timezoneapi-dynamic_time_zone_information + // "The bias is the difference, in minutes, between Coordinated Universal Time (UTC) and + // local time. All translations between UTC and local time are based on the following formula: + // UTC = local time + bias" + // + // For example, a time zone that is 3 hours ahead of UTC (UTC+03:00) would have a Bias value of -180, and the + // corresponding time zone ID would be "Etc/GMT-3". 
(So there is no need to negate utcOffsetMins below.) + int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", utcOffsetMins / 60); if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) { return uprv_strdup(gmtOffsetTz); } diff --git a/deps/icu-small/source/data/in/icudt68l.dat.bz2 b/deps/icu-small/source/data/in/icudt69l.dat.bz2 similarity index 55% rename from deps/icu-small/source/data/in/icudt68l.dat.bz2 rename to deps/icu-small/source/data/in/icudt69l.dat.bz2 index 8fd32b7471d648..752a98acd8237d 100644 Binary files a/deps/icu-small/source/data/in/icudt68l.dat.bz2 and b/deps/icu-small/source/data/in/icudt69l.dat.bz2 differ diff --git a/deps/icu-small/source/i18n/basictz.cpp b/deps/icu-small/source/i18n/basictz.cpp index 6cd93f4d3c2c83..4d07d5e1a74c3b 100644 --- a/deps/icu-small/source/i18n/basictz.cpp +++ b/deps/icu-small/source/i18n/basictz.cpp @@ -423,7 +423,7 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, goto error; } } else { - // Colllect transitions after the start time + // Collect transitions after the start time int32_t startTimes; DateTimeRule::TimeRuleType timeType; int32_t idx; @@ -547,14 +547,23 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, } void -BasicTimeZone::getOffsetFromLocal(UDate /*date*/, int32_t /*nonExistingTimeOpt*/, int32_t /*duplicatedTimeOpt*/, - int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, UErrorCode& status) const { +BasicTimeZone::getOffsetFromLocal(UDate /*date*/, UTimeZoneLocalOption /*nonExistingTimeOpt*/, + UTimeZoneLocalOption /*duplicatedTimeOpt*/, + int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, + UErrorCode& status) const { if (U_FAILURE(status)) { return; } status = U_UNSUPPORTED_ERROR; } +void BasicTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, + UErrorCode& status) const { + getOffsetFromLocal(date, 
(UTimeZoneLocalOption)nonExistingTimeOpt, + (UTimeZoneLocalOption)duplicatedTimeOpt, rawOffset, dstOffset, status); +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/calendar.cpp b/deps/icu-small/source/i18n/calendar.cpp index df256c154d7a4f..6842f1cfc63c06 100644 --- a/deps/icu-small/source/i18n/calendar.cpp +++ b/deps/icu-small/source/i18n/calendar.cpp @@ -308,7 +308,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) { calTypeBuf[0] = 0; if (U_SUCCESS(status) && order != NULL) { - // the first calender type is the default for the region + // the first calendar type is the default for the region int32_t len = 0; const UChar *uCalType = ures_getStringByIndex(order, 0, &len, &status); if (len < (int32_t)sizeof(calTypeBuf)) { @@ -2291,7 +2291,7 @@ int32_t Calendar::fieldDifference(UDate targetMs, UCalendarDateFields field, UEr if (U_FAILURE(ec)) return 0; int32_t min = 0; double startMs = getTimeInMillis(ec); - // Always add from the start millis. This accomodates + // Always add from the start millis. This accommodates // operations like adding years from February 29, 2000 up to // February 29, 2004. If 1, 1, 1, 1 is added to the year // field, the DOM gets pinned to 28 and stays there, giving an diff --git a/deps/icu-small/source/i18n/collationbuilder.cpp b/deps/icu-small/source/i18n/collationbuilder.cpp index 45ac6ddcd5839b..b411b4f12f6bf9 100644 --- a/deps/icu-small/source/i18n/collationbuilder.cpp +++ b/deps/icu-small/source/i18n/collationbuilder.cpp @@ -688,7 +688,7 @@ CollationBuilder::addRelation(int32_t strength, const UnicodeString &prefix, // A Hangul syllable completely inside a contraction is ok. } // Note: If there is a prefix, then the parser checked that - // both the prefix and the string beging with NFC boundaries (not Jamo V or T). + // both the prefix and the string begin with NFC boundaries (not Jamo V or T). 
// Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0)) // (While handling a Hangul syllable, prefixes on Jamo V or T // would not see the previous Jamo of that syllable.) diff --git a/deps/icu-small/source/i18n/collationdatabuilder.cpp b/deps/icu-small/source/i18n/collationdatabuilder.cpp index fdd264f8aaa65a..613124a6f58c50 100644 --- a/deps/icu-small/source/i18n/collationdatabuilder.cpp +++ b/deps/icu-small/source/i18n/collationdatabuilder.cpp @@ -255,12 +255,18 @@ DataBuilderCollationIterator::getDataCE32(UChar32 c) const { uint32_t DataBuilderCollationIterator::getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } U_ASSERT(Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG)); if((ce32 & CollationDataBuilder::IS_BUILDER_JAMO_CE32) != 0) { UChar32 jamo = Collation::indexFromCE32(ce32); return utrie2_get32(builder.trie, jamo); } else { ConditionalCE32 *cond = builder.getConditionalCE32ForCE32(ce32); + if (cond == nullptr) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + // TODO: ICU-21531 figure out why this happens. + return 0; + } if(cond->builtCE32 == Collation::NO_CE32) { // Build the context-sensitive mappings into their runtime form and cache the result. 
cond->builtCE32 = builder.buildContext(cond, errorCode); diff --git a/deps/icu-small/source/i18n/cpdtrans.cpp b/deps/icu-small/source/i18n/cpdtrans.cpp index 624ae431d35aa6..1d3e6209ec7d36 100644 --- a/deps/icu-small/source/i18n/cpdtrans.cpp +++ b/deps/icu-small/source/i18n/cpdtrans.cpp @@ -282,6 +282,7 @@ void CompoundTransliterator::freeTransliterators(void) { CompoundTransliterator& CompoundTransliterator::operator=( const CompoundTransliterator& t) { + if (this == &t) { return *this; } // self-assignment: no-op Transliterator::operator=(t); int32_t i = 0; UBool failed = FALSE; diff --git a/deps/icu-small/source/i18n/csrmbcs.cpp b/deps/icu-small/source/i18n/csrmbcs.cpp index 46d626bb3f4ffe..c2f768f7889fe2 100644 --- a/deps/icu-small/source/i18n/csrmbcs.cpp +++ b/deps/icu-small/source/i18n/csrmbcs.cpp @@ -186,7 +186,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars if (doubleByteCharCount == 0 && totalCharCount < 10) { // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes. // We don't have enough data to have any confidence. - // Statistical analysis of single byte non-ASCII charcters would probably help here. + // Statistical analysis of single byte non-ASCII characters would probably help here. confidence = 0; } else { diff --git a/deps/icu-small/source/i18n/csrucode.cpp b/deps/icu-small/source/i18n/csrucode.cpp index b84011c259095a..01d565ec3a9bcc 100644 --- a/deps/icu-small/source/i18n/csrucode.cpp +++ b/deps/icu-small/source/i18n/csrucode.cpp @@ -155,7 +155,7 @@ UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const } else if (numValid > 0 && numInvalid == 0) { confidence = 80; } else if (numValid > numInvalid*10) { - // Probably corruput UTF-32BE data. Valid sequences aren't likely by chance. + // Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance. 
confidence = 25; } diff --git a/deps/icu-small/source/i18n/csrutf8.cpp b/deps/icu-small/source/i18n/csrutf8.cpp index bc06fa8bb8dd4e..914be35b1f8222 100644 --- a/deps/icu-small/source/i18n/csrutf8.cpp +++ b/deps/icu-small/source/i18n/csrutf8.cpp @@ -99,7 +99,7 @@ UBool CharsetRecog_UTF8::match(InputText* input, CharsetMatch *results) const { // accepts ASCII with confidence = 10. confidence = 15; } else if (numValid > numInvalid*10) { - // Probably corruput utf-8 data. Valid sequences aren't likely by chance. + // Probably corrupt utf-8 data. Valid sequences aren't likely by chance. confidence = 25; } diff --git a/deps/icu-small/source/i18n/decContext.cpp b/deps/icu-small/source/i18n/decContext.cpp index bead83efff7b83..e22823034fab88 100644 --- a/deps/icu-small/source/i18n/decContext.cpp +++ b/deps/icu-small/source/i18n/decContext.cpp @@ -150,7 +150,7 @@ U_CAPI uInt U_EXPORT2 uprv_decContextGetStatus(decContext *context) { /* newstatus is the source for the bits to be restored */ /* mask indicates the bits to be restored (the status bit that */ /* corresponds to each 1 bit in the mask is set to the value of */ -/* the correspnding bit in newstatus) */ +/* the corresponding bit in newstatus) */ /* returns context */ /* */ /* No error is possible. 
*/ diff --git a/deps/icu-small/source/i18n/decNumber.cpp b/deps/icu-small/source/i18n/decNumber.cpp index c19493bdf3bbee..2f9ffafd571062 100644 --- a/deps/icu-small/source/i18n/decNumber.cpp +++ b/deps/icu-small/source/i18n/decNumber.cpp @@ -2203,7 +2203,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *res, const decNumber /* if a negative power the constant 1 is needed, and if not subset */ /* invert the lhs now rather than inverting the result later */ if (decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ - decNumber *inv=invbuff; /* asssume use fixed buffer */ + decNumber *inv=invbuff; /* assume use fixed buffer */ uprv_decNumberCopy(&dnOne, dac); /* dnOne=1; [needed now or later] */ #if DECSUBSET if (set->extended) { /* need to calculate 1/lhs */ @@ -5242,7 +5242,7 @@ static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, /* exp(-x) where x can be the tiniest number (Ntiny). */ /* */ /* 2. Normalizing x to be <=0.1 (instead of <=1) reduces loop */ -/* iterations by appoximately a third with additional (although */ +/* iterations by approximately a third with additional (although */ /* diminishing) returns as the range is reduced to even smaller */ /* fractions. 
However, h (the power of 10 used to correct the */ /* result at the end, see below) must be kept <=8 as otherwise */ diff --git a/deps/icu-small/source/i18n/decNumberLocal.h b/deps/icu-small/source/i18n/decNumberLocal.h index e8d1b38653eb9d..1c5a79b7021f0c 100644 --- a/deps/icu-small/source/i18n/decNumberLocal.h +++ b/deps/icu-small/source/i18n/decNumberLocal.h @@ -146,7 +146,7 @@ /* ---------------------------------------------------------------- */ - /* Definitions for arbitary-precision modules (only valid after */ + /* Definitions for arbitrary-precision modules (only valid after */ /* decNumber.h has been included) */ /* ---------------------------------------------------------------- */ diff --git a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp index a95910df0421d1..372e17dc7bd638 100644 --- a/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp +++ b/deps/icu-small/source/i18n/double-conversion-bignum-dtoa.cpp @@ -384,7 +384,7 @@ static void BignumToFixed(int requested_digits, int* decimal_point, // Returns an estimation of k such that 10^(k-1) <= v < 10^k where // v = f * 2^exponent and 2^52 <= f < 2^53. // v is hence a normalized double with the given exponent. The output is an -// approximation for the exponent of the decimal approimation .digits * 10^k. +// approximation for the exponent of the decimal approximation .digits * 10^k. // // The result might undershoot by 1 in which case 10^k <= v < 10^k+1. // Note: this property holds for v's upper boundary m+ too. @@ -562,7 +562,7 @@ static void InitialScaledStartValuesNegativeExponentNegativePower( // // Let ep == estimated_power, then the returned values will satisfy: // v / 10^ep = numerator / denominator. 
-// v's boundarys m- and m+: +// v's boundaries m- and m+: // m- / 10^ep == v / 10^ep - delta_minus / denominator // m+ / 10^ep == v / 10^ep + delta_plus / denominator // Or in other words: diff --git a/deps/icu-small/source/i18n/double-conversion-double-to-string.cpp b/deps/icu-small/source/i18n/double-conversion-double-to-string.cpp index 44c176f4f9c958..90ba4360600472 100644 --- a/deps/icu-small/source/i18n/double-conversion-double-to-string.cpp +++ b/deps/icu-small/source/i18n/double-conversion-double-to-string.cpp @@ -107,19 +107,19 @@ void DoubleToStringConverter::CreateExponentialRepresentation( result_builder->AddCharacter('+'); } } - if (exponent == 0) { - result_builder->AddCharacter('0'); - return; - } DOUBLE_CONVERSION_ASSERT(exponent < 1e4); // Changing this constant requires updating the comment of DoubleToStringConverter constructor const int kMaxExponentLength = 5; char buffer[kMaxExponentLength + 1]; buffer[kMaxExponentLength] = '\0'; int first_char_pos = kMaxExponentLength; - while (exponent > 0) { - buffer[--first_char_pos] = '0' + (exponent % 10); - exponent /= 10; + if (exponent == 0) { + buffer[--first_char_pos] = '0'; + } else { + while (exponent > 0) { + buffer[--first_char_pos] = '0' + (exponent % 10); + exponent /= 10; + } } // Add prefix '0' to make exponent width >= min(min_exponent_with_, kMaxExponentLength) // For example: convert 1e+9 -> 1e+09, if min_exponent_with_ is set to 2 @@ -342,9 +342,21 @@ bool DoubleToStringConverter::ToPrecision(double value, int exponent = decimal_point - 1; int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 
1 : 0; - if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || + bool as_exponential = + (-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || (decimal_point - precision + extra_zero > - max_trailing_padding_zeroes_in_precision_mode_)) { + max_trailing_padding_zeroes_in_precision_mode_); + if ((flags_ & NO_TRAILING_ZERO) != 0) { + // Truncate trailing zeros that occur after the decimal point (if exponential, + // that is everything after the first digit). + int stop = as_exponential ? 1 : std::max(1, decimal_point); + while (decimal_rep_length > stop && decimal_rep[decimal_rep_length - 1] == '0') { + --decimal_rep_length; + } + // Clamp precision to avoid the code below re-adding the zeros. + precision = std::min(precision, decimal_rep_length); + } + if (as_exponential) { // Fill buffer to contain 'precision' digits. // Usually the buffer is already at the correct length, but 'DoubleToAscii' // is allowed to return less characters. diff --git a/deps/icu-small/source/i18n/double-conversion-double-to-string.h b/deps/icu-small/source/i18n/double-conversion-double-to-string.h index 27bd86784895f8..73ff48f109ecd5 100644 --- a/deps/icu-small/source/i18n/double-conversion-double-to-string.h +++ b/deps/icu-small/source/i18n/double-conversion-double-to-string.h @@ -48,12 +48,11 @@ namespace double_conversion { class DoubleToStringConverter { public: -#if 0 // not needed for ICU // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the // function returns false. static const int kMaxFixedDigitsBeforePoint = 60; - static const int kMaxFixedDigitsAfterPoint = 60; + static const int kMaxFixedDigitsAfterPoint = 100; // When calling ToExponential with a requested_digits // parameter > kMaxExponentialDigits then the function returns false. 
@@ -65,12 +64,36 @@ class DoubleToStringConverter { static const int kMinPrecisionDigits = 1; static const int kMaxPrecisionDigits = 120; + // The maximal number of digits that are needed to emit a double in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any double will never use more digits than + // kBase10MaximalLength. + // Note that DoubleToAscii null-terminates its input. So the given buffer + // should be at least kBase10MaximalLength + 1 characters long. + static const int kBase10MaximalLength = 17; + + // The maximal number of digits that are needed to emit a single in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any single will never use more digits than + // kBase10MaximalLengthSingle. + static const int kBase10MaximalLengthSingle = 9; + + // The length of the longest string that 'ToShortest' can produce when the + // converter is instantiated with EcmaScript defaults (see + // 'EcmaScriptConverter') + // This value does not include the trailing '\0' character. + // This amount of characters is needed for negative values that hit the + // 'decimal_in_shortest_low' limit. For example: "-0.0000033333333333333333" + static const int kMaxCharsEcmaScriptShortest = 25; + +#if 0 // not needed for ICU enum Flags { NO_FLAGS = 0, EMIT_POSITIVE_EXPONENT_SIGN = 1, EMIT_TRAILING_DECIMAL_POINT = 2, EMIT_TRAILING_ZERO_AFTER_POINT = 4, - UNIQUE_ZERO = 8 + UNIQUE_ZERO = 8, + NO_TRAILING_ZERO = 16 }; // Flags should be a bit-or combination of the possible Flags-enum. @@ -82,9 +105,13 @@ class DoubleToStringConverter { // Example: 2345.0 is converted to "2345.". // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point // emits a trailing '0'-character. This flag requires the - // EXMIT_TRAILING_DECIMAL_POINT flag. + // EMIT_TRAILING_DECIMAL_POINT flag. // Example: 2345.0 is converted to "2345.0". 
// - UNIQUE_ZERO: "-0.0" is converted to "0.0". + // - NO_TRAILING_ZERO: Trailing zeros are removed from the fractional portion + // of the result in precision mode. Matches printf's %g. + // When EMIT_TRAILING_ZERO_AFTER_POINT is also given, one trailing zero is + // preserved. // // Infinity symbol and nan_symbol provide the string representation for these // special values. If the string is NULL and the special value is encountered @@ -152,6 +179,14 @@ class DoubleToStringConverter { } // Returns a converter following the EcmaScript specification. + // + // Flags: UNIQUE_ZERO and EMIT_POSITIVE_EXPONENT_SIGN. + // Special values: "Infinity" and "NaN". + // Lower case 'e' for exponential values. + // decimal_in_shortest_low: -6 + // decimal_in_shortest_high: 21 + // max_leading_padding_zeroes_in_precision_mode: 6 + // max_trailing_padding_zeroes_in_precision_mode: 0 static const DoubleToStringConverter& EcmaScriptConverter(); // Computes the shortest string of digits that correctly represent the input @@ -177,6 +212,21 @@ class DoubleToStringConverter { // Returns true if the conversion succeeds. The conversion always succeeds // except when the input value is special and no infinity_symbol or // nan_symbol has been given to the constructor. + // + // The length of the longest result is the maximum of the length of the + // following string representations (each with possible examples): + // - NaN and negative infinity: "NaN", "-Infinity", "-inf". + // - -10^(decimal_in_shortest_high - 1): + // "-100000000000000000000", "-1000000000000000.0" + // - the longest string in range [0; -10^decimal_in_shortest_low]. Generally, + // this string is 3 + kBase10MaximalLength - decimal_in_shortest_low. + // (Sign, '0', decimal point, padding zeroes for decimal_in_shortest_low, + // and the significant digits). + // "-0.0000033333333333333333", "-0.0012345678901234567" + // - the longest exponential representation. 
(A negative number with + // kBase10MaximalLength significant digits). + // "-1.7976931348623157e+308", "-1.7976931348623157E308" + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToShortest(double value, StringBuilder* result_builder) const { return ToShortestIeeeNumber(value, result_builder, SHORTEST); } @@ -217,9 +267,11 @@ class DoubleToStringConverter { // been provided to the constructor, // - 'value' > 10^kMaxFixedDigitsBeforePoint, or // - 'requested_digits' > kMaxFixedDigitsAfterPoint. - // The last two conditions imply that the result will never contain more than - // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters + // The last two conditions imply that the result for non-special values never + // contains more than + // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters // (one additional character for the sign, and one for the decimal point). + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToFixed(double value, int requested_digits, StringBuilder* result_builder) const; @@ -248,14 +300,17 @@ class DoubleToStringConverter { // - the input value is special and no infinity_symbol or nan_symbol has // been provided to the constructor, // - 'requested_digits' > kMaxExponentialDigits. - // The last condition implies that the result will never contain more than + // + // The last condition implies that the result never contains more than // kMaxExponentialDigits + 8 characters (the sign, the digit before the // decimal point, the decimal point, the exponent character, the // exponent's sign, and at most 3 exponent digits). + // In addition, the buffer must be able to hold the trailing '\0' character. 
bool ToExponential(double value, int requested_digits, StringBuilder* result_builder) const; + // Computes 'precision' leading digits of the given 'value' and returns them // either in exponential or decimal format, depending on // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the @@ -287,9 +342,11 @@ class DoubleToStringConverter { // been provided to the constructor, // - precision < kMinPericisionDigits // - precision > kMaxPrecisionDigits - // The last condition implies that the result will never contain more than + // + // The last condition implies that the result never contains more than // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the // exponent character, the exponent's sign, and at most 3 exponent digits). + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToPrecision(double value, int precision, StringBuilder* result_builder) const; @@ -310,14 +367,6 @@ class DoubleToStringConverter { PRECISION }; - // The maximal number of digits that are needed to emit a double in base 10. - // A higher precision can be achieved by using more digits, but the shortest - // accurate representation of any double will never use more digits than - // kBase10MaximalLength. - // Note that DoubleToAscii null-terminates its input. So the given buffer - // should be at least kBase10MaximalLength + 1 characters long. - static const int kBase10MaximalLength = 17; - // Converts the given double 'v' to digit characters. 'v' must not be NaN, // +Infinity, or -Infinity. In SHORTEST_SINGLE-mode this restriction also // applies to 'v' after it has been casted to a single-precision float. 
That diff --git a/deps/icu-small/source/i18n/double-conversion-string-to-double.cpp b/deps/icu-small/source/i18n/double-conversion-string-to-double.cpp index 548cad1f302241..3275b9e552f66b 100644 --- a/deps/icu-small/source/i18n/double-conversion-string-to-double.cpp +++ b/deps/icu-small/source/i18n/double-conversion-string-to-double.cpp @@ -51,6 +51,18 @@ // ICU PATCH: Wrap in ICU namespace U_NAMESPACE_BEGIN +#ifdef _MSC_VER +# if _MSC_VER >= 1900 +// Fix MSVC >= 2015 (_MSC_VER == 1900) warning +// C4244: 'argument': conversion from 'const uc16' to 'char', possible loss of data +// against Advance and friends, when instantiated with **it as char, not uc16. + __pragma(warning(disable: 4244)) +# endif +# if _MSC_VER <= 1700 // VS2012, see IsDecimalDigitForRadix warning fix, below +# define VS2012_RADIXWARN +# endif +#endif + namespace double_conversion { namespace { @@ -170,9 +182,9 @@ static double SignedZero(bool sign) { // // The function is small and could be inlined, but VS2012 emitted a warning // because it constant-propagated the radix and concluded that the last -// condition was always true. By moving it into a separate function the -// compiler wouldn't warn anymore. -#ifdef _MSC_VER +// condition was always true. Moving it into a separate function and +// suppressing optimisation keeps the compiler from warning. +#ifdef VS2012_RADIXWARN #pragma optimize("",off) static bool IsDecimalDigitForRadix(int c, int radix) { return '0' <= c && c <= '9' && (c - '0') < radix; @@ -738,11 +750,17 @@ double StringToDoubleConverter::StringToIeee( DOUBLE_CONVERSION_ASSERT(buffer_pos < kBufferSize); buffer[buffer_pos] = '\0'; + // Code above ensures there are no leading zeros and the buffer has fewer than + // kMaxSignificantDecimalDigits characters. Trim trailing zeros. 
+ Vector chars(buffer, buffer_pos); + chars = TrimTrailingZeros(chars); + exponent += buffer_pos - chars.length(); + double converted; if (read_as_double) { - converted = Strtod(Vector(buffer, buffer_pos), exponent); + converted = StrtodTrimmed(chars, exponent); } else { - converted = Strtof(Vector(buffer, buffer_pos), exponent); + converted = StrtofTrimmed(chars, exponent); } *processed_characters_count = static_cast(current - input); return sign? -converted: converted; diff --git a/deps/icu-small/source/i18n/double-conversion-strtod.cpp b/deps/icu-small/source/i18n/double-conversion-strtod.cpp index ee6377782bb82f..9eeaa2e40fc73c 100644 --- a/deps/icu-small/source/i18n/double-conversion-strtod.cpp +++ b/deps/icu-small/source/i18n/double-conversion-strtod.cpp @@ -115,17 +115,6 @@ static Vector TrimLeadingZeros(Vector buffer) { return Vector(buffer.start(), 0); } - -static Vector TrimTrailingZeros(Vector buffer) { - for (int i = buffer.length() - 1; i >= 0; --i) { - if (buffer[i] != '0') { - return buffer.SubVector(0, i + 1); - } - } - return Vector(buffer.start(), 0); -} - - static void CutToMaxSignificantDigits(Vector buffer, int exponent, char* significant_buffer, @@ -216,12 +205,14 @@ static bool DoubleStrtod(Vector trimmed, int exponent, double* result) { #if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) + // Avoid "unused parameter" warnings + (void) trimmed; + (void) exponent; + (void) result; // On x86 the floating-point stack can be 64 or 80 bits wide. If it is // 80 bits wide (as is the case on Linux) then double-rounding occurs and the // result is not accurate. // We know that Windows32 uses 64 bits and is therefore accurate. - // Note that the ARM simulator is compiled for 32bits. It therefore exhibits - // the same problem. 
return false; #else if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { @@ -473,6 +464,11 @@ static bool IsNonZeroDigit(const char d) { return ('1' <= d) && (d <= '9'); } +#ifdef __has_cpp_attribute +#if __has_cpp_attribute(maybe_unused) +[[maybe_unused]] +#endif +#endif static bool AssertTrimmedDigits(const Vector& buffer) { for(int i = 0; i < buffer.length(); ++i) { if(!IsDigit(buffer[i])) { @@ -545,6 +541,12 @@ float Strtof(Vector buffer, int exponent) { TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, &trimmed, &updated_exponent); exponent = updated_exponent; + return StrtofTrimmed(trimmed, exponent); +} + +float StrtofTrimmed(Vector trimmed, int exponent) { + DOUBLE_CONVERSION_ASSERT(trimmed.length() <= kMaxSignificantDecimalDigits); + DOUBLE_CONVERSION_ASSERT(AssertTrimmedDigits(trimmed)); double double_guess; bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); diff --git a/deps/icu-small/source/i18n/double-conversion-strtod.h b/deps/icu-small/source/i18n/double-conversion-strtod.h index 50ef746401a908..abfe00a333102c 100644 --- a/deps/icu-small/source/i18n/double-conversion-strtod.h +++ b/deps/icu-small/source/i18n/double-conversion-strtod.h @@ -54,11 +54,25 @@ double Strtod(Vector buffer, int exponent); // contain a dot or a sign. It must not start with '0', and must not be empty. float Strtof(Vector buffer, int exponent); -// For special use cases, the heart of the Strtod() function is also available -// separately, it assumes that 'trimmed' is as produced by TrimAndCut(), i.e. -// no leading or trailing zeros, also no lone zero, and not 'too many' digits. +// Same as Strtod, but assumes that 'trimmed' is already trimmed, as if run +// through TrimAndCut. That is, 'trimmed' must have no leading or trailing +// zeros, must not be a lone zero, and must not have 'too many' digits. 
double StrtodTrimmed(Vector trimmed, int exponent); +// Same as Strtof, but assumes that 'trimmed' is already trimmed, as if run +// through TrimAndCut. That is, 'trimmed' must have no leading or trailing +// zeros, must not be a lone zero, and must not have 'too many' digits. +float StrtofTrimmed(Vector trimmed, int exponent); + +inline Vector TrimTrailingZeros(Vector buffer) { + for (int i = buffer.length() - 1; i >= 0; --i) { + if (buffer[i] != '0') { + return buffer.SubVector(0, i + 1); + } + } + return Vector(buffer.start(), 0); +} + } // namespace double_conversion // ICU PATCH: Close ICU namespace diff --git a/deps/icu-small/source/i18n/double-conversion-utils.h b/deps/icu-small/source/i18n/double-conversion-utils.h index 8c6a0e16e005fe..c937463647135b 100644 --- a/deps/icu-small/source/i18n/double-conversion-utils.h +++ b/deps/icu-small/source/i18n/double-conversion-utils.h @@ -118,7 +118,7 @@ int main(int argc, char** argv) { defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ defined(__hppa__) || defined(__ia64__) || \ defined(__mips__) || \ - defined(__nios2__) || \ + defined(__nios2__) || defined(__ghs) || \ defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ diff --git a/deps/icu-small/source/i18n/dtfmtsym.cpp b/deps/icu-small/source/i18n/dtfmtsym.cpp index 9dde66c1fba419..5a54e9ebbd4493 100644 --- a/deps/icu-small/source/i18n/dtfmtsym.cpp +++ b/deps/icu-small/source/i18n/dtfmtsym.cpp @@ -450,6 +450,7 @@ DateFormatSymbols::copyData(const DateFormatSymbols& other) { */ DateFormatSymbols& DateFormatSymbols::operator=(const DateFormatSymbols& other) { + if (this == &other) { return *this; } // self-assignment: no-op dispose(); copyData(other); @@ -2330,7 +2331,7 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError // If format/narrow not 
available, use standalone/narrow assignArray(fNarrowMonths, fNarrowMonthsCount, fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount); } else if (narrowMonthsEC != U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) { - // If standalone/narrow not availabe, use format/narrow + // If standalone/narrow not available, use format/narrow assignArray(fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount, fNarrowMonths, fNarrowMonthsCount); } else if (narrowMonthsEC == U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) { // If neither is available, use format/abbreviated diff --git a/deps/icu-small/source/i18n/dtitvfmt.cpp b/deps/icu-small/source/i18n/dtitvfmt.cpp index d6ec501af88701..18253156ea7e0e 100644 --- a/deps/icu-small/source/i18n/dtitvfmt.cpp +++ b/deps/icu-small/source/i18n/dtitvfmt.cpp @@ -704,7 +704,7 @@ DateIntervalFormat::create(const Locale& locale, status = U_MEMORY_ALLOCATION_ERROR; delete dtitvinf; } else if ( U_FAILURE(status) ) { - // safe to delete f, although nothing acutally is saved + // safe to delete f, although nothing actually is saved delete f; f = 0; } @@ -863,6 +863,14 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_YEAR, nullptr, &pattern, fInfo->getDefaultOrder()); + + timeSkeleton.insert(0, CAP_G); + pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder()); } else { // TODO: fall back } @@ -889,15 +897,23 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_YEAR, nullptr, &pattern, 
fInfo->getDefaultOrder()); + + timeSkeleton.insert(0, CAP_G); + pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder()); } else { /* if both present, - * 1) when the year, month, or day differs, + * 1) when the era, year, month, or day differs, * concatenate the two original expressions with a separator between, * 2) otherwise, present the date followed by the * range expression for the time. */ /* - * 1) when the year, month, or day differs, + * 1) when the era, year, month, or day differs, * concatenate the two original expressions with a separator between, */ // if field exists, use fall back @@ -917,6 +933,11 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { skeleton.insert(0, LOW_Y); setFallbackPattern(UCAL_YEAR, skeleton, status); } + if ( !fieldExistsInSkeleton(UCAL_ERA, dateSkeleton) ) { + // then prefix skeleton with 'G' + skeleton.insert(0, CAP_G); + setFallbackPattern(UCAL_ERA, skeleton, status); + } /* * 2) otherwise, present the date followed by the diff --git a/deps/icu-small/source/i18n/formattedval_impl.h b/deps/icu-small/source/i18n/formattedval_impl.h index 1e6eb1e639f809..c43c1020d1ef26 100644 --- a/deps/icu-small/source/i18n/formattedval_impl.h +++ b/deps/icu-small/source/i18n/formattedval_impl.h @@ -119,7 +119,9 @@ class FormattedValueFieldPositionIteratorImpl : public UMemory, public Formatted // Internal struct that must be exported for MSVC struct U_I18N_API SpanInfo { + UFieldCategory category; int32_t spanValue; + int32_t start; int32_t length; }; @@ -171,16 +173,19 @@ class U_I18N_API FormattedValueStringBuilderImpl : public UMemory, public Format /** * Adds additional metadata used for span fields. * - * spanValue: the index of the list item, for example. + * category: the category to use for the span field. + * spanValue: the value of the span field: index of the list item, for example. 
+ * start: the start position within the string of the span. -1 if unknown. * length: the length of the span, used to split adjacent fields. */ - void appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); - void prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); + void appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); + void prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); private: FormattedStringBuilder fString; FormattedStringBuilder::Field fNumericField; MaybeStackArray spanIndices; + int32_t spanIndicesCount = 0; bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const; static bool isIntOrGroup(FormattedStringBuilder::Field field); diff --git a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp index 84c2d00666c2be..9ec06daf3ea41e 100644 --- a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp +++ b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp @@ -103,6 +103,27 @@ static constexpr Field kEndField = Field(0xf, 0xf); bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const { int32_t fieldStart = -1; Field currField = kUndefinedField; + bool prevIsSpan = false; + int32_t nextSpanStart = -1; + if (spanIndicesCount > 0) { + int64_t si = cfpos.getInt64IterationContext(); + U_ASSERT(si <= spanIndicesCount); + if (si < spanIndicesCount) { + nextSpanStart = spanIndices[si].start; + } + if (si > 0) { + prevIsSpan = cfpos.getCategory() == spanIndices[si-1].category + && cfpos.getField() == spanIndices[si-1].spanValue; + } + } + bool prevIsNumeric = false; + if (numericField != kUndefinedField) { + prevIsNumeric = cfpos.getCategory() == numericField.getCategory() + && cfpos.getField() == 
numericField.getField(); + } + bool prevIsInteger = cfpos.getCategory() == UFIELD_CATEGORY_NUMBER + && cfpos.getField() == UNUM_INTEGER_FIELD; + for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) { Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField; // Case 1: currently scanning a field. @@ -129,11 +150,38 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& } continue; } + // Special case: emit normalField if we are pointing at the end of spanField. + if (i > fString.fZero && prevIsSpan) { + int64_t si = cfpos.getInt64IterationContext() - 1; + U_ASSERT(si >= 0); + int32_t previ = i - spanIndices[si].length; + U_ASSERT(previ >= fString.fZero); + Field prevField = fString.getFieldPtr()[previ]; + if (prevField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero - spanIndices[si].length; + int32_t end = fieldStart + spanIndices[si].length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } else { + prevIsSpan = false; + } + } else { + // Re-wind, since there may be multiple fields in the span. + i = previ; + _field = prevField; + } + } // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER. 
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD) && i > fString.fZero - // don't return the same field twice in a row: - && i - fString.fZero > cfpos.getLimit() + && !prevIsInteger + && !prevIsNumeric && isIntOrGroup(fString.getFieldPtr()[i - 1]) && !isIntOrGroup(_field)) { int j = i - 1; @@ -149,10 +197,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (numericField != kUndefinedField && cfpos.matchesField(numericField.getCategory(), numericField.getField()) && i > fString.fZero - // don't return the same field twice in a row: - && (i - fString.fZero > cfpos.getLimit() - || cfpos.getCategory() != numericField.getCategory() - || cfpos.getField() != numericField.getField()) + && !prevIsNumeric && fString.getFieldPtr()[i - 1].isNumeric() && !_field.isNumeric()) { // Re-wind to the beginning of the field and then emit it @@ -165,38 +210,22 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& i - fString.fZero); return true; } - // Special case: emit normalField if we are pointing at the end of spanField. - if (i > fString.fZero) { - auto elementField = fString.getFieldPtr()[i-1]; - if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) - && cfpos.matchesField(elementField.getCategory(), elementField.getField()) - && (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) { - int64_t si = cfpos.getInt64IterationContext() - 1; - cfpos.setState( - elementField.getCategory(), - elementField.getField(), - i - fString.fZero - spanIndices[si].length, - i - fString.fZero); - return true; - } - } - // Special case: skip over INTEGER; will be coalesced later. - if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { - _field = kUndefinedField; - } - // Case 2: no field starting at this position. 
- if (_field.isUndefined() || _field == kEndField) { - continue; - } - // Case 3: check for field starting at this position - // Case 3a: Need to add a SpanField - if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Check for span field + if (!prevIsSpan && ( + _field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) || + i - fString.fZero == nextSpanStart)) { int64_t si = cfpos.getInt64IterationContext(); + if (si >= spanIndicesCount) { + break; + } + UFieldCategory spanCategory = spanIndices[si].category; int32_t spanValue = spanIndices[si].spanValue; int32_t length = spanIndices[si].length; cfpos.setInt64IterationContext(si + 1); - if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) { - UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN; + if (si + 1 < spanIndicesCount) { + nextSpanStart = spanIndices[si + 1].start; + } + if (cfpos.matchesField(spanCategory, spanValue)) { fieldStart = i - fString.fZero; int32_t end = fieldStart + length; cfpos.setState( @@ -205,17 +234,41 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& fieldStart, end); return true; - } else { - // Failed to match; jump ahead - i += length - 1; - continue; + } else if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero; + int32_t end = fieldStart + length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } else { + // Failed to match; jump ahead + i += length - 1; + // goto loopend + } } } - // Case 3b: No SpanField - if (cfpos.matchesField(_field.getCategory(), _field.getField())) { + // Special case: skip over INTEGER; will be coalesced later. + else if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { + _field = kUndefinedField; + } + // No field starting at this position. 
+ else if (_field.isUndefined() || _field == kEndField) { + // goto loopend + } + // No SpanField + else if (cfpos.matchesField(_field.getCategory(), _field.getField())) { fieldStart = i - fString.fZero; currField = _field; } + // loopend: + prevIsSpan = false; + prevIsNumeric = false; + prevIsInteger = false; } U_ASSERT(currField == kUndefinedField); @@ -228,7 +281,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& return false; } -void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { +void FormattedValueStringBuilderImpl::appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { if (U_FAILURE(status)) { return; } U_ASSERT(spanIndices.getCapacity() >= spanValue); if (spanIndices.getCapacity() == spanValue) { @@ -237,10 +290,11 @@ void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t return; } } - spanIndices[spanValue] = {spanValue, length}; + spanIndices[spanValue] = {category, spanValue, start, length}; + spanIndicesCount++; } -void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { +void FormattedValueStringBuilderImpl::prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { if (U_FAILURE(status)) { return; } U_ASSERT(spanIndices.getCapacity() >= spanValue); if (spanIndices.getCapacity() == spanValue) { @@ -252,7 +306,8 @@ void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t for (int32_t i = spanValue - 1; i >= 0; i--) { spanIndices[i+1] = spanIndices[i]; } - spanIndices[0] = {spanValue, length}; + spanIndices[0] = {category, spanValue, start, length}; + spanIndicesCount++; } bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) { diff --git a/deps/icu-small/source/i18n/gregocal.cpp b/deps/icu-small/source/i18n/gregocal.cpp index 
38a20dd93fb704..36f7bb933285bd 100644 --- a/deps/icu-small/source/i18n/gregocal.cpp +++ b/deps/icu-small/source/i18n/gregocal.cpp @@ -398,7 +398,7 @@ void GregorianCalendar::handleComputeFields(int32_t julianDay, UErrorCode& statu // with 8 AD. Before 8 AD the spacing is irregular; every 3 years // from 45 BC to 9 BC, and then none until 8 AD. However, we don't // implement this historical detail; instead, we implement the - // computatinally cleaner proleptic calendar, which assumes + // computationally cleaner proleptic calendar, which assumes // consistent 4-year cycles throughout time. UBool isLeap = ((eyear&0x3) == 0); // equiv. to (eyear%4 == 0) diff --git a/deps/icu-small/source/i18n/indiancal.h b/deps/icu-small/source/i18n/indiancal.h index 624cec73b53efc..1a06ebde408e9e 100644 --- a/deps/icu-small/source/i18n/indiancal.h +++ b/deps/icu-small/source/i18n/indiancal.h @@ -25,7 +25,7 @@ U_NAMESPACE_BEGIN * Concrete class which provides the Indian calendar. *

* IndianCalendar is a subclass of Calendar - * that numbers years since the begining of SAKA ERA. This is the civil calendar + * that numbers years since the beginning of SAKA ERA. This is the civil calendar * which is accepted by government of India as Indian National Calendar. * The two calendars most widely used in India today are the Vikrama calendar * followed in North India and the Shalivahana or Saka calendar which is followed diff --git a/deps/icu-small/source/i18n/islamcal.h b/deps/icu-small/source/i18n/islamcal.h index b4ead411da9e04..e12e04a6b9c8b4 100644 --- a/deps/icu-small/source/i18n/islamcal.h +++ b/deps/icu-small/source/i18n/islamcal.h @@ -67,7 +67,7 @@ U_NAMESPACE_BEGIN * moon's illumination, and other factors, it is possible to determine the start * of a lunar month with a fairly high degree of certainty. However, these * calculations are extremely complicated and thus slow, so most algorithms, - * including the one used here, are only approximations of the true astronical + * including the one used here, are only approximations of the true astronomical * calculations. At present, the approximations used in this class are fairly * simplistic; they will be improved in later versions of the code. *

diff --git a/deps/icu-small/source/i18n/listformatter.cpp b/deps/icu-small/source/i18n/listformatter.cpp index be0d16bc7f52b3..e5c01c0ab32c2b 100644 --- a/deps/icu-small/source/i18n/listformatter.cpp +++ b/deps/icu-small/source/i18n/listformatter.cpp @@ -567,7 +567,7 @@ class FormattedListBuilder { start, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanInfo(0, start.length(), status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, 0, -1, start.length(), status); } } @@ -603,7 +603,7 @@ class FormattedListBuilder { next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanInfo(position, next.length(), status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); data->getStringRef().append( temp.tempSubString(offsets[1]), {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, @@ -622,7 +622,7 @@ class FormattedListBuilder { next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->prependSpanInfo(position, next.length(), status); + data->prependSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); data->getStringRef().insert( 0, temp.tempSubStringBetween(0, offsets[1]), diff --git a/deps/icu-small/source/i18n/measfmt.cpp b/deps/icu-small/source/i18n/measfmt.cpp index 3e7f7bae157cf7..6334c6f9fdf235 100644 --- a/deps/icu-small/source/i18n/measfmt.cpp +++ b/deps/icu-small/source/i18n/measfmt.cpp @@ -581,7 +581,10 @@ void MeasureFormat::initMeasureFormat( UMeasureFormatWidth w, NumberFormat *nfToAdopt, UErrorCode &status) { - static const char *listStyles[] = {"unit", "unit-short", "unit-narrow"}; + static const UListFormatterWidth listWidths[] = { + ULISTFMT_WIDTH_WIDE, + ULISTFMT_WIDTH_SHORT, + ULISTFMT_WIDTH_NARROW}; LocalPointer nf(nfToAdopt); if (U_FAILURE(status)) { return; @@ -620,7 +623,8 @@ void MeasureFormat::initMeasureFormat( delete listFormatter; listFormatter = ListFormatter::createInstance( locale, - listStyles[getRegularWidth(fWidth)], + 
ULISTFMT_TYPE_UNITS, + listWidths[getRegularWidth(fWidth)], status); } diff --git a/deps/icu-small/source/i18n/measunit.cpp b/deps/icu-small/source/i18n/measunit.cpp index ece83177625513..f57495e65a1b64 100644 --- a/deps/icu-small/source/i18n/measunit.cpp +++ b/deps/icu-small/source/i18n/measunit.cpp @@ -33,8 +33,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MeasureUnit) // update this code, refer to: // http://site.icu-project.org/design/formatting/measureformat/updating-measure-unit // -// Start generated code -// TODO(ICU-21076): improve how this generated code is produced. +// Start generated code for measunit.cpp // Maps from Type ID to offset in gSubTypes. static const int32_t gOffsets[] = { @@ -42,57 +41,28 @@ static const int32_t gOffsets[] = { 2, 7, 17, - 25, - 29, - 328, - 339, - 355, - 359, - 368, - 370, - 374, - 382, - 404, - 408, - 423, - 426, - 432, - 442, - 446, - 450, - 452, - 486 + 26, + 30, + 329, + 340, + 356, + 360, + 369, + 371, + 375, + 383, + 405, + 409, + 424, + 425, + 431, + 441, + 445, + 449, + 451, + 485 }; -// TODO: FIX CODE GENERATION - leaving this here but commented-out to make it -// clear that we no longer want this array. We needed it for only one thing: efficient checking of "currency". -// -// static const int32_t gIndexes[] = { -// 0, -// 2, -// 7, -// 17, -// 25, -// 29, -// 29, -// 40, -// 56, -// 60, -// 69, -// 71, -// 75, -// 83, -// 105, -// 109, -// 124, -// 127, -// 133, -// 143, -// 147, -// 151, -// 153, -// 187 -// }; static const int32_t kCurrencyOffset = 5; // Must be sorted alphabetically. @@ -142,6 +112,7 @@ static const char * const gSubTypes[] = { "square-mile", "square-yard", "karat", + "milligram-ofglucose-per-deciliter", "milligram-per-deciliter", "millimole-per-liter", "mole", @@ -547,9 +518,7 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "", // TODO(ICU-21076): manual edit of what should have been generated by Java. - "percent", // TODO(ICU-21076): regenerate, deal with duplication. 
- "permille", // TODO(ICU-21076): regenerate, deal with duplication. + "", "gigawatt", "horsepower", "kilowatt", @@ -612,8 +581,6 @@ static const char * const gSubTypes[] = { "teaspoon" }; -// unitPerUnitToSingleUnit no longer in use! TODO: remove from code-generation code. - // Shortcuts to the base unit in order to make the default constructor fast static const int32_t kBaseTypeIdx = 16; static const int32_t kBaseSubTypeIdx = 0; @@ -762,62 +729,70 @@ MeasureUnit MeasureUnit::getKarat() { return MeasureUnit(3, 0); } -MeasureUnit *MeasureUnit::createMilligramPerDeciliter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilligramOfglucosePerDeciliter(UErrorCode &status) { return MeasureUnit::create(3, 1, status); } -MeasureUnit MeasureUnit::getMilligramPerDeciliter() { +MeasureUnit MeasureUnit::getMilligramOfglucosePerDeciliter() { return MeasureUnit(3, 1); } -MeasureUnit *MeasureUnit::createMillimolePerLiter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilligramPerDeciliter(UErrorCode &status) { return MeasureUnit::create(3, 2, status); } -MeasureUnit MeasureUnit::getMillimolePerLiter() { +MeasureUnit MeasureUnit::getMilligramPerDeciliter() { return MeasureUnit(3, 2); } -MeasureUnit *MeasureUnit::createMole(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillimolePerLiter(UErrorCode &status) { return MeasureUnit::create(3, 3, status); } -MeasureUnit MeasureUnit::getMole() { +MeasureUnit MeasureUnit::getMillimolePerLiter() { return MeasureUnit(3, 3); } -MeasureUnit *MeasureUnit::createPercent(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMole(UErrorCode &status) { return MeasureUnit::create(3, 4, status); } -MeasureUnit MeasureUnit::getPercent() { +MeasureUnit MeasureUnit::getMole() { return MeasureUnit(3, 4); } -MeasureUnit *MeasureUnit::createPermille(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPercent(UErrorCode &status) { return MeasureUnit::create(3, 5, status); } -MeasureUnit MeasureUnit::getPermille() { +MeasureUnit 
MeasureUnit::getPercent() { return MeasureUnit(3, 5); } -MeasureUnit *MeasureUnit::createPartPerMillion(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPermille(UErrorCode &status) { return MeasureUnit::create(3, 6, status); } -MeasureUnit MeasureUnit::getPartPerMillion() { +MeasureUnit MeasureUnit::getPermille() { return MeasureUnit(3, 6); } -MeasureUnit *MeasureUnit::createPermyriad(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPartPerMillion(UErrorCode &status) { return MeasureUnit::create(3, 7, status); } -MeasureUnit MeasureUnit::getPermyriad() { +MeasureUnit MeasureUnit::getPartPerMillion() { return MeasureUnit(3, 7); } +MeasureUnit *MeasureUnit::createPermyriad(UErrorCode &status) { + return MeasureUnit::create(3, 8, status); +} + +MeasureUnit MeasureUnit::getPermyriad() { + return MeasureUnit(3, 8); +} + MeasureUnit *MeasureUnit::createLiterPer100Kilometers(UErrorCode &status) { return MeasureUnit::create(4, 0, status); } @@ -2090,7 +2065,7 @@ MeasureUnit MeasureUnit::getTeaspoon() { return MeasureUnit(22, 33); } -// End generated code +// End generated code for measunit.cpp static int32_t binarySearch( const char * const * array, int32_t start, int32_t end, StringPiece key) { @@ -2277,9 +2252,11 @@ StringEnumeration* MeasureUnit::getAvailableTypes(UErrorCode &errorCode) { } bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { + // Sanity checking kCurrencyOffset and final entry in gOffsets + U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); + U_ASSERT(gOffsets[UPRV_LENGTHOF(gOffsets) - 1] == UPRV_LENGTHOF(gSubTypes)); + for (int32_t t = 0; t < UPRV_LENGTHOF(gOffsets) - 1; t++) { - // Ensure kCurrencyOffset is set correctly - U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); // Skip currency units if (t == kCurrencyOffset) { continue; @@ -2353,8 +2330,8 @@ MeasureUnitImpl MeasureUnitImpl::copy(UErrorCode &status) const { MeasureUnitImpl result; result.complexity = complexity; 
result.identifier.append(identifier, status); - for (int32_t i = 0; i < units.length(); i++) { - SingleUnitImpl *item = result.units.emplaceBack(*units[i]); + for (int32_t i = 0; i < singleUnits.length(); i++) { + SingleUnitImpl *item = result.singleUnits.emplaceBack(*singleUnits[i]); if (!item) { status = U_MEMORY_ALLOCATION_ERROR; return result; diff --git a/deps/icu-small/source/i18n/measunit_extra.cpp b/deps/icu-small/source/i18n/measunit_extra.cpp index 2eb3f066142967..76ada0e89cd668 100644 --- a/deps/icu-small/source/i18n/measunit_extra.cpp +++ b/deps/icu-small/source/i18n/measunit_extra.cpp @@ -30,6 +30,7 @@ #include "unicode/ures.h" #include "unicode/ustringtrie.h" #include "uresimp.h" +#include "util.h" #include U_NAMESPACE_BEGIN @@ -40,12 +41,20 @@ namespace { // TODO: Propose a new error code for this? constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; -// Trie value offset for SI Prefixes. This is big enough to ensure we only +// Trie value offset for SI or binary prefixes. This is big enough to ensure we only // insert positive integers into the trie. -constexpr int32_t kSIPrefixOffset = 64; +constexpr int32_t kPrefixOffset = 64; +static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0, + "kPrefixOffset is too small for minimum UMeasurePrefix value"); +static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0, + "kPrefixOffset is too small for minimum UMeasurePrefix value"); // Trie value offset for compound parts, e.g. "-per-", "-", "-and-". 
constexpr int32_t kCompoundPartOffset = 128; +static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN, + "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); +static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI, + "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); enum CompoundPart { // Represents "-per-" @@ -89,44 +98,57 @@ enum PowerPart { // "fluid-ounce-imperial". constexpr int32_t kSimpleUnitOffset = 512; -const struct SIPrefixStrings { +const struct UnitPrefixStrings { const char* const string; - UMeasureSIPrefix value; -} gSIPrefixStrings[] = { - { "yotta", UMEASURE_SI_PREFIX_YOTTA }, - { "zetta", UMEASURE_SI_PREFIX_ZETTA }, - { "exa", UMEASURE_SI_PREFIX_EXA }, - { "peta", UMEASURE_SI_PREFIX_PETA }, - { "tera", UMEASURE_SI_PREFIX_TERA }, - { "giga", UMEASURE_SI_PREFIX_GIGA }, - { "mega", UMEASURE_SI_PREFIX_MEGA }, - { "kilo", UMEASURE_SI_PREFIX_KILO }, - { "hecto", UMEASURE_SI_PREFIX_HECTO }, - { "deka", UMEASURE_SI_PREFIX_DEKA }, - { "deci", UMEASURE_SI_PREFIX_DECI }, - { "centi", UMEASURE_SI_PREFIX_CENTI }, - { "milli", UMEASURE_SI_PREFIX_MILLI }, - { "micro", UMEASURE_SI_PREFIX_MICRO }, - { "nano", UMEASURE_SI_PREFIX_NANO }, - { "pico", UMEASURE_SI_PREFIX_PICO }, - { "femto", UMEASURE_SI_PREFIX_FEMTO }, - { "atto", UMEASURE_SI_PREFIX_ATTO }, - { "zepto", UMEASURE_SI_PREFIX_ZEPTO }, - { "yocto", UMEASURE_SI_PREFIX_YOCTO }, + UMeasurePrefix value; +} gUnitPrefixStrings[] = { + // SI prefixes + { "yotta", UMEASURE_PREFIX_YOTTA }, + { "zetta", UMEASURE_PREFIX_ZETTA }, + { "exa", UMEASURE_PREFIX_EXA }, + { "peta", UMEASURE_PREFIX_PETA }, + { "tera", UMEASURE_PREFIX_TERA }, + { "giga", UMEASURE_PREFIX_GIGA }, + { "mega", UMEASURE_PREFIX_MEGA }, + { "kilo", UMEASURE_PREFIX_KILO }, + { "hecto", UMEASURE_PREFIX_HECTO }, + { "deka", UMEASURE_PREFIX_DEKA }, + { "deci", UMEASURE_PREFIX_DECI }, + { "centi", UMEASURE_PREFIX_CENTI }, + { "milli", 
UMEASURE_PREFIX_MILLI }, + { "micro", UMEASURE_PREFIX_MICRO }, + { "nano", UMEASURE_PREFIX_NANO }, + { "pico", UMEASURE_PREFIX_PICO }, + { "femto", UMEASURE_PREFIX_FEMTO }, + { "atto", UMEASURE_PREFIX_ATTO }, + { "zepto", UMEASURE_PREFIX_ZEPTO }, + { "yocto", UMEASURE_PREFIX_YOCTO }, + // Binary prefixes + { "yobi", UMEASURE_PREFIX_YOBI }, + { "zebi", UMEASURE_PREFIX_ZEBI }, + { "exbi", UMEASURE_PREFIX_EXBI }, + { "pebi", UMEASURE_PREFIX_PEBI }, + { "tebi", UMEASURE_PREFIX_TEBI }, + { "gibi", UMEASURE_PREFIX_GIBI }, + { "mebi", UMEASURE_PREFIX_MEBI }, + { "kibi", UMEASURE_PREFIX_KIBI }, }; /** * A ResourceSink that collects simple unit identifiers from the keys of the * convertUnits table into an array, and adds these values to a TrieBuilder, * with associated values being their index into this array plus a specified - * offset, to a trie. + * offset. * * Example code: * * UErrorCode status = U_ZERO_ERROR; * BytesTrieBuilder b(status); - * const char *unitIdentifiers[200]; - * SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset); + * int32_t ARR_SIZE = 200; + * const char *unitIdentifiers[ARR_SIZE]; + * int32_t *unitCategories[ARR_SIZE]; + * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers, + * unitCategories, ARR_SIZE, b, kTrieValueOffset); * LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); */ @@ -134,20 +156,27 @@ class SimpleUnitIdentifiersSink : public icu::ResourceSink { public: /** * Constructor. - * @param out Array of char* to which the simple unit identifiers will be - * saved. - * @param outSize The size of `out`. + * @param quantitiesTrieData The data for constructing a quantitiesTrie, + * which maps from a simple unit identifier to an index into the + * gCategories array. 
+ * @param out Array of char* to which pointers to the simple unit + * identifiers will be saved. (Does not take ownership.) + * @param outCategories Array of int32_t to which category indexes will be + * saved: this corresponds to simple unit IDs saved to `out`, mapping + * from the ID to the value produced by the quantitiesTrie (which is an + * index into the gCategories array). + * @param outSize The size of `out` and `outCategories`. * @param trieBuilder The trie builder to which the simple unit identifier * should be added. The trie builder must outlive this resource sink. * @param trieValueOffset This is added to the index of the identifier in * the `out` array, before adding to `trieBuilder` as the value * associated with the identifier. */ - explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder, - int32_t trieValueOffset) - : outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset), - outIndex(0) { - } + explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out, + int32_t *outCategories, int32_t outSize, + BytesTrieBuilder &trieBuilder, int32_t trieValueOffset) + : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder), + trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {} /** * Adds the table keys found in value to the output vector. @@ -167,30 +196,120 @@ class SimpleUnitIdentifiersSink : public icu::ResourceSink { return; } + BytesTrie quantitiesTrie(quantitiesTrieData.data()); + // Collect keys from the table resource. 
- const char *key; - for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) { + const char *simpleUnitID; + for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) { U_ASSERT(i < table.getSize()); U_ASSERT(outIndex < outSize); - if (uprv_strcmp(key, "kilogram") == 0) { + if (uprv_strcmp(simpleUnitID, "kilogram") == 0) { // For parsing, we use "gram", the prefixless metric mass unit. We // thus ignore the SI Base Unit of Mass: it exists due to being the // mass conversion target unit, but not needed for MeasureUnit // parsing. continue; } - outArray[outIndex] = key; - trieBuilder.add(key, trieValueOffset + outIndex, status); + outArray[outIndex] = simpleUnitID; + trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status); + + // Find the base target unit for this simple unit + ResourceTable table = value.getTable(status); + if (U_FAILURE(status)) { return; } + if (!table.findValue("target", value)) { + status = U_INVALID_FORMAT_ERROR; + break; + } + int32_t len; + const UChar* uTarget = value.getString(len, status); + CharString target; + target.appendInvariantChars(uTarget, len, status); + if (U_FAILURE(status)) { return; } + quantitiesTrie.reset(); + UStringTrieResult result = quantitiesTrie.next(target.data(), target.length()); + if (!USTRINGTRIE_HAS_VALUE(result)) { + status = U_INVALID_FORMAT_ERROR; + break; + } + outCategories[outIndex] = quantitiesTrie.getValue(); + outIndex++; } } private: const char **outArray; + int32_t *outCategories; int32_t outSize; BytesTrieBuilder &trieBuilder; int32_t trieValueOffset; + StringPiece quantitiesTrieData; + + int32_t outIndex; +}; + +/** + * A ResourceSink that collects information from `unitQuantities` in the `units` + * resource to provide key->value lookups from base unit to category, as well as + * preserving ordering information for these categories. See `units.txt`. + * + * For example: "kilogram" -> "mass", "meter-per-second" -> "speed". 
+ * + * In C++ unitQuantity values are collected in order into a UChar* array, while + * unitQuantity keys are added to a TrieBuilder, with associated values + * being the index into the aforementioned UChar* array. + */ +class CategoriesSink : public icu::ResourceSink { + public: + /** + * Constructor. + * @param out Array of UChar* to which unitQuantity values will be saved. + * The pointers returned are not owned: they point directly at the resource + * strings in static memory. + * @param outSize The size of the `out` array. + * @param trieBuilder The trie builder to which the keys (base units) of + * each unitQuantity will be added, each with value being the offset + * into `out`. + */ + explicit CategoriesSink(const UChar **out, int32_t &outSize, BytesTrieBuilder &trieBuilder) + : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {} + + void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { + ResourceArray array = value.getArray(status); + if (U_FAILURE(status)) { + return; + } + + if (outIndex + array.getSize() > outSize) { + status = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + for (int32_t i = 0; array.getValue(i, value); ++i) { + U_ASSERT(outIndex < outSize); + ResourceTable table = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + if (table.getSize() != 1) { + status = U_INVALID_FORMAT_ERROR; + return; + } + const char *key; + table.getKeyAndValue(0, key, value); + int32_t uTmpLen; + outQuantitiesArray[outIndex] = value.getString(uTmpLen, status); + trieBuilder.add(key, outIndex, status); + outIndex++; + } + } + + private: + const UChar **outQuantitiesArray; + int32_t &outSize; + BytesTrieBuilder &trieBuilder; + + int32_t outIndex; }; @@ -203,11 +322,34 @@ icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; // by SingleUnitImpl::getSimpleUnitID().) 
const char **gSimpleUnits = nullptr; +// Maps from the value associated with each simple unit ID to an index into the +// gCategories array. +int32_t *gSimpleUnitCategories = nullptr; + char *gSerializedUnitExtrasStemTrie = nullptr; +// Array of UChar* pointing at the unit categories (aka "quantities", aka +// "types"), as found in the `unitQuantities` resource. The array memory itself +// is owned by this pointer, but the individual UChar* in that array point at +// static memory. +const UChar **gCategories = nullptr; +// Number of items in `gCategories`. +int32_t gCategoriesCount = 0; +// TODO: rather save an index into gCategories? +const char *kConsumption = "consumption"; +size_t kConsumptionLen = strlen("consumption"); +// Serialized BytesTrie for mapping from base units to indices into gCategories. +char *gSerializedUnitCategoriesTrie = nullptr; + UBool U_CALLCONV cleanupUnitExtras() { + uprv_free(gSerializedUnitCategoriesTrie); + gSerializedUnitCategoriesTrie = nullptr; + uprv_free(gCategories); + gCategories = nullptr; uprv_free(gSerializedUnitExtrasStemTrie); gSerializedUnitExtrasStemTrie = nullptr; + uprv_free(gSimpleUnitCategories); + gSimpleUnitCategories = nullptr; uprv_free(gSimpleUnits); gSimpleUnits = nullptr; gUnitExtrasInitOnce.reset(); @@ -216,13 +358,43 @@ UBool U_CALLCONV cleanupUnitExtras() { void U_CALLCONV initUnitExtras(UErrorCode& status) { ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); + + // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories. 
+ const char *CATEGORY_TABLE_NAME = "unitQuantities"; + LocalUResourceBundlePointer unitQuantities( + ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status)); + if (U_FAILURE(status)) { return; } + gCategoriesCount = unitQuantities.getAlias()->fSize; + size_t quantitiesMallocSize = sizeof(UChar *) * gCategoriesCount; + gCategories = static_cast(uprv_malloc(quantitiesMallocSize)); + if (gCategories == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(gCategories, 0, quantitiesMallocSize); + BytesTrieBuilder quantitiesBuilder(status); + CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status); + StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); + if (U_FAILURE(status)) { return; } + // Copy the result into the global constant pointer + size_t numBytesQuantities = resultQuantities.length(); + gSerializedUnitCategoriesTrie = static_cast(uprv_malloc(numBytesQuantities)); + if (gSerializedUnitCategoriesTrie == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities); + + // Build the BytesTrie that Parser needs for parsing unit identifiers. BytesTrieBuilder b(status); if (U_FAILURE(status)) { return; } - // Add SI prefixes - for (const auto& siPrefixInfo : gSIPrefixStrings) { - b.add(siPrefixInfo.string, siPrefixInfo.value + kSIPrefixOffset, status); + // Add SI and binary prefixes + for (const auto& unitPrefixInfo : gUnitPrefixStrings) { + b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status); } if (U_FAILURE(status)) { return; } @@ -251,11 +423,8 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { // Add sanctioned simple units by offset: simple units all have entries in // units/convertUnits resources. 
- // TODO(ICU-21059): confirm whether this is clean enough, or whether we need to - // filter units' validity list instead. - LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); LocalUResourceBundlePointer convertUnits( - ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status)); + ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status)); if (U_FAILURE(status)) { return; } // Allocate enough space: with identifierSink below skipping kilogram, we're @@ -268,9 +437,17 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { return; } uprv_memset(gSimpleUnits, 0, arrayMallocSize); + arrayMallocSize = sizeof(int32_t) * simpleUnitsCount; + gSimpleUnitCategories = static_cast(uprv_malloc(arrayMallocSize)); + if (gSimpleUnitCategories == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize); // Populate gSimpleUnits and build the associated trie. - SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset); + SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories, + simpleUnitsCount, b, kSimpleUnitOffset); ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); // Build the CharsTrie @@ -294,7 +471,7 @@ class Token { enum Type { TYPE_UNDEFINED, - TYPE_SI_PREFIX, + TYPE_PREFIX, // Token type for "-per-", "-", and "-and-". TYPE_COMPOUND_PART, // Token type for "per-". 
@@ -308,7 +485,7 @@ class Token { Type getType() const { U_ASSERT(fMatch > 0); if (fMatch < kCompoundPartOffset) { - return TYPE_SI_PREFIX; + return TYPE_PREFIX; } if (fMatch < kInitialCompoundPartOffset) { return TYPE_COMPOUND_PART; @@ -322,9 +499,9 @@ class Token { return TYPE_SIMPLE_UNIT; } - UMeasureSIPrefix getSIPrefix() const { - U_ASSERT(getType() == TYPE_SI_PREFIX); - return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset); + UMeasurePrefix getUnitPrefix() const { + U_ASSERT(getType() == TYPE_PREFIX); + return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset); } // Valid only for tokens with type TYPE_COMPOUND_PART. @@ -380,7 +557,53 @@ class Parser { MeasureUnitImpl parse(UErrorCode& status) { MeasureUnitImpl result; - parseImpl(result, status); + + if (U_FAILURE(status)) { + return result; + } + if (fSource.empty()) { + // The dimensionless unit: nothing to parse. Leave result as is. + return result; + } + + while (hasNext()) { + bool sawAnd = false; + + SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status); + if (U_FAILURE(status)) { + return result; + } + + bool added = result.appendSingleUnit(singleUnit, status); + if (U_FAILURE(status)) { + return result; + } + + if (sawAnd && !added) { + // Two similar units are not allowed in a mixed unit. + status = kUnitIdentifierSyntaxError; + return result; + } + + if (result.singleUnits.length() >= 2) { + // nextSingleUnit fails appropriately for "per" and "and" in the + // same identifier. It doesn't fail for other compound units + // (COMPOUND_PART_TIMES). Consequently we take care of that + // here. + UMeasureUnitComplexity complexity = + sawAnd ? 
UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; + if (result.singleUnits.length() == 2) { + // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND` + U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND); + result.complexity = complexity; + } else if (result.complexity != complexity) { + // Can't have mixed compound units + status = kUnitIdentifierSyntaxError; + return result; + } + } + } + return result; } @@ -457,20 +680,23 @@ class Parser { * unit", sawAnd is set to true. If not, it is left as is. * @param status ICU error code. */ - void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) { + SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { + SingleUnitImpl result; if (U_FAILURE(status)) { - return; + return result; } // state: - // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit) // 1 = power token seen (will not accept another power token) - // 2 = SI prefix token seen (will not accept a power or SI prefix token) + // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token) int32_t state = 0; bool atStart = fIndex == 0; Token token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } if (atStart) { // Identifiers optionally start with "per-". @@ -480,14 +706,16 @@ class Parser { result.dimensionality = -1; token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } } } else { // All other SingleUnit's are separated from previous SingleUnit's // via a compound part: if (token.getType() != Token::TYPE_COMPOUND_PART) { status = kUnitIdentifierSyntaxError; - return; + return result; } switch (token.getMatch()) { @@ -496,7 +724,7 @@ class Parser { // Mixed compound units not yet supported, // TODO(CLDR-13700). 
status = kUnitIdentifierSyntaxError; - return; + return result; } fAfterPer = true; result.dimensionality = -1; @@ -513,14 +741,16 @@ class Parser { // Can't start with "-and-", and mixed compound units // not yet supported, TODO(CLDR-13700). status = kUnitIdentifierSyntaxError; - return; + return result; } sawAnd = true; break; } token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } } // Read tokens until we have a complete SingleUnit or we reach the end. @@ -529,87 +759,46 @@ class Parser { case Token::TYPE_POWER_PART: if (state > 0) { status = kUnitIdentifierSyntaxError; - return; + return result; } result.dimensionality *= token.getPower(); state = 1; break; - case Token::TYPE_SI_PREFIX: + case Token::TYPE_PREFIX: if (state > 1) { status = kUnitIdentifierSyntaxError; - return; + return result; } - result.siPrefix = token.getSIPrefix(); + result.unitPrefix = token.getUnitPrefix(); state = 2; break; case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); - return; + return result; default: status = kUnitIdentifierSyntaxError; - return; + return result; } if (!hasNext()) { // We ran out of tokens before finding a complete single unit. status = kUnitIdentifierSyntaxError; - return; + return result; } token = nextToken(status); if (U_FAILURE(status)) { - return; + return result; } } - } - /// @param result is modified, not overridden. Caller must pass in a - /// default-constructed (empty) MeasureUnitImpl instance. - void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - if (fSource.empty()) { - // The dimenionless unit: nothing to parse. leave result as is. 
- return; - } - int32_t unitNum = 0; - while (hasNext()) { - bool sawAnd = false; - SingleUnitImpl singleUnit; - nextSingleUnit(singleUnit, sawAnd, status); - if (U_FAILURE(status)) { - return; - } - U_ASSERT(!singleUnit.isDimensionless()); - bool added = result.append(singleUnit, status); - if (sawAnd && !added) { - // Two similar units are not allowed in a mixed unit - status = kUnitIdentifierSyntaxError; - return; - } - if ((++unitNum) >= 2) { - // nextSingleUnit fails appropriately for "per" and "and" in the - // same identifier. It doesn't fail for other compound units - // (COMPOUND_PART_TIMES). Consequently we take care of that - // here. - UMeasureUnitComplexity complexity = - sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; - if (unitNum == 2) { - U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); - result.complexity = complexity; - } else if (result.complexity != complexity) { - // Can't have mixed compound units - status = kUnitIdentifierSyntaxError; - return; - } - } - } + return result; } }; +// Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray. int32_t U_CALLCONV compareSingleUnits(const void* /*context*/, const void* left, const void* right) { auto realLeft = static_cast(left); @@ -617,162 +806,86 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) return (*realLeft)->compareTo(**realRight); } -/** - * Generate the identifier string for a single unit in place. - * - * Does not support the dimensionless SingleUnitImpl: calling serializeSingle - * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR. - * - * @param first If singleUnit is part of a compound unit, and not its first - * single unit, set this to false. Otherwise: set to true. - */ -void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { - if (first && singleUnit.dimensionality < 0) { - // Essentially the "unary per". 
For compound units with a numerator, the - // caller takes care of the "binary per". - output.append("per-", status); - } - - if (singleUnit.isDimensionless()) { - status = U_INTERNAL_PROGRAM_ERROR; - return; - } - int8_t posPower = std::abs(singleUnit.dimensionality); - if (posPower == 0) { - status = U_INTERNAL_PROGRAM_ERROR; - } else if (posPower == 1) { - // no-op - } else if (posPower == 2) { - output.append("square-", status); - } else if (posPower == 3) { - output.append("cubic-", status); - } else if (posPower < 10) { - output.append("pow", status); - output.append(posPower + '0', status); - output.append('-', status); - } else if (posPower <= 15) { - output.append("pow1", status); - output.append('0' + (posPower % 10), status); - output.append('-', status); - } else { - status = kUnitIdentifierSyntaxError; - } +// Returns an index into the gCategories array, for the "unitQuantity" (aka +// "type" or "category") associated with the given base unit identifier. Returns +// -1 on failure, together with U_UNSUPPORTED_ERROR. +int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) { + umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { - return; + return -1; } - - if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == singleUnit.siPrefix) { - output.append(siPrefixInfo.string, status); - break; - } - } + BytesTrie trie(gSerializedUnitCategoriesTrie); + UStringTrieResult result = trie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); + if (!USTRINGTRIE_HAS_VALUE(result)) { + status = U_UNSUPPORTED_ERROR; + return -1; } - if (U_FAILURE(status)) { - return; - } - - output.append(singleUnit.getSimpleUnitID(), status); + return trie.getValue(); } -/** - * Normalize a MeasureUnitImpl and generate the identifier string in place. 
- */ -void serialize(MeasureUnitImpl& impl, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - U_ASSERT(impl.identifier.isEmpty()); - if (impl.units.length() == 0) { - // Dimensionless, constructed by the default constructor: no appending - // to impl.identifier, we wish it to contain the zero-length string. - return; - } - if (impl.complexity == UMEASURE_UNIT_COMPOUND) { - // Note: don't sort a MIXED unit - uprv_sortArray( - impl.units.getAlias(), - impl.units.length(), - sizeof(impl.units[0]), - compareSingleUnits, - nullptr, - false, - &status); - if (U_FAILURE(status)) { - return; - } - } - serializeSingle(*impl.units[0], true, impl.identifier, status); - if (impl.units.length() == 1) { - return; - } - for (int32_t i = 1; i < impl.units.length(); i++) { - const SingleUnitImpl& prev = *impl.units[i-1]; - const SingleUnitImpl& curr = *impl.units[i]; - if (impl.complexity == UMEASURE_UNIT_MIXED) { - impl.identifier.append("-and-", status); - serializeSingle(curr, true, impl.identifier, status); - } else { - if (prev.dimensionality > 0 && curr.dimensionality < 0) { - impl.identifier.append("-per-", status); - } else { - impl.identifier.append('-', status); - } - serializeSingle(curr, false, impl.identifier, status); - } +} // namespace + +U_CAPI int32_t U_EXPORT2 +umeas_getPrefixPower(UMeasurePrefix unitPrefix) { + if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { + return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN; } + U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); + return unitPrefix - UMEASURE_PREFIX_ONE; +} +U_CAPI int32_t U_EXPORT2 +umeas_getPrefixBase(UMeasurePrefix unitPrefix) { + if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { + return 1024; + } + U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); + return 10; } 
-/** - * Appends a SingleUnitImpl to a MeasureUnitImpl. - * - * @return true if a new item was added. If unit is the dimensionless unit, it - * is never added: the return value will always be false. - */ -bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) { - if (unit.isDimensionless()) { - // We don't append dimensionless units. - return false; +CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) { + CharString result; + U_ASSERT(result.length() == 0); + if (U_FAILURE(status)) { + return result; } - // Find a similar unit that already exists, to attempt to coalesce - SingleUnitImpl* oldUnit = nullptr; - for (int32_t i = 0; i < impl.units.length(); i++) { - auto* candidate = impl.units[i]; - if (candidate->isCompatibleWith(unit)) { - oldUnit = candidate; + UErrorCode localStatus = U_ZERO_ERROR; + int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus); + if (U_FAILURE(localStatus)) { + // TODO(icu-units#130): support inverting any unit, with correct + // fallback logic: inversion and fallback may depend on presence or + // absence of a usage for that category. + if (uprv_strcmp(baseUnitIdentifier.data(), "meter-per-cubic-meter") == 0) { + result.append(kConsumption, (int32_t)kConsumptionLen, status); + return result; } + status = U_INVALID_FORMAT_ERROR; + return result; } - if (oldUnit) { - // Both dimensionalities will be positive, or both will be negative, by - // virtue of isCompatibleWith(). 
- oldUnit->dimensionality += unit.dimensionality; - } else { - SingleUnitImpl* destination = impl.units.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return false; - } - *destination = unit; + if (idx < 0 || idx >= gCategoriesCount) { + status = U_INVALID_FORMAT_ERROR; + return result; } - return (oldUnit == nullptr); + result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status); + return result; } -} // namespace - - +// In ICU4J, this is MeasureUnit.getSingleUnitImpl(). SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { MeasureUnitImpl temp; const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); if (U_FAILURE(status)) { return {}; } - if (impl.units.length() == 0) { + if (impl.singleUnits.length() == 0) { return {}; } - if (impl.units.length() == 1) { - return *impl.units[0]; + if (impl.singleUnits.length() == 1) { + return *impl.singleUnits[0]; } status = U_ILLEGAL_ARGUMENT_ERROR; return {}; @@ -780,7 +893,13 @@ SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UE MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { MeasureUnitImpl temp; - temp.append(*this, status); + temp.appendSingleUnit(*this, status); + // TODO(icu-units#28): the MeasureUnitImpl::build() method uses + // findBySubtype, which is relatively slow. + // - At the time of loading the simple unit IDs, we could also save a + // mapping to the builtin MeasureUnit type and subtype they correspond to. + // - This method could then check dimensionality and index, and if both are + // 1, directly return MeasureUnit instances very quickly. 
return std::move(temp).build(status); } @@ -788,12 +907,56 @@ const char *SingleUnitImpl::getSimpleUnitID() const { return gSimpleUnits[index]; } -MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) { - *this = other.copy(status); +void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const { + int32_t absPower = std::abs(this->dimensionality); + + U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units"; + + if (absPower == 1) { + // no-op + } else if (absPower == 2) { + result.append(StringPiece("square-"), status); + } else if (absPower == 3) { + result.append(StringPiece("cubic-"), status); + } else if (absPower <= 15) { + result.append(StringPiece("pow"), status); + result.appendNumber(absPower, status); + result.append(StringPiece("-"), status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error + return; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->unitPrefix != UMEASURE_PREFIX_ONE) { + bool found = false; + for (const auto &unitPrefixInfo : gUnitPrefixStrings) { + // TODO: consider using binary search? If we do this, add a unit + // test to ensure gUnitPrefixStrings is sorted? 
+ if (unitPrefixInfo.value == this->unitPrefix) { + result.append(unitPrefixInfo.string, status); + found = true; + break; + } + } + if (!found) { + status = U_UNSUPPORTED_ERROR; + return; + } + } + + result.append(StringPiece(this->getSimpleUnitID()), status); +} + +int32_t SingleUnitImpl::getUnitCategoryIndex() const { + return gSimpleUnitCategories[index]; } MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) { - this->append(singleUnit, status); + this->appendSingleUnit(singleUnit, status); } MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { @@ -821,33 +984,135 @@ MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { identifier.clear(); - for (int32_t i = 0; i < units.length(); i++) { - units[i]->dimensionality *= -1; + for (int32_t i = 0; i < singleUnits.length(); i++) { + singleUnits[i]->dimensionality *= -1; } } -bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) { +bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { identifier.clear(); - return appendImpl(*this, singleUnit, status); + + if (singleUnit.isDimensionless()) { + // Do not append dimensionless units. + return false; + } + + // Find a similar unit that already exists, to attempt to coalesce + SingleUnitImpl *oldUnit = nullptr; + for (int32_t i = 0; i < this->singleUnits.length(); i++) { + auto *candidate = this->singleUnits[i]; + if (candidate->isCompatibleWith(singleUnit)) { + oldUnit = candidate; + } + } + + if (oldUnit) { + // Both dimensionalities will be positive, or both will be negative, by + // virtue of isCompatibleWith(). + oldUnit->dimensionality += singleUnit.dimensionality; + + return false; + } + + // Add a copy of singleUnit + // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit. 
+ this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit); + if (U_FAILURE(status)) { + return false; + } + + // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits` + // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND` + if (this->singleUnits.length() > 1 && + this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) { + this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND; + } + + return true; } -MaybeStackVector MeasureUnitImpl::extractIndividualUnits(UErrorCode &status) const { - MaybeStackVector result; +MaybeStackVector +MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { + MaybeStackVector result; if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { - result.emplaceBackAndCheckErrorCode(status, *this, status); + result.emplaceBackAndCheckErrorCode(status, 0, *this, status); return result; } - for (int32_t i = 0; i < units.length(); i++) { - result.emplaceBackAndCheckErrorCode(status, *units[i], status); + for (int32_t i = 0; i < singleUnits.length(); ++i) { + result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status); + if (U_FAILURE(status)) { + return result; + } } return result; } +/** + * Normalize a MeasureUnitImpl and generate the identifier string in place. + */ +void MeasureUnitImpl::serialize(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + if (this->singleUnits.length() == 0) { + // Dimensionless, constructed by the default constructor. 
+ return; + } + + if (this->complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a MIXED unit + uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(), + sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status); + if (U_FAILURE(status)) { + return; + } + } + + CharString result; + bool beforePer = true; + bool firstTimeNegativeDimension = false; + for (int32_t i = 0; i < this->singleUnits.length(); i++) { + if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { + beforePer = false; + firstTimeNegativeDimension = true; + } else if ((*this->singleUnits[i]).dimensionality < 0) { + firstTimeNegativeDimension = false; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + if (result.length() != 0) { + result.append(StringPiece("-and-"), status); + } + } else { + if (firstTimeNegativeDimension) { + if (result.length() == 0) { + result.append(StringPiece("per-"), status); + } else { + result.append(StringPiece("-per-"), status); + } + } else { + if (result.length() != 0) { + result.append(StringPiece("-"), status); + } + } + } + + this->singleUnits[i]->appendNeutralIdentifier(result, status); + } + + this->identifier = CharString(result, status); +} + MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { - serialize(*this, status); + this->serialize(status); return MeasureUnit(std::move(*this)); } @@ -860,13 +1125,13 @@ UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; } -UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { - return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix; +UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { + return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix; } -MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { +MeasureUnit 
MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const { SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); - singleUnit.siPrefix = prefix; + singleUnit.unitPrefix = prefix; return singleUnit.build(status); } @@ -899,10 +1164,10 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c status = U_ILLEGAL_ARGUMENT_ERROR; return {}; } - for (int32_t i = 0; i < otherImpl.units.length(); i++) { - impl.append(*otherImpl.units[i], status); + for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { + impl.appendSingleUnit(*otherImpl.singleUnits[i], status); } - if (impl.units.length() > 1) { + if (impl.singleUnits.length() > 1) { impl.complexity = UMEASURE_UNIT_COMPOUND; } return std::move(impl).build(status); @@ -911,14 +1176,14 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c LocalArray MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const { MeasureUnitImpl temp; const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); - outCount = impl.units.length(); + outCount = impl.singleUnits.length(); MeasureUnit* arr = new MeasureUnit[outCount]; if (arr == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return LocalArray(); } for (int32_t i = 0; i < outCount; i++) { - arr[i] = impl.units[i]->build(status); + arr[i] = impl.singleUnits[i]->build(status); } return LocalArray(arr, status); } diff --git a/deps/icu-small/source/i18n/measunit_impl.h b/deps/icu-small/source/i18n/measunit_impl.h index 1024cd65547ff1..5028210ab09b3d 100644 --- a/deps/icu-small/source/i18n/measunit_impl.h +++ b/deps/icu-small/source/i18n/measunit_impl.h @@ -14,13 +14,31 @@ U_NAMESPACE_BEGIN +namespace number { +namespace impl { +class LongNameHandler; +} +} // namespace number static const char16_t kDefaultCurrency[] = u"XXX"; static const char kDefaultCurrency8[] = "XXX"; +/** + * Looks up the "unitQuantity" (aka "type" or "category") of a base 
unit + * identifier. The category is returned via `result`, which must initially be + * empty. + * + * This only supports base units: other units must be resolved to base units + * before passing to this function, otherwise U_UNSUPPORTED_ERROR status will be + * returned. + * + * Categories are found in `unitQuantities` in the `units` resource (see + * `units.txt`). + */ +CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status); /** - * A struct representing a single unit (optional SI prefix and dimensionality). + * A struct representing a single unit (optional SI or binary prefix, and dimensionality). */ struct U_I18N_API SingleUnitImpl : public UMemory { /** @@ -42,10 +60,26 @@ struct U_I18N_API SingleUnitImpl : public UMemory { */ const char *getSimpleUnitID() const; + /** + * Generates and append a neutral identifier string for a single unit which means we do not include + * the dimension signal. + */ + void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; + + /** + * Returns the index of this unit's "quantity" in unitQuantities (in + * measunit_extra.cpp). The value of this index determines sort order for + * normalization of unit identifiers. + */ + int32_t getUnitCategoryIndex() const; + /** * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of * sorting and coalescing. * + * Sort order of units is specified by UTS #35 + * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization). + * * Takes the sign of dimensionality into account, but not the absolute * value: per-meter is not considered the same as meter, but meter is * considered the same as square-meter. 
@@ -62,16 +96,29 @@ struct U_I18N_API SingleUnitImpl : public UMemory { if (dimensionality > 0 && other.dimensionality < 0) { return -1; } + // Sort by official quantity order + int32_t thisQuantity = this->getUnitCategoryIndex(); + int32_t otherQuantity = other.getUnitCategoryIndex(); + if (thisQuantity < otherQuantity) { + return -1; + } + if (thisQuantity > otherQuantity) { + return 1; + } + // If quantity order didn't help, then we go by index. if (index < other.index) { return -1; } if (index > other.index) { return 1; } - if (siPrefix < other.siPrefix) { + // TODO: revisit if the spec dictates prefix sort order - it doesn't + // currently. For now we're sorting binary prefixes before SI prefixes, + // as per enum values order. + if (unitPrefix < other.unitPrefix) { return -1; } - if (siPrefix > other.siPrefix) { + if (unitPrefix > other.unitPrefix) { return 1; } return 0; @@ -80,8 +127,8 @@ struct U_I18N_API SingleUnitImpl : public UMemory { /** * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. * - * Units with the same base unit and SI prefix should match, except that they must also have - * the same dimensionality sign, such that we don't merge numerator and denominator. + * Units with the same base unit and SI or binary prefix should match, except that they must also + * have the same dimensionality sign, such that we don't merge numerator and denominator. */ bool isCompatibleWith(const SingleUnitImpl& other) const { return (compareTo(other) == 0); @@ -98,7 +145,8 @@ struct U_I18N_API SingleUnitImpl : public UMemory { /** * Simple unit index, unique for every simple unit, -1 for the dimensionless - * unit. This is an index into a string list in measunit_extra.cpp. + * unit. This is an index into a string list in measunit_extra.cpp, as + * loaded by SimpleUnitIdentifiersSink. * * The default value is -1, meaning the dimensionless unit: * isDimensionless() will return true, until index is changed. 
@@ -106,11 +154,11 @@ struct U_I18N_API SingleUnitImpl : public UMemory { int32_t index = -1; /** - * SI prefix. + * SI or binary prefix. * * This is ignored for the dimensionless unit. */ - UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE; /** * Dimensionality. @@ -120,11 +168,14 @@ struct U_I18N_API SingleUnitImpl : public UMemory { int32_t dimensionality = 1; }; +// Forward declaration +struct MeasureUnitImplWithIndex; + // Export explicit template instantiations of MaybeStackArray, MemoryPool and // MaybeStackVector. This is required when building DLLs for Windows. (See // datefmt.h, collationiterator.h, erarules.h and others for similar examples.) #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -template class U_I18N_API MaybeStackArray; +template class U_I18N_API MaybeStackArray; template class U_I18N_API MemoryPool; template class U_I18N_API MaybeStackVector; #endif @@ -133,16 +184,18 @@ template class U_I18N_API MaybeStackVector; * Internal representation of measurement units. Capable of representing all complexities of units, * including mixed and compound units. */ -struct U_I18N_API MeasureUnitImpl : public UMemory { +class U_I18N_API MeasureUnitImpl : public UMemory { + public: MeasureUnitImpl() = default; MeasureUnitImpl(MeasureUnitImpl &&other) = default; - MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status); + // No copy constructor, use MeasureUnitImpl::copy() to make it explicit. + MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete; MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status); MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default; /** Extract the MeasureUnitImpl from a MeasureUnit. 
*/ - static inline const MeasureUnitImpl* get(const MeasureUnit& measureUnit) { + static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) { return measureUnit.fImpl; } @@ -197,14 +250,15 @@ struct U_I18N_API MeasureUnitImpl : public UMemory { MeasureUnitImpl copy(UErrorCode& status) const; /** - * Extracts the list of all the individual units inside the `MeasureUnitImpl`. + * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices. * For example: * - if the `MeasureUnitImpl` is `foot-per-hour` - * it will return a list of 1 {`foot-per-hour`} + * it will return a list of 1 {(0, `foot-per-hour`)} * - if the `MeasureUnitImpl` is `foot-and-inch` - * it will return a list of 2 { `foot`, `inch`} + * it will return a list of 2 {(0, `foot`), (1, `inch`)} */ - MaybeStackVector extractIndividualUnits(UErrorCode &status) const; + MaybeStackVector + extractIndividualUnitsWithIndices(UErrorCode &status) const; /** Mutates this MeasureUnitImpl to take the reciprocal. */ void takeReciprocal(UErrorCode& status); @@ -215,26 +269,71 @@ struct U_I18N_API MeasureUnitImpl : public UMemory { * @return true if a new item was added. If unit is the dimensionless unit, * it is never added: the return value will always be false. */ - bool append(const SingleUnitImpl& singleUnit, UErrorCode& status); + bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); /** The complexity, either SINGLE, COMPOUND, or MIXED. */ UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; /** - * The list of simple units. These may be summed or multiplied, based on the + * The list of single units. These may be summed or multiplied, based on the * value of the complexity field. * * The "dimensionless" unit (SingleUnitImpl default constructor) must not be * added to this list. */ - MaybeStackVector units; + MaybeStackVector singleUnits; /** * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. 
*/ CharString identifier; + + private: + /** + * Normalizes a MeasureUnitImpl and generate the identifier string in place. + */ + void serialize(UErrorCode &status); + + // For calling serialize + // TODO(icu-units#147): revisit serialization + friend class number::impl::LongNameHandler; +}; + +struct U_I18N_API MeasureUnitImplWithIndex : public UMemory { + const int32_t index; + MeasureUnitImpl unitImpl; + // Makes a copy of unitImpl. + MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status) + : index(index), unitImpl(unitImpl.copy(status)) { + } + MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status) + : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) { + } }; +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray; +template class U_I18N_API MemoryPool; +template class U_I18N_API MaybeStackVector; + +// Export an explicit template instantiation of the LocalPointer that is used as a +// data member of MeasureUnitImpl. +// (When building DLLs for Windows this is required.) 
+#if defined(_MSC_VER) +// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= +#pragma warning(push) +#pragma warning(disable : 4661) +#endif +template class U_I18N_API LocalPointerBase; +template class U_I18N_API LocalPointer; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +#endif + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/msgfmt.cpp b/deps/icu-small/source/i18n/msgfmt.cpp index 3ca368ef954846..d2631776e83bb0 100644 --- a/deps/icu-small/source/i18n/msgfmt.cpp +++ b/deps/icu-small/source/i18n/msgfmt.cpp @@ -1107,7 +1107,7 @@ void MessageFormat::format(int32_t msgStart, const void *plNumber, } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check - // for the hash table containind DummyFormat. + // for the hash table containing DummyFormat. if (arg->isNumeric()) { const NumberFormat* nf = getDefaultNumberFormat(success); appendTo.formatAndAppend(nf, *arg, success); @@ -1440,7 +1440,7 @@ MessageFormat::parse(int32_t msgStart, argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check - // for the hash table containind DummyFormat. + // for the hash table containing DummyFormat. // Match as a string. 
// if at end, use longest possible match diff --git a/deps/icu-small/source/i18n/name2uni.cpp b/deps/icu-small/source/i18n/name2uni.cpp index b1812178660421..8e13b0025475ed 100644 --- a/deps/icu-small/source/i18n/name2uni.cpp +++ b/deps/icu-small/source/i18n/name2uni.cpp @@ -119,7 +119,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos return; } - // Accomodate the longest possible name + // Accommodate the longest possible name ++maxLen; // allow for temporary trailing space char* cbuf = (char*) uprv_malloc(maxLen); if (cbuf == NULL) { diff --git a/deps/icu-small/source/i18n/nfrs.cpp b/deps/icu-small/source/i18n/nfrs.cpp index dd91d7833db111..11056dbc68281e 100644 --- a/deps/icu-small/source/i18n/nfrs.cpp +++ b/deps/icu-small/source/i18n/nfrs.cpp @@ -231,7 +231,7 @@ NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) // (this isn't a for loop because we might be deleting items from // the vector-- we want to make sure we only increment i when - // we _didn't_ delete aything from the vector) + // we _didn't_ delete anything from the vector) int32_t rulesSize = rules.size(); for (int32_t i = 0; i < rulesSize; i++) { NFRule* rule = rules[i]; @@ -647,7 +647,7 @@ NFRuleSet::findFractionRuleSetRule(double number) const /** * Parses a string. Matches the string to be parsed against each * of its rules (with a base value less than upperBound) and returns - * the value produced by the rule that matched the most charcters + * the value produced by the rule that matched the most characters * in the source string. 
* @param text The string to parse * @param parsePosition The initial position is ignored and assumed diff --git a/deps/icu-small/source/i18n/number_compact.cpp b/deps/icu-small/source/i18n/number_compact.cpp index d781b6fada234c..8f898e704703a4 100644 --- a/deps/icu-small/source/i18n/number_compact.cpp +++ b/deps/icu-small/source/i18n/number_compact.cpp @@ -55,7 +55,7 @@ int32_t countZeros(const UChar *patternString, int32_t patternLength) { } // namespace // NOTE: patterns and multipliers both get zero-initialized. -CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) { +CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(true) { } void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle, @@ -104,14 +104,30 @@ int32_t CompactData::getMultiplier(int32_t magnitude) const { return multipliers[magnitude]; } -const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const { +const UChar *CompactData::getPattern( + int32_t magnitude, + const PluralRules *rules, + const DecimalQuantity &dq) const { if (magnitude < 0) { return nullptr; } if (magnitude > largestMagnitude) { magnitude = largestMagnitude; } - const UChar *patternString = patterns[getIndex(magnitude, plural)]; + const UChar *patternString = nullptr; + if (dq.hasIntegerValue()) { + int64_t i = dq.toLong(true); + if (i == 0) { + patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_0)]; + } else if (i == 1) { + patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_1)]; + } + if (patternString != nullptr) { + return patternString; + } + } + StandardPlural::Form plural = utils::getStandardPlural(rules, dq); + patternString = patterns[getIndex(magnitude, plural)]; if (patternString == nullptr && plural != StandardPlural::OTHER) { // Fall back to "other" plural variant patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)]; @@ -166,12 
+182,6 @@ void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UB ResourceTable pluralVariantsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { - - if (uprv_strcmp(key, "0") == 0 || uprv_strcmp(key, "1") == 0) { - // TODO(ICU-21258): Handle this case. For now, skip. - continue; - } - // Skip this magnitude/plural if we already have it from a child locale. // Note: This also skips USE_FALLBACK entries. StandardPlural::Form plural = StandardPlural::fromString(key, status); @@ -296,8 +306,7 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr magnitude -= multiplier; } - StandardPlural::Form plural = utils::getStandardPlural(rules, quantity); - const UChar *patternString = data.getPattern(magnitude, plural); + const UChar *patternString = data.getPattern(magnitude, rules, quantity); if (patternString == nullptr) { // Use the default (non-compact) modifier. // No need to take any action. 
diff --git a/deps/icu-small/source/i18n/number_compact.h b/deps/icu-small/source/i18n/number_compact.h index 199d39f65911ca..9802b9fb10ee72 100644 --- a/deps/icu-small/source/i18n/number_compact.h +++ b/deps/icu-small/source/i18n/number_compact.h @@ -28,7 +28,10 @@ class CompactData : public MultiplierProducer { int32_t getMultiplier(int32_t magnitude) const U_OVERRIDE; - const UChar *getPattern(int32_t magnitude, StandardPlural::Form plural) const; + const UChar *getPattern( + int32_t magnitude, + const PluralRules *rules, + const DecimalQuantity &dq) const; void getUniquePatterns(UVector &output, UErrorCode &status) const; diff --git a/deps/icu-small/source/i18n/number_decimalquantity.cpp b/deps/icu-small/source/i18n/number_decimalquantity.cpp index 75af5e9974fc65..1f020ff87001cd 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.cpp +++ b/deps/icu-small/source/i18n/number_decimalquantity.cpp @@ -273,6 +273,9 @@ double DecimalQuantity::getPluralOperand(PluralOperand operand) const { return fractionCountWithoutTrailingZeros(); case PLURAL_OPERAND_E: return static_cast(getExponent()); + case PLURAL_OPERAND_C: + // Plural operand `c` is currently an alias for `e`. + return static_cast(getExponent()); default: return std::abs(toDouble()); } diff --git a/deps/icu-small/source/i18n/number_decimfmtprops.h b/deps/icu-small/source/i18n/number_decimfmtprops.h index 1ce84d9dc388d7..0ace241adae8ab 100644 --- a/deps/icu-small/source/i18n/number_decimfmtprops.h +++ b/deps/icu-small/source/i18n/number_decimfmtprops.h @@ -38,7 +38,7 @@ namespace impl { // Exported as U_I18N_API because it is a public member field of exported DecimalFormatProperties // Using this wrapper is rather unfortunate, but is needed on Windows platforms in order to allow -// for DLL-exporting an fully specified template instantiation. +// for DLL-exporting a fully specified template instantiation. 
class U_I18N_API CurrencyPluralInfoWrapper { public: LocalPointer fPtr; @@ -52,7 +52,8 @@ class U_I18N_API CurrencyPluralInfoWrapper { } CurrencyPluralInfoWrapper& operator=(const CurrencyPluralInfoWrapper& other) { - if (!other.fPtr.isNull()) { + if (this != &other && // self-assignment: no-op + !other.fPtr.isNull()) { fPtr.adoptInstead(new CurrencyPluralInfo(*other.fPtr)); } return *this; diff --git a/deps/icu-small/source/i18n/number_fluent.cpp b/deps/icu-small/source/i18n/number_fluent.cpp index 8569a36e5b260b..a79f224829d805 100644 --- a/deps/icu-small/source/i18n/number_fluent.cpp +++ b/deps/icu-small/source/i18n/number_fluent.cpp @@ -288,6 +288,20 @@ Derived NumberFormatterSettings::usage(const StringPiece usage)&& { return move; } +template +Derived NumberFormatterSettings::unitDisplayCase(const StringPiece unitDisplayCase) const& { + Derived copy(*this); + copy.fMacros.unitDisplayCase.set(unitDisplayCase); + return copy; +} + +template +Derived NumberFormatterSettings::unitDisplayCase(const StringPiece unitDisplayCase)&& { + Derived move(std::move(*this)); + move.fMacros.unitDisplayCase.set(unitDisplayCase); + return move; +} + template Derived NumberFormatterSettings::padding(const Padder& padder) const& { Derived copy(*this); @@ -428,6 +442,7 @@ LocalizedNumberFormatter::LocalizedNumberFormatter(NFS&& src) U_NOEXCEPT } LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(const LNF& other) { + if (this == &other) { return *this; } // self-assignment: no-op NFS::operator=(static_cast&>(other)); UErrorCode localStatus = U_ZERO_ERROR; // Can't bubble up the error lnfCopyHelper(other, localStatus); diff --git a/deps/icu-small/source/i18n/number_formatimpl.cpp b/deps/icu-small/source/i18n/number_formatimpl.cpp index 9a9f3c8b71e01c..b2325aa8e591eb 100644 --- a/deps/icu-small/source/i18n/number_formatimpl.cpp +++ b/deps/icu-small/source/i18n/number_formatimpl.cpp @@ -39,6 +39,7 @@ int32_t NumberFormatterImpl::formatStatic(const MacroProps ¯os, 
UFormattedNu int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); results->outputUnit = std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -63,6 +64,7 @@ int32_t NumberFormatterImpl::format(UFormattedNumberData *results, UErrorCode &s int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); results->outputUnit = std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -137,8 +139,10 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, bool isPermille = utils::unitIsPermille(macros.unit); bool isCompactNotation = macros.notation.fType == Notation::NTN_COMPACT; bool isAccounting = - macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || - macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; + macros.sign == UNUM_SIGN_ACCOUNTING || + macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || + macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO || + macros.sign == UNUM_SIGN_ACCOUNTING_NEGATIVE; CurrencyUnit currency(u"", status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit @@ -177,6 +181,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, uprv_strncpy(fMicros.nsName, nsName, 8); fMicros.nsName[8] = 0; // guarantee NUL-terminated + // Default gender: none. + fMicros.gender = ""; + // Resolve the symbols. Do this here because currency may need to customize them. 
if (macros.symbols.isDecimalFormatSymbols()) { fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); @@ -246,14 +253,11 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, return nullptr; } auto usagePrefsHandler = - new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fUsage, chain, status); + new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fValue, chain, status); fUsagePrefsHandler.adoptInsteadAndCheckErrorCode(usagePrefsHandler, status); chain = fUsagePrefsHandler.getAlias(); } else if (isMixedUnit) { - MeasureUnitImpl temp; - const MeasureUnitImpl &outputUnit = MeasureUnitImpl::forMeasureUnit(macros.unit, temp, status); - auto unitConversionHandler = - new UnitConversionHandler(outputUnit.units[0]->build(status), macros.unit, chain, status); + auto unitConversionHandler = new UnitConversionHandler(macros.unit, chain, status); fUnitConversionHandler.adoptInsteadAndCheckErrorCode(unitConversionHandler, status); chain = fUnitConversionHandler.getAlias(); } @@ -373,10 +377,14 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { + const char *unitDisplayCase = ""; + if (macros.unitDisplayCase.isSet()) { + unitDisplayCase = macros.unitDisplayCase.fValue; + } if (macros.usage.isSet()) { fLongNameMultiplexer.adoptInsteadAndCheckErrorCode( LongNameMultiplexer::forMeasureUnits( - macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, + macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, status), status); chain = fLongNameMultiplexer.getAlias(); @@ -384,13 +392,27 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, fMixedUnitLongNameHandler.adoptInsteadAndCheckErrorCode(new MixedUnitLongNameHandler(), status); MixedUnitLongNameHandler::forMeasureUnit( - macros.locale, 
macros.unit, unitWidth, + macros.locale, macros.unit, unitWidth, unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, fMixedUnitLongNameHandler.getAlias(), status); chain = fMixedUnitLongNameHandler.getAlias(); } else { + MeasureUnit unit = macros.unit; + if (!utils::unitIsBaseUnit(macros.perUnit)) { + unit = unit.product(macros.perUnit.reciprocal(status), status); + // This isn't strictly necessary, but was what we specced out + // when perUnit became a backward-compatibility thing: + // unit/perUnit use case is only valid if both units are + // built-ins, or the product is a built-in. + if (uprv_strcmp(unit.getType(), "") == 0 && + (uprv_strcmp(macros.unit.getType(), "") == 0 || + uprv_strcmp(macros.perUnit.getType(), "") == 0)) { + status = U_UNSUPPORTED_ERROR; + return nullptr; + } + } fLongNameHandler.adoptInsteadAndCheckErrorCode(new LongNameHandler(), status); - LongNameHandler::forMeasureUnit(macros.locale, macros.unit, macros.perUnit, unitWidth, + LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth, unitDisplayCase, resolvePluralRules(macros.rules, macros.locale, status), chain, fLongNameHandler.getAlias(), status); chain = fLongNameHandler.getAlias(); diff --git a/deps/icu-small/source/i18n/number_longnames.cpp b/deps/icu-small/source/i18n/number_longnames.cpp index 3891d532dea851..fd23a81a8df840 100644 --- a/deps/icu-small/source/i18n/number_longnames.cpp +++ b/deps/icu-small/source/i18n/number_longnames.cpp @@ -5,6 +5,8 @@ #if !UCONFIG_NO_FORMATTING +#include + #include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" @@ -39,19 +41,77 @@ constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; * `per` forms. */ constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; +/** + * Gender of the word, in languages with grammatical gender. + */ +constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; // Number of keys in the array populated by PluralTableSink. 
-constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; + +// TODO(icu-units#28): load this list from resources, after creating a "&set" +// function for use in ldml2icu rules. +const int32_t GENDER_COUNT = 7; +const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", + "masculine", "neuter", "personal"}; + +// Converts a UnicodeString to a const char*, either pointing to a string in +// gGenders, or pointing to an empty string if an appropriate string was not +// found. +const char *getGenderString(UnicodeString uGender, UErrorCode status) { + if (uGender.length() == 0) { + return ""; + } + CharString gender; + gender.appendInvariantChars(uGender, status); + if (U_FAILURE(status)) { + return ""; + } + int32_t first = 0; + int32_t last = GENDER_COUNT; + while (first < last) { + int32_t mid = (first + last) / 2; + int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]); + if (cmp == 0) { + return gGenders[mid]; + } else if (cmp > 0) { + first = mid + 1; + } else if (cmp < 0) { + last = mid; + } + } + // We don't return an error in case our gGenders list is incomplete in + // production. + // + // TODO(icu-units#28): a unit test checking all locales' genders are covered + // by gGenders? Else load a complete list of genders found in + // grammaticalFeatures in an initOnce. + return ""; +} +// Returns the array index that corresponds to the given pluralKeyword. 
static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { - // pluralKeyword can also be "dnam" or "per" - if (uprv_strcmp(pluralKeyword, "dnam") == 0) { - return DNAM_INDEX; - } else if (uprv_strcmp(pluralKeyword, "per") == 0) { - return PER_INDEX; - } else { - StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); - return plural; + // pluralKeyword can also be "dnam", "per", or "gender" + switch (*pluralKeyword) { + case 'd': + if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) { + return DNAM_INDEX; + } + break; + case 'g': + if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) { + return GENDER_INDEX; + } + break; + case 'p': + if (uprv_strcmp(pluralKeyword + 1, "er") == 0) { + return PER_INDEX; + } + break; + default: + break; } + StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); + return plural; } // Selects a string out of the `strings` array which corresponds to the @@ -74,13 +134,260 @@ static UnicodeString getWithPlural( return result; } +enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END }; + +/** + * Returns three outputs extracted from pattern. + * + * @param coreUnit is extracted as per Extract(...) in the spec: + * https://unicode.org/reports/tr35/tr35-general.html#compound-units + * @param PlaceholderPosition indicates where in the string the placeholder was + * found. + * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar + * contains the space character (if any) that separated the placeholder from + * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one + * space character is considered. 
+ */ +void extractCorePattern(const UnicodeString &pattern, + UnicodeString &coreUnit, + PlaceholderPosition &placeholderPosition, + UChar &joinerChar) { + joinerChar = 0; + int32_t len = pattern.length(); + if (pattern.startsWith(u"{0}", 3)) { + placeholderPosition = PH_BEGINNING; + if (u_isJavaSpaceChar(pattern[3])) { + joinerChar = pattern[3]; + coreUnit.setTo(pattern, 4, len - 4); + } else { + coreUnit.setTo(pattern, 3, len - 3); + } + } else if (pattern.endsWith(u"{0}", 3)) { + placeholderPosition = PH_END; + if (u_isJavaSpaceChar(pattern[len - 4])) { + coreUnit.setTo(pattern, 0, len - 4); + joinerChar = pattern[len - 4]; + } else { + coreUnit.setTo(pattern, 0, len - 3); + } + } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) { + placeholderPosition = PH_NONE; + coreUnit = pattern; + } else { + placeholderPosition = PH_MIDDLE; + coreUnit = pattern; + } +} ////////////////////////// /// BEGIN DATA LOADING /// ////////////////////////// +// Gets the gender of a built-in unit: unit must be a built-in. Returns an empty +// string both in case of unknown gender and in case of unknown unit. +UnicodeString +getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return {}; } + + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... + // TODO(ICU-20400): Get duration-*-person data properly with aliases. 
+ StringPiece subtypeForResource; + int32_t subtypeLen = static_cast(uprv_strlen(builtinUnit.getSubtype())); + if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) { + subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7}; + } else { + subtypeForResource = builtinUnit.getSubtype(); + } + + CharString key; + key.append("units/", status); + key.append(builtinUnit.getType(), status); + key.append("/", status); + key.append(subtypeForResource, status); + key.append("/gender", status); + + UErrorCode localStatus = status; + int32_t resultLen = 0; + const UChar *result = + ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus); + if (U_SUCCESS(localStatus)) { + status = localStatus; + return UnicodeString(true, result, resultLen); + } else { + // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to + // check whether the parent "$unitRes" exists? Then we could return + // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not + // being a builtin). + return {}; + } +} + +// Loads data from a resource tree with paths matching +// $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases +// and genders. +// +// An InflectedPluralSink is configured to load data for a specific gender and +// case. It loads all plural forms, because selection between plural forms is +// dependent upon the value being formatted. +// +// See data/unit/de.txt and data/unit/fr.txt for examples - take a look at +// units/compound/power2: German has case, French has differences for gender, +// but no case. +// +// TODO(icu-units#138): Conceptually similar to PluralTableSink, however the +// tree structures are different. After homogenizing the structures, we may be +// able to unify the two classes. +// +// TODO: Spec violation: expects presence of "count" - does not fallback to an +// absent "count"! 
If this fallback were added, getCompoundValue could be +// superseded? +class InflectedPluralSink : public ResourceSink { + public: + // Accepts `char*` rather than StringPiece because + // ResourceTable::findValue(...) requires a null-terminated `char*`. + // + // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds + // checking is performed. + explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray) + : gender(gender), caseVariant(caseVariant), outArray(outArray) { + // Initialize the array to bogus strings. + for (int32_t i = 0; i < ARRAY_LENGTH; i++) { + outArray[i].setToBogus(); + } + } + + // See ResourceSink::put(). + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { + ResourceTable pluralsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + int32_t pluralIndex = getIndex(key, status); + if (U_FAILURE(status)) { return; } + if (!outArray[pluralIndex].isBogus()) { + // We already have a pattern + continue; + } + ResourceTable genderTable = value.getTable(status); + ResourceTable caseTable; // This instance has to outlive `value` + if (loadForPluralForm(genderTable, caseTable, value, status)) { + outArray[pluralIndex] = value.getUnicodeString(status); + } + } + } + + private: + // Tries to load data for the configured gender from `genderTable`. Returns + // true if found, returning the data in `value`. The returned data will be + // for the configured gender if found, falling back to "neuter" and + // no-gender if not. The caseTable parameter holds the intermediate + // ResourceTable for the sake of lifetime management. 
+ bool loadForPluralForm(const ResourceTable &genderTable, + ResourceTable &caseTable, + ResourceValue &value, + UErrorCode &status) { + if (uprv_strcmp(gender, "") != 0) { + if (loadForGender(genderTable, gender, caseTable, value, status)) { + return true; + } + if (uprv_strcmp(gender, "neuter") != 0 && + loadForGender(genderTable, "neuter", caseTable, value, status)) { + return true; + } + } + if (loadForGender(genderTable, "_", caseTable, value, status)) { + return true; + } + return false; + } + + // Tries to load data for the given gender from `genderTable`. Returns true + // if found, returning the data in `value`. The returned data will be for + // the configured case if found, falling back to "nominative" and no-case if + // not. + bool loadForGender(const ResourceTable &genderTable, + const char *genderVal, + ResourceTable &caseTable, + ResourceValue &value, + UErrorCode &status) { + if (!genderTable.findValue(genderVal, value)) { + return false; + } + caseTable = value.getTable(status); + if (uprv_strcmp(caseVariant, "") != 0) { + if (loadForCase(caseTable, caseVariant, value)) { + return true; + } + if (uprv_strcmp(caseVariant, "nominative") != 0 && + loadForCase(caseTable, "nominative", value)) { + return true; + } + } + if (loadForCase(caseTable, "_", value)) { + return true; + } + return false; + } + + // Tries to load data for the given case from `caseTable`. Returns true if + // found, returning the data in `value`. + bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) { + if (!caseTable.findValue(caseValue, value)) { + return false; + } + return true; + } + + const char *gender; + const char *caseVariant; + UnicodeString *outArray; +}; + +// Fetches localised formatting patterns for the given subKey. See documentation +// for InflectedPluralSink for details. +// +// Data is loaded for the appropriate unit width, with missing data filled in +// from unitsShort. 
+void getInflectedMeasureData(StringPiece subKey, + const Locale &locale, + const UNumberUnitWidth &width, + const char *gender, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status) { + InflectedPluralSink sink(gender, caseVariant, outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/", status); + key.append(subKey, status); + + UErrorCode localStatus = status; + ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); + if (width == UNUM_UNIT_WIDTH_SHORT) { + status = localStatus; + return; + } + + // TODO(ICU-13353): The fallback to short does not work in ICU4C. + // Manually fall back to short (this is done automatically in Java). + key.clear(); + key.append("unitsShort/", status); + key.append(subKey, status); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); +} + class PluralTableSink : public ResourceSink { public: + // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds + // checking is performed. explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { // Initialize the array to bogus strings. 
for (int32_t i = 0; i < ARRAY_LENGTH; i++) { @@ -92,6 +399,9 @@ class PluralTableSink : public ResourceSink { ResourceTable pluralsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "case") == 0) { + continue; + } int32_t index = getIndex(key, status); if (U_FAILURE(status)) { return; } if (!outArray[index].isBogus()) { @@ -106,8 +416,6 @@ class PluralTableSink : public ResourceSink { UnicodeString *outArray; }; -// NOTE: outArray MUST have room for all StandardPlural values. No bounds checking is performed. - /** * Populates outArray with `locale`-specific values for `unit` through use of * PluralTableSink. Only the set of basic units are supported! @@ -116,24 +424,47 @@ class PluralTableSink : public ResourceSink { * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". * - * @param unit must have a type and subtype (i.e. it must be a unit listed in - * gTypes and gSubTypes in measunit.cpp). + * @param unit must be a built-in unit, i.e. must have a type and subtype, + * listed in gTypes and gSubTypes in measunit.cpp. + * @param unitDisplayCase the empty string and "nominative" are treated the + * same. For other cases, strings for the requested case are used if found. + * (For any missing case-specific data, we fall back to nominative.) * @param outArray must be of fixed length ARRAY_LENGTH. 
*/ -void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, - UnicodeString *outArray, UErrorCode &status) { +void getMeasureData(const Locale &locale, + const MeasureUnit &unit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + UnicodeString *outArray, + UErrorCode &status) { PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return; } + CharString subKey; + subKey.append("/", status); + subKey.append(unit.getType(), status); + subKey.append("/", status); + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... // TODO(ICU-20400): Get duration-*-person data properly with aliases. - StringPiece subtypeForResource; int32_t subtypeLen = static_cast(uprv_strlen(unit.getSubtype())); if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) { - subtypeForResource = {unit.getSubtype(), subtypeLen - 7}; + subKey.append({unit.getSubtype(), subtypeLen - 7}, status); } else { - subtypeForResource = unit.getSubtype(); + subKey.append({unit.getSubtype(), subtypeLen}, status); + } + + if (width != UNUM_UNIT_WIDTH_FULL_NAME) { + UErrorCode localStatus = status; + CharString genderKey; + genderKey.append("units", localStatus); + genderKey.append(subKey, localStatus); + genderKey.append("/gender", localStatus); + StackUResourceBundle fillIn; + ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(), + &localStatus); + outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus); } CharString key; @@ -143,11 +474,30 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } - key.append("/", status); - key.append(unit.getType(), status); - key.append("/", status); - key.append(subtypeForResource, status); + 
key.append(subKey, status); + + // Grab desired case first, if available. Then grab no-case data to fill in + // the gaps. + if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) { + CharString caseKey; + caseKey.append(key, status); + caseKey.append("/case/", status); + caseKey.append(unitDisplayCase, status); + UErrorCode localStatus = U_ZERO_ERROR; + // TODO(icu-units#138): our fallback logic is not spec-compliant: + // lateral fallback should happen before locale fallback. Switch to + // getInflectedMeasureData after homogenizing data format? Find a unit + // test case that demonstrates the incorrect fallback logic (via + // regional variant of an inflected language?) + ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); + } + + // TODO(icu-units#138): our fallback logic is not spec-compliant: we + // check the given case, then go straight to the no-case data. The spec + // states we should first look for case="nominative". As part of #138, + // either get the spec changed, or add unit tests that warn us if + // case="nominative" data differs from no-case data? UErrorCode localStatus = U_ZERO_ERROR; ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); if (width == UNUM_UNIT_WIDTH_SHORT) { @@ -160,13 +510,12 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber // TODO(ICU-13353): The fallback to short does not work in ICU4C. // Manually fall back to short (this is done automatically in Java). key.clear(); - key.append("unitsShort/", status); - key.append(unit.getType(), status); - key.append("/", status); - key.append(subtypeForResource, status); + key.append("unitsShort", status); + key.append(subKey, status); ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); } +// NOTE: outArray MUST have a length of at least ARRAY_LENGTH. 
void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, UErrorCode &status) { // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. @@ -195,7 +544,10 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, } } -UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { +UnicodeString getCompoundValue(StringPiece compoundKey, + const Locale &locale, + const UNumberUnitWidth &width, + UErrorCode &status) { LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return {}; } CharString key; @@ -205,119 +557,893 @@ UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &wid } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } - key.append("/compound/per", status); + key.append("/compound/", status); + key.append(compoundKey, status); + + UErrorCode localStatus = status; int32_t len = 0; - const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + const UChar *ptr = + ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus); + if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) { + // Fall back to short, which contains more compound data + key.clear(); + key.append("unitsShort/compound/", status); + key.append(compoundKey, status); + ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + } else { + status = localStatus; + } + if (U_FAILURE(status)) { + return {}; + } return UnicodeString(ptr, len); } +/** + * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. 
+ * + * Consider a deriveComponent rule that looks like this: + * + * + * + * Instantiating an instance as follows: + * + * DerivedComponents d(loc, "case", "per"); + * + * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", + * and `d.value1("foo")` will be "nominative". + * + * The values returned by value0(...) and value1(...) are valid only while the + * instance exists. In case of any kind of failure, value0(...) and value1(...) + * will return "". + */ +class DerivedComponents { + public: + /** + * Constructor. + * + * The feature and structure parameters must be null-terminated. The string + * referenced by compoundValue must exist for longer than the + * DerivedComponents instance. + */ + DerivedComponents(const Locale &locale, const char *feature, const char *structure) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), + &status); + if (U_FAILURE(status)) { + return; + } + UErrorCode localStatus = U_ZERO_ERROR; + // TODO(icu-units#28): use standard normal locale resolution algorithms + // rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), + &localStatus); + // TODO(icu-units#28): + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. + // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? 
+ if (localStatus == U_MISSING_RESOURCE_ERROR) { + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } else { + status = localStatus; + } + ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); + UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); + UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); + if (U_SUCCESS(status)) { + if (val0.compare(UnicodeString(u"compound")) == 0) { + compound0_ = true; + } else { + compound0_ = false; + value0_.appendInvariantChars(val0, status); + } + if (val1.compare(UnicodeString(u"compound")) == 0) { + compound1_ = true; + } else { + compound1_ = false; + value1_.appendInvariantChars(val1, status); + } + } + } + + // Returns a StringPiece that is only valid as long as the instance exists. + StringPiece value0(const StringPiece compoundValue) const { + return compound0_ ? compoundValue : value0_.toStringPiece(); + } + + // Returns a StringPiece that is only valid as long as the instance exists. + StringPiece value1(const StringPiece compoundValue) const { + return compound1_ ? compoundValue : value1_.toStringPiece(); + } + + // Returns a char* that is only valid as long as the instance exists. + const char *value0(const char *compoundValue) const { + return compound0_ ? compoundValue : value0_.data(); + } + + // Returns a char* that is only valid as long as the instance exists. + const char *value1(const char *compoundValue) const { + return compound1_ ? compoundValue : value1_.data(); + } + + private: + UErrorCode status = U_ZERO_ERROR; + + // Holds strings referred to by value0 and value1; + bool compound0_ = false, compound1_ = false; + CharString value0_, value1_; +}; + +// TODO(icu-units#28): test somehow? 
Associate with an ICU ticket for adding +// testsuite support for testing with synthetic data? +/** + * Loads and returns the value in rules that look like these: + * + * + * + * + * Currently a fake example, but spec compliant: + * + * + * NOTE: If U_FAILURE(status), returns an empty string. + */ +UnicodeString +getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); + // TODO: use standard normal locale resolution algorithms rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status); + // TODO: + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. + // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? 
+ if (status == U_MISSING_RESOURCE_ERROR) { + status = U_ZERO_ERROR; + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } + ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status); + if (U_FAILURE(status)) { + return {}; + } + U_ASSERT(!uVal.isBogus()); + return uVal; +} + +// Returns the gender string for structures following these rules: +// +// +// +// +// Fake example: +// +// +// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that +// correspond to value="0" and value="1". +// +// Pass a nullptr to data1 if the structure has no concept of value="1" (e.g. +// "prefix" doesn't). +UnicodeString getDerivedGender(Locale locale, + const char *structure, + UnicodeString *data0, + UnicodeString *data1, + UErrorCode &status) { + UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status); + if (val.length() == 1) { + switch (val[0]) { + case u'0': + return data0[GENDER_INDEX]; + case u'1': + if (data1 == nullptr) { + return {}; + } + return data1[GENDER_INDEX]; + } + } + return val; +} + //////////////////////// /// END DATA LOADING /// //////////////////////// -} // namespace +// TODO: promote this somewhere? 
It's based on patternprops.cpp' trimWhitespace +const UChar *trimSpaceChars(const UChar *s, int32_t &length) { + if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) { + return s; + } + int32_t start = 0; + int32_t limit = length; + while (start < limit && u_isJavaSpaceChar(s[start])) { + ++start; + } + if (start < limit) { + // There is non-white space at start; we will not move limit below that, + // so we need not test start 0); // Else it would not be COMPOUND + if (mui.singleUnits[endSlice]->dimensionality < 0) { + // We have a -per- construct + UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status); + if (perRule.length() != 1) { + // Fixed gender for -per- units + return perRule; + } + if (perRule[0] == u'1') { + // Find the start of the denominator. We already know there is one. + while (mui.singleUnits[startSlice]->dimensionality >= 0) { + startSlice++; + } + } else { + // Find the end of the numerator + while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) { + endSlice--; + } + if (endSlice < 0) { + // We have only a denominator, e.g. "per-second". + // TODO(icu-units#28): find out what gender to use in the + // absence of a first value - mentioned in CLDR-14253. 
+ return {}; + } + } + } + if (endSlice > startSlice) { + // We have a -times- construct + UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status); + if (timesRule.length() != 1) { + // Fixed gender for -times- units + return timesRule; + } + if (timesRule[0] == u'0') { + endSlice = startSlice; + } else { + // We assume timesRule[0] == u'1' + startSlice = endSlice; + } + } + U_ASSERT(startSlice == endSlice); + singleUnitIndex = startSlice; + } else if (mui.complexity == UMEASURE_UNIT_MIXED) { + status = U_INTERNAL_PROGRAM_ERROR; + return {}; + } else { + U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE); + U_ASSERT(mui.singleUnits.length() == 1); + } - MeasureUnit unit = unitRef; - if (uprv_strcmp(perUnit.getType(), "none") != 0) { - // Compound unit: first try to simplify (e.g., meters per second is its own unit). - MeasureUnit simplified = unit.product(perUnit.reciprocal(status), status); - if (uprv_strcmp(simplified.getType(), "") != 0) { - unit = simplified; - } else { - // No simplified form is available. - forCompoundUnit(loc, unit, perUnit, width, rules, parent, fillIn, status); + // Now we know which singleUnit's gender we want + const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex]; + // Check for any power-prefix gender override: + if (std::abs(singleUnit->dimensionality) != 1) { + UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status); + if (powerRule.length() != 1) { + // Fixed gender for -powN- units + return powerRule; + } + // powerRule[0] == u'0'; u'1' not currently in spec. + } + // Check for any SI and binary prefix gender override: + if (std::abs(singleUnit->dimensionality) != 1) { + UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status); + if (prefixRule.length() != 1) { + // Fixed gender for -powN- units + return prefixRule; + } + // prefixRule[0] == u'0'; u'1' not currently in spec. 
+ } + // Now we've boiled it down to the gender of one simple unit identifier: + return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status), + status); +} + +void maybeCalculateGender(const Locale &locale, + const MeasureUnit &unitRef, + UnicodeString *outArray, + UErrorCode &status) { + if (outArray[GENDER_INDEX].isBogus()) { + UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status); + if (meterGender.isEmpty()) { + // No gender for meter: assume ungendered language return; } + // We have a gendered language, but are lacking gender for unitRef. + outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status); } +} - if (uprv_strcmp(unit.getType(), "") == 0) { - // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an - // error code. Once we support not-built-in units here, unitRef may be - // anything, but if not built-in, perUnit has to be "none". - status = U_UNSUPPORTED_ERROR; +} // namespace + +void LongNameHandler::forMeasureUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, + UErrorCode &status) { + // From https://unicode.org/reports/tr35/tr35-general.html#compound-units - + // Points 1 and 2 are mostly handled by MeasureUnit: + // + // 1. If the unitId is empty or invalid, fail + // 2. Put the unitId into normalized order + U_ASSERT(fillIn != nullptr); + + if (uprv_strcmp(unitRef.getType(), "") != 0) { + // Handling built-in units: + // + // 3. 
Set result to be getValue(unitId with length, pluralCategory, caseVariant) + // - If result is not empty, return it + UnicodeString simpleFormats[ARRAY_LENGTH]; + getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status); + maybeCalculateGender(loc, unitRef, simpleFormats, status); + if (U_FAILURE(status)) { + return; + } + fillIn->rules = rules; + fillIn->parent = parent; + fillIn->simpleFormatsToModifiers(simpleFormats, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + if (!simpleFormats[GENDER_INDEX].isBogus()) { + fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); + } return; - } - UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); - if (U_FAILURE(status)) { + // TODO(icu-units#145): figure out why this causes a failure in + // format/MeasureFormatTest/TestIndividualPluralFallback and other + // tests, when it should have been an alternative for the lines above: + + // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); + // fillIn->rules = rules; + // fillIn->parent = parent; + // return; + } else { + // Check if it is a MeasureUnit this constructor handles: this + // constructor does not handle mixed units + U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED); + forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); + fillIn->rules = rules; + fillIn->parent = parent; return; } - fillIn->rules = rules; - fillIn->parent = parent; - fillIn->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, - status); } -void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, - const MeasureUnit &perUnit, const UNumberUnitWidth &width, - const PluralRules *rules, const MicroPropsGenerator *parent, - LongNameHandler *fillIn, UErrorCode &status) { - if (uprv_strcmp(unit.getType(), "") == 0 || uprv_strcmp(perUnit.getType(), "") == 0) { - // TODO(ICU-20941): 
Unsanctioned unit. Not yet fully supported. Set an - // error code. Once we support not-built-in units here, unitRef may be - // anything, but if not built-in, perUnit has to be "none". - status = U_UNSUPPORTED_ERROR; +void LongNameHandler::forArbitraryUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + LongNameHandler *fillIn, + UErrorCode &status) { + if (U_FAILURE(status)) { return; } if (fillIn == nullptr) { status = U_INTERNAL_PROGRAM_ERROR; return; } - UnicodeString primaryData[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, primaryData, status); - if (U_FAILURE(status)) { - return; - } - UnicodeString secondaryData[ARRAY_LENGTH]; - getMeasureData(loc, perUnit, width, secondaryData, status); - if (U_FAILURE(status)) { - return; + + // Numbered list items are from the algorithms at + // https://unicode.org/reports/tr35/tr35-general.html#compound-units: + // + // 4. Divide the unitId into numerator (the part before the "-per-") and + // denominator (the part after the "-per-). If both are empty, fail + MeasureUnitImpl unit; + MeasureUnitImpl perUnit; + { + MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); + if (U_FAILURE(status)) { + return; + } + for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) { + SingleUnitImpl *subUnit = fullUnit.singleUnits[i]; + if (subUnit->dimensionality > 0) { + unit.appendSingleUnit(*subUnit, status); + } else { + subUnit->dimensionality *= -1; + perUnit.appendSingleUnit(*subUnit, status); + } + } } - UnicodeString perUnitFormat; - if (!secondaryData[PER_INDEX].isBogus()) { - perUnitFormat = secondaryData[PER_INDEX]; + // TODO(icu-units#28): check placeholder logic, see if it needs to be + // present here instead of only in processPatternTimes: + // + // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty + + DerivedComponents derivedPerCases(loc, "case", "per"); + + // 6. 
numeratorUnitString + UnicodeString numeratorUnitData[ARRAY_LENGTH]; + processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase), + numeratorUnitData, status); + + // 7. denominatorUnitString + UnicodeString denominatorUnitData[ARRAY_LENGTH]; + processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase), + denominatorUnitData, status); + + // TODO(icu-units#139): + // - implement DerivedComponents for "plural/times" and "plural/power": + // French has different rules, we'll be producing the wrong results + // currently. (Prove via tests!) + // - implement DerivedComponents for "plural/per", "plural/prefix", + // "case/times", "case/power", and "case/prefix" - although they're + // currently hardcoded. Languages with different rules are surely on the + // way. + // + // Currently we only use "case/per", "plural/times", "case/times", and + // "case/power". + // + // This may have impact on multiSimpleFormatsToModifiers(...) below too? + // These rules are currently (ICU 69) all the same and hard-coded below. + UnicodeString perUnitPattern; + if (!denominatorUnitData[PER_INDEX].isBogus()) { + // If we have no denominator, we obtain the empty string: + perUnitPattern = denominatorUnitData[PER_INDEX]; } else { - UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); + // 8. Set perPattern to be getValue([per], locale, length) + UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status); + // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. + SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status); if (U_FAILURE(status)) { return; } - // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. - SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); + // Plural and placeholder handling for 7. 
denominatorUnitString: + // TODO(icu-units#139): hardcoded: + // + UnicodeString denominatorFormat = + getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status); + // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. + SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status); if (U_FAILURE(status)) { return; } - UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); + UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments(); + int32_t trimmedLen = denominatorPattern.length(); + const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen); + UnicodeString denominatorString(false, trimmed, trimmedLen); + // 9. If the denominatorString is empty, set result to + // [numeratorString], otherwise set result to format(perPattern, + // numeratorString, denominatorString) + // + // TODO(icu-units#28): Why does UnicodeString need to be explicit in the + // following line? + perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status); if (U_FAILURE(status)) { return; } - // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. - SimpleFormatter secondaryCompiled(secondaryFormat, 0, 1, status); - if (U_FAILURE(status)) { + } + if (perUnitPattern.length() == 0) { + fillIn->simpleFormatsToModifiers(numeratorUnitData, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + } else { + fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + } + + // Gender + // + // TODO(icu-units#28): find out what gender to use in the absence of a first + // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253. 
+ // + // gender/per deriveCompound rules don't say: + // + fillIn->gender = getGenderString( + getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status); +} + +void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, + Locale loc, + const UNumberUnitWidth &width, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (productUnit.complexity == UMEASURE_UNIT_MIXED) { + // These are handled by MixedUnitLongNameHandler + status = U_UNSUPPORTED_ERROR; + return; + } + +#if U_DEBUG + for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { + U_ASSERT(outArray[pluralIndex].length() == 0); + U_ASSERT(!outArray[pluralIndex].isBogus()); + } +#endif + + if (productUnit.identifier.isEmpty()) { + // TODO(icu-units#28): consider when serialize should be called. + // identifier might also be empty for MeasureUnit(). + productUnit.serialize(status); + } + if (U_FAILURE(status)) { + return; + } + if (productUnit.identifier.length() == 0) { + // MeasureUnit(): no units: return empty strings. + return; + } + + MeasureUnit builtinUnit; + if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) { + // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it + // breaks them all down. Do we want to drop this? + // - findBySubType isn't super efficient, if we skip it and go to basic + // singles, we don't have to construct MeasureUnit's anymore. + // - Check all the existing unit tests that fail without this: is it due + // to incorrect fallback via getMeasureData? + // - Do those unit tests cover this code path representatively? + if (builtinUnit != MeasureUnit()) { + getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status); + maybeCalculateGender(loc, builtinUnit, outArray, status); + } + return; + } + + // 2. 
Set timesPattern to be getValue(times, locale, length) + UnicodeString timesPattern = getCompoundValue("times", loc, width, status); + SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status); + if (U_FAILURE(status)) { + return; + } + + PlaceholderPosition globalPlaceholder[ARRAY_LENGTH]; + UChar globalJoinerChar = 0; + // Numbered list items are from the algorithms at + // https://unicode.org/reports/tr35/tr35-general.html#compound-units: + // + // pattern(...) point 5: + // - Set both globalPlaceholder and globalPlaceholderPosition to be empty + // + // 3. Set result to be empty + for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { + // Initial state: empty string pattern, via all falling back to OTHER: + if (pluralIndex == StandardPlural::Form::OTHER) { + outArray[pluralIndex].remove(); + } else { + outArray[pluralIndex].setToBogus(); + } + globalPlaceholder[pluralIndex] = PH_EMPTY; + } + + // Empty string represents "compound" (propagate the plural form). + const char *pluralCategory = ""; + DerivedComponents derivedTimesPlurals(loc, "plural", "times"); + DerivedComponents derivedTimesCases(loc, "case", "times"); + DerivedComponents derivedPowerCases(loc, "case", "power"); + + // 4. For each single_unit in product_unit + for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); + singleUnitIndex++) { + SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex]; + const char *singlePluralCategory; + const char *singleCaseVariant; + // TODO(icu-units#28): ensure we have unit tests that change/fail if we + // assign incorrect case variants here: + if (singleUnitIndex < productUnit.singleUnits.length() - 1) { + // 4.1. 
If hasMultiple + singlePluralCategory = derivedTimesPlurals.value0(pluralCategory); + singleCaseVariant = derivedTimesCases.value0(caseVariant); + pluralCategory = derivedTimesPlurals.value1(pluralCategory); + caseVariant = derivedTimesCases.value1(caseVariant); + } else { + singlePluralCategory = derivedTimesPlurals.value1(pluralCategory); + singleCaseVariant = derivedTimesCases.value1(caseVariant); + } + + // 4.2. Get the gender of that single_unit + MeasureUnit simpleUnit; + if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) { + // Ideally all simple units should be known, but they're not: + // 100-kilometer is internally treated as a simple unit, but it is + // not a built-in unit and does not have formatting data in CLDR 39. + // + // TODO(icu-units#28): test (desirable) invariants in unit tests. + status = U_UNSUPPORTED_ERROR; return; } - UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); - // TODO: Why does UnicodeString need to be explicit in the following line? - compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); + const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status); + + // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-' + U_ASSERT(singleUnit->dimensionality > 0); + int32_t dimensionality = singleUnit->dimensionality; + UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH]; + if (dimensionality != 1) { + // 4.3.1. 
set dimensionalityPrefixPattern to be + // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender), + // such as "{0} kwadratowym" + CharString dimensionalityKey("compound/power", status); + dimensionalityKey.appendNumber(dimensionality, status); + getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender, + singleCaseVariant, dimensionalityPrefixPatterns, status); + if (U_FAILURE(status)) { + // At the time of writing, only pow2 and pow3 are supported. + // Attempting to format other powers results in a + // U_RESOURCE_TYPE_MISMATCH. We convert the error if we + // understand it: + if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) { + status = U_UNSUPPORTED_ERROR; + } + return; + } + + // TODO(icu-units#139): + // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory) + + // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant) + singleCaseVariant = derivedPowerCases.value0(singleCaseVariant); + // 4.3.4. remove the dimensionality_prefix from singleUnit + singleUnit->dimensionality = 1; + } + + // 4.4. if singleUnit starts with an si_prefix, such as 'centi' + UMeasurePrefix prefix = singleUnit->unitPrefix; + UnicodeString prefixPattern; + if (prefix != UMEASURE_PREFIX_ONE) { + // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale, + // length), such as "centy{0}" + CharString prefixKey; + // prefixKey looks like "1024p3" or "10p-2": + prefixKey.appendNumber(umeas_getPrefixBase(prefix), status); + prefixKey.append('p', status); + prefixKey.appendNumber(umeas_getPrefixPower(prefix), status); + // Contains a pattern like "centy{0}". + prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status); + + // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory) + // + // TODO(icu-units#139): that refers to these rules: + // + // though I'm not sure what other value they might end up having. + // + // 4.4.3. 
set singleCaseVariant to be prefix0(singleCaseVariant) + // + // TODO(icu-units#139): that refers to: + // but the prefix (value0) doesn't have case, the rest simply + // propagates. + + // 4.4.4. remove the si_prefix from singleUnit + singleUnit->unitPrefix = UMEASURE_PREFIX_ONE; + } + + // 4.5. Set corePattern to be the getValue(singleUnit, locale, length, + // singlePluralCategory, singleCaseVariant), such as "{0} metrem" + UnicodeString singleUnitArray[ARRAY_LENGTH]; + // At this point we are left with a Simple Unit: + U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) == + 0); + getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray, + status); if (U_FAILURE(status)) { + // Shouldn't happen if we have data for all single units return; } + + // Calculate output gender + if (!singleUnitArray[GENDER_INDEX].isBogus()) { + U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty()); + UnicodeString uVal; + + if (prefix != UMEASURE_PREFIX_ONE) { + singleUnitArray[GENDER_INDEX] = + getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status); + } + + if (dimensionality != 1) { + singleUnitArray[GENDER_INDEX] = + getDerivedGender(loc, "power", singleUnitArray, nullptr, status); + } + + UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status); + if (timesGenderRule.length() == 1) { + switch (timesGenderRule[0]) { + case u'0': + if (singleUnitIndex == 0) { + U_ASSERT(outArray[GENDER_INDEX].isBogus()); + outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; + } + break; + case u'1': + if (singleUnitIndex == productUnit.singleUnits.length() - 1) { + U_ASSERT(outArray[GENDER_INDEX].isBogus()); + outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; + } + } + } else { + if (outArray[GENDER_INDEX].isBogus()) { + outArray[GENDER_INDEX] = timesGenderRule; + } + } + } + + // Calculate resulting patterns for each plural form + for (int32_t pluralIndex = 0; pluralIndex < 
StandardPlural::Form::COUNT; pluralIndex++) { + StandardPlural::Form plural = static_cast(pluralIndex); + + // singleUnitArray[pluralIndex] looks something like "{0} Meter" + if (outArray[pluralIndex].isBogus()) { + if (singleUnitArray[pluralIndex].isBogus()) { + // Let the usual plural fallback mechanism take care of this + // plural form + continue; + } else { + // Since our singleUnit can have a plural form that outArray + // doesn't yet have (relying on fallback to OTHER), we start + // by grabbing it with the normal plural fallback mechanism + outArray[pluralIndex] = getWithPlural(outArray, plural, status); + if (U_FAILURE(status)) { + return; + } + } + } + + if (uprv_strcmp(singlePluralCategory, "") != 0) { + plural = static_cast(getIndex(singlePluralCategory, status)); + } + + // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern. + UnicodeString coreUnit; + PlaceholderPosition placeholderPosition; + UChar joinerChar; + extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit, + placeholderPosition, joinerChar); + + // 4.7 If the position is middle, then fail + if (placeholderPosition == PH_MIDDLE) { + status = U_UNSUPPORTED_ERROR; + return; + } + + // 4.8. If globalPlaceholder is empty + if (globalPlaceholder[pluralIndex] == PH_EMPTY) { + globalPlaceholder[pluralIndex] = placeholderPosition; + globalJoinerChar = joinerChar; + } else { + // Expect all units involved to have the same placeholder position + U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition); + // TODO(icu-units#28): Do we want to add a unit test that checks + // for consistent joiner chars? Probably not, given how + // inconsistent they are. File a CLDR ticket with examples? + } + // Now coreUnit would be just "Meter" + + // 4.9. If siPrefixPattern is not empty + if (prefix != UMEASURE_PREFIX_ONE) { + SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status); + if (U_FAILURE(status)) { + return; + } + + // 4.9.1. 
Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern, + // coreUnit) + UnicodeString tmp; + // combineLowercasing(locale, length, prefixPattern, coreUnit) + // + // TODO(icu-units#28): run this only if prefixPattern does not + // contain space characters - do languages "as", "bn", "hi", + // "kk", etc have concepts of upper and lower case?: + if (width == UNUM_UNIT_WIDTH_FULL_NAME) { + coreUnit.toLower(loc); + } + prefixCompiled.format(coreUnit, tmp, status); + if (U_FAILURE(status)) { + return; + } + coreUnit = tmp; + } + + // 4.10. If dimensionalityPrefixPattern is not empty + if (dimensionality != 1) { + SimpleFormatter dimensionalityCompiled( + getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status); + if (U_FAILURE(status)) { + return; + } + + // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length, + // dimensionalityPrefixPattern, coreUnit) + UnicodeString tmp; + // combineLowercasing(locale, length, prefixPattern, coreUnit) + // + // TODO(icu-units#28): run this only if prefixPattern does not + // contain space characters - do languages "as", "bn", "hi", + // "kk", etc have concepts of upper and lower case?: + if (width == UNUM_UNIT_WIDTH_FULL_NAME) { + coreUnit.toLower(loc); + } + dimensionalityCompiled.format(coreUnit, tmp, status); + if (U_FAILURE(status)) { + return; + } + coreUnit = tmp; + } + + if (outArray[pluralIndex].length() == 0) { + // 4.11. If the result is empty, set result to be coreUnit + outArray[pluralIndex] = coreUnit; + } else { + // 4.12. 
Otherwise set result to be format(timesPattern, result, coreUnit) + UnicodeString tmp; + timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status); + outArray[pluralIndex] = tmp; + } + } + } + for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { + if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { + UnicodeString tmp; + tmp.append(u"{0}", 3); + if (globalJoinerChar != 0) { + tmp.append(globalJoinerChar); + } + tmp.append(outArray[pluralIndex]); + outArray[pluralIndex] = tmp; + } else if (globalPlaceholder[pluralIndex] == PH_END) { + if (globalJoinerChar != 0) { + outArray[pluralIndex].append(globalJoinerChar); + } + outArray[pluralIndex].append(u"{0}", 3); + } } - fillIn->rules = rules; - fillIn->parent = parent; - fillIn->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, - {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); } UnicodeString LongNameHandler::getUnitDisplayName( @@ -329,7 +1455,7 @@ UnicodeString LongNameHandler::getUnitDisplayName( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); return simpleFormats[DNAM_INDEX]; } @@ -343,7 +1469,7 @@ UnicodeString LongNameHandler::getUnitPattern( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); // The above already handles fallback from other widths to short if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); @@ -366,6 +1492,7 @@ LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return nullptr; } result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); + // 
TODO(icu-units#28): currency gender? return result; } @@ -390,8 +1517,12 @@ void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFor UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); if (U_FAILURE(status)) { return; } UnicodeString compoundFormat; - trailCompiled.format(leadFormat, compoundFormat, status); - if (U_FAILURE(status)) { return; } + if (leadFormat.length() == 0) { + compoundFormat = trailFormat; + } else { + trailCompiled.format(leadFormat, compoundFormat, status); + if (U_FAILURE(status)) { return; } + } SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); if (U_FAILURE(status)) { return; } fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); @@ -405,29 +1536,53 @@ void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &mic } StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); micros.modOuter = &fModifiers[pluralForm]; + micros.gender = gender; } const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { return &fModifiers[plural]; } -void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, - const UNumberUnitWidth &width, const PluralRules *rules, +void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, const MicroPropsGenerator *parent, - MixedUnitLongNameHandler *fillIn, UErrorCode &status) { + MixedUnitLongNameHandler *fillIn, + UErrorCode &status) { U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); U_ASSERT(fillIn != nullptr); + if (U_FAILURE(status)) { + return; + } MeasureUnitImpl temp; - const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); - fillIn->fMixedUnitCount = impl.units.length(); + const MeasureUnitImpl &impl = 
MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); + // Defensive, for production code: + if (impl.complexity != UMEASURE_UNIT_MIXED) { + // Should be using the normal LongNameHandler + status = U_UNSUPPORTED_ERROR; + return; + } + + fillIn->fMixedUnitCount = impl.singleUnits.length(); fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { // Grab data for each of the components. UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; - getMeasureData(loc, impl.units[i]->build(status), width, unitData, status); + // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this + // propagation of unitDisplayCase is correct: + getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, + status); + // TODO(ICU-21494): if we add support for gender for mixed units, we may + // need maybeCalculateGender() here. } + // TODO(icu-units#120): Make sure ICU doesn't output zero-valued + // high-magnitude fields + // * for mixed units count N, produce N listFormatters, one for each subset + // that might be formatted. UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; if (width == UNUM_UNIT_WIDTH_NARROW) { listWidth = ULISTFMT_WIDTH_NARROW; @@ -437,12 +1592,14 @@ void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUn } fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); + // TODO(ICU-21494): grab gender of each unit, calculate the gender + // associated with this list formatter, save it for later. fillIn->rules = rules; fillIn->parent = parent; - // We need a localised NumberFormatter for the integers of the bigger units + // We need a localised NumberFormatter for the numbers of the bigger units // (providing Arabic numerals, for example). 
- fillIn->fIntegerFormatter = NumberFormatter::withLocale(loc); + fillIn->fNumberFormatter = NumberFormatter::withLocale(loc); } void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, @@ -462,12 +1619,6 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity & status = U_UNSUPPORTED_ERROR; return µs.helpers.emptyWeakModifier; } - // If we don't have at least one mixedMeasure, the LongNameHandler would be - // sufficient and we shouldn't be running MixedUnitLongNameHandler code: - U_ASSERT(micros.mixedMeasuresCount > 0); - // mixedMeasures does not contain the last value: - U_ASSERT(fMixedUnitCount == micros.mixedMeasuresCount + 1); - U_ASSERT(fListFormatter.isValid()); // Algorithm: // @@ -492,39 +1643,45 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity & return µs.helpers.emptyWeakModifier; } + StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER; for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) { DecimalQuantity fdec; - fdec.setToLong(micros.mixedMeasures[i]); - if (i > 0 && fdec.isNegative()) { - // If numbers are negative, only the first number needs to have its - // negative sign formatted. - fdec.negate(); - } - StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); - UnicodeString simpleFormat = - getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); - SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + // If numbers are negative, only the first number needs to have its + // negative sign formatted. + int64_t number = i > 0 ? 
std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i]; - UnicodeString num; - auto appendable = UnicodeStringAppendable(num); - fIntegerFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); - compiledFormatter.format(num, outputMeasuresList[i], status); - // TODO(icu-units#67): fix field positions - } + if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity` + // If quantity is not the first value and quantity is negative + if (micros.indexOfQuantity > 0 && quantity.isNegative()) { + quantity.negate(); + } - // Reiterated: we have at least one mixedMeasure: - U_ASSERT(micros.mixedMeasuresCount > 0); - // Thus if negative, a negative has already been formatted: - if (quantity.isNegative()) { - quantity.negate(); + StandardPlural::Form quantityPlural = + utils::getPluralSafe(micros.rounder, rules, quantity, status); + UnicodeString quantityFormatWithPlural = + getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status); + SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status); + quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status); + } else { + fdec.setToLong(number); + StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); + UnicodeString simpleFormat = + getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); + SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + UnicodeString num; + auto appendable = UnicodeStringAppendable(num); + + fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); + compiledFormatter.format(num, outputMeasuresList[i], status); + } } - UnicodeString *finalSimpleFormats = &fMixedUnitData[(fMixedUnitCount - 1) * ARRAY_LENGTH]; - StandardPlural::Form finalPlural = utils::getPluralSafe(micros.rounder, rules, quantity, status); - UnicodeString finalSimpleFormat = getWithPlural(finalSimpleFormats, finalPlural, status); - SimpleFormatter finalFormatter(finalSimpleFormat, 0, 1, 
status); - finalFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[fMixedUnitCount - 1], status); + // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we + // can set micros.gender to the gender associated with the list formatter in + // use below (once we have correct support for that). And then document this + // appropriately? "getMixedUnitModifier" doesn't sound like it would do + // something like this. // Combine list into a "premixed" pattern UnicodeString premixedFormatPattern; @@ -535,26 +1692,27 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity & return µs.helpers.emptyWeakModifier; } - // TODO(icu-units#67): fix field positions - // Return a SimpleModifier for the "premixed" pattern micros.helpers.mixedUnitModifier = - SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, finalPlural}); + SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural}); return µs.helpers.mixedUnitModifier; } const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form /*plural*/) const { - // TODO(units): investigate this method when investigating where - // LongNameHandler::getModifier() gets used. To be sure it remains + // TODO(icu-units#28): investigate this method when investigating where + // ModifierStore::getModifier() gets used. 
To be sure it remains // unreachable: UPRV_UNREACHABLE; return nullptr; } -LongNameMultiplexer * -LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector &units, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status) { +LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, + const MaybeStackVector &units, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status) { LocalPointer result(new LongNameMultiplexer(parent), status); if (U_FAILURE(status)) { return nullptr; @@ -566,15 +1724,16 @@ LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVectorfMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); for (int32_t i = 0, length = units.length(); i < length; i++) { - const MeasureUnit& unit = *units[i]; + const MeasureUnit &unit = *units[i]; result->fMeasureUnits[i] = unit; if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); - MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, rules, NULL, mlnh, status); + MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, + mlnh, status); result->fHandlers[i] = mlnh; } else { LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status); - LongNameHandler::forMeasureUnit(loc, unit, MeasureUnit(), width, rules, NULL, lnh, status); + LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status); result->fHandlers[i] = lnh; } if (U_FAILURE(status)) { diff --git a/deps/icu-small/source/i18n/number_longnames.h b/deps/icu-small/source/i18n/number_longnames.h index 67f2316a9cd4bd..bca55e010317dc 100644 --- a/deps/icu-small/source/i18n/number_longnames.h +++ b/deps/icu-small/source/i18n/number_longnames.h @@ -16,6 +16,8 @@ 
U_NAMESPACE_BEGIN namespace number { namespace impl { +// LongNameHandler takes care of formatting currency and measurement unit names, +// as well as populating the gender of measure units. class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory { public: static UnicodeString getUnitDisplayName( @@ -24,6 +26,8 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UNumberUnitWidth width, UErrorCode& status); + // This function does not support inflections or other newer NumberFormatter + // features: it exists to support the older not-recommended MeasureFormat. static UnicodeString getUnitPattern( const Locale& loc, const MeasureUnit& unit, @@ -38,9 +42,6 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public /** * Construct a localized LongNameHandler for the specified MeasureUnit. * - * Compound units can be constructed via `unit` and `perUnit`. Both of these - * must then be built-in units. - * * Mixed units are not supported, use MixedUnitLongNameHandler::forMeasureUnit. * * This function uses a fillIn intead of returning a pointer, because we @@ -48,17 +49,21 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public * didn't create itself). * * @param loc The desired locale. - * @param unit The measure unit to construct a LongNameHandler for. If - * `perUnit` is also defined, `unit` must not be a mixed unit. - * @param perUnit If `unit` is a mixed unit, `perUnit` must be "none". + * @param unitRef The measure unit to construct a LongNameHandler for. * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. If the + * specified case is not found, we fall back to nominative or no-case. * @param rules Does not take ownership. * @param parent Does not take ownership. * @param fillIn Required. 
*/ - static void forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, LongNameHandler *fillIn, + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, UErrorCode &status); /** @@ -68,10 +73,6 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public void processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const U_OVERRIDE; - // TODO(units): investigate whether we might run into Mixed Unit trouble - // with this. This override for ModifierStore::getModifier does not support - // mixed units: investigate under which circumstances it gets called (check - // both ImmutablePatternModifier and in NumberRangeFormatterImpl). const Modifier* getModifier(Signum signum, StandardPlural::Form plural) const U_OVERRIDE; private: @@ -81,6 +82,9 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public const PluralRules *rules; // Not owned const MicroPropsGenerator *parent; + // Grammatical gender of the formatted result. Not owned: must point at + // static or global strings. + const char *gender = ""; LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) : rules(rules), parent(parent) { @@ -96,13 +100,25 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public // Allow macrosToMicroGenerator to call the private default constructor. friend class NumberFormatterImpl; - // Fills in LongNameHandler fields for formatting compound units identified - // via `unit` and `perUnit`. Both `unit` and `perUnit` need to be built-in - // units (for which data exists). 
- static void forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, LongNameHandler *fillIn, - UErrorCode &status); + // Fills in LongNameHandler fields for formatting units identified `unit`. + static void forArbitraryUnit(const Locale &loc, + const MeasureUnit &unit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + LongNameHandler *fillIn, + UErrorCode &status); + + // Roughly corresponds to patternTimes(...) in the spec: + // https://unicode.org/reports/tr35/tr35-general.html#compound-units + // + // productUnit is an rvalue reference to indicate this function consumes it, + // leaving it in a not-useful / undefined state. + static void processPatternTimes(MeasureUnitImpl &&productUnit, + Locale loc, + const UNumberUnitWidth &width, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status); // Sets fModifiers to use the patterns from `simpleFormats`. void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, UErrorCode &status); @@ -111,7 +127,7 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public // and `trailFormat` appended to each. // // With a leadFormat of "{0}m" and a trailFormat of "{0}/s", it produces a - // pattern of "{0}m/s" by inserting the leadFormat pattern into trailFormat. + // pattern of "{0}m/s" by inserting each leadFormat pattern into trailFormat. void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, Field field, UErrorCode &status); }; @@ -131,13 +147,19 @@ class MixedUnitLongNameHandler : public MicroPropsGenerator, public ModifierStor * @param mixedUnit The mixed measure unit to construct a * MixedUnitLongNameHandler for. * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. 
If the + * specified case is not found, we fall back to nominative or no-case. * @param rules Does not take ownership. * @param parent Does not take ownership. * @param fillIn Required. */ - static void forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, MixedUnitLongNameHandler *fillIn, + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + MixedUnitLongNameHandler *fillIn, UErrorCode &status); /** @@ -156,21 +178,24 @@ class MixedUnitLongNameHandler : public MicroPropsGenerator, public ModifierStor private: // Not owned const PluralRules *rules; + // Not owned const MicroPropsGenerator *parent; // Total number of units in the MeasureUnit this handler was configured for: // for "foot-and-inch", this will be 2. int32_t fMixedUnitCount = 1; + // Stores unit data for each of the individual units. For each unit, it // stores ARRAY_LENGTH strings, as returned by getMeasureData. (Each unit // with index `i` has ARRAY_LENGTH strings starting at index // `i*ARRAY_LENGTH` in this array.) LocalArray fMixedUnitData; - // A localized NumberFormatter used to format the integer-valued bigger - // units of Mixed Unit measurements. - LocalizedNumberFormatter fIntegerFormatter; - // A localised list formatter for joining mixed units together. + + // Formats the larger units of Mixed Unit measurements. + LocalizedNumberFormatter fNumberFormatter; + + // Joins mixed units together. LocalPointer fListFormatter; MixedUnitLongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) @@ -207,8 +232,11 @@ class LongNameMultiplexer : public MicroPropsGenerator, public UMemory { // `units`. An individual unit might be a mixed unit. 
static LongNameMultiplexer *forMeasureUnits(const Locale &loc, const MaybeStackVector &units, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status); + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status); // The output unit must be provided via `micros.outputUnit`, it must match // one of the units provided to the factory function. diff --git a/deps/icu-small/source/i18n/number_microprops.h b/deps/icu-small/source/i18n/number_microprops.h index 058c5923b4567b..a18d5fc470eda1 100644 --- a/deps/icu-small/source/i18n/number_microprops.h +++ b/deps/icu-small/source/i18n/number_microprops.h @@ -36,8 +36,7 @@ class IntMeasures : public MaybeStackArray { * Stack Capacity: most mixed units are expected to consist of two or three * subunits, so one or two integer measures should be enough. */ - IntMeasures() : MaybeStackArray() { - } + IntMeasures() : MaybeStackArray() {} /** * Copy constructor. @@ -84,6 +83,11 @@ struct MicroProps : public MicroPropsGenerator { bool useCurrency; char nsName[9]; + // No ownership: must point at a string which will outlive MicroProps + // instances, e.g. a string with static storage duration, or just a string + // that will never be deallocated or modified. + const char *gender; + // Note: This struct has no direct ownership of the following pointers. const DecimalFormatSymbols* symbols; @@ -122,9 +126,14 @@ struct MicroProps : public MicroPropsGenerator { // play. MeasureUnit outputUnit; - // In the case of mixed units, this is the set of integer-only units - // *preceding* the final unit. + // Contains all the values of each unit in mixed units. For quantity (which is the floating value of + // the smallest unit in the mixed unit), the value stores in `quantity`. + // NOTE: the value of quantity in `mixedMeasures` will be left unset. 
IntMeasures mixedMeasures; + + // Points to quantity position, -1 if the position is not set yet. + int32_t indexOfQuantity = -1; + // Number of mixedMeasures that have been populated int32_t mixedMeasuresCount = 0; diff --git a/deps/icu-small/source/i18n/number_multiplier.cpp b/deps/icu-small/source/i18n/number_multiplier.cpp index 8f07e548de121b..58e1e441bd28c5 100644 --- a/deps/icu-small/source/i18n/number_multiplier.cpp +++ b/deps/icu-small/source/i18n/number_multiplier.cpp @@ -46,6 +46,7 @@ Scale::Scale(const Scale& other) } Scale& Scale::operator=(const Scale& other) { + if (this == &other) { return *this; } // self-assignment: no-op fMagnitude = other.fMagnitude; if (other.fArbitrary != nullptr) { UErrorCode localStatus = U_ZERO_ERROR; diff --git a/deps/icu-small/source/i18n/number_output.cpp b/deps/icu-small/source/i18n/number_output.cpp index 7129b94e0f86e4..2c2c25eaedb427 100644 --- a/deps/icu-small/source/i18n/number_output.cpp +++ b/deps/icu-small/source/i18n/number_output.cpp @@ -39,6 +39,11 @@ MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const { return fData->outputUnit; } +const char *FormattedNumber::getGender(UErrorCode &status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD("") + return fData->gender; +} + void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG) output = fData->quantity; @@ -52,16 +57,6 @@ UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumberRange) #define UPRV_NOARG -UnicodeString FormattedNumberRange::getFirstDecimal(UErrorCode& status) const { - UPRV_FORMATTED_VALUE_METHOD_GUARD(ICU_Utility::makeBogusString()) - return fData->quantity1.toScientificString(); -} - -UnicodeString FormattedNumberRange::getSecondDecimal(UErrorCode& status) const { - UPRV_FORMATTED_VALUE_METHOD_GUARD(ICU_Utility::makeBogusString()) - return fData->quantity2.toScientificString(); -} - void FormattedNumberRange::getDecimalNumbers(ByteSink& sink1, 
ByteSink& sink2, UErrorCode& status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG) impl::DecNum decnum1; diff --git a/deps/icu-small/source/i18n/number_patternstring.cpp b/deps/icu-small/source/i18n/number_patternstring.cpp index 9d845056069b80..ac9e8b7e8e4d75 100644 --- a/deps/icu-small/source/i18n/number_patternstring.cpp +++ b/deps/icu-small/source/i18n/number_patternstring.cpp @@ -1106,6 +1106,20 @@ PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDi } break; + case UNUM_SIGN_NEGATIVE: + case UNUM_SIGN_ACCOUNTING_NEGATIVE: + switch (signum) { + case SIGNUM_NEG: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_NEG_ZERO: + case SIGNUM_POS_ZERO: + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS; + default: + break; + } + break; + case UNUM_SIGN_NEVER: return PATTERN_SIGN_TYPE_POS; diff --git a/deps/icu-small/source/i18n/number_rounding.cpp b/deps/icu-small/source/i18n/number_rounding.cpp index a8fd6bc892a8a5..40392ee857e493 100644 --- a/deps/icu-small/source/i18n/number_rounding.cpp +++ b/deps/icu-small/source/i18n/number_rounding.cpp @@ -193,6 +193,12 @@ Precision Precision::minMaxSignificantDigits(int32_t minSignificantDigits, int32 } } +Precision Precision::trailingZeroDisplay(UNumberTrailingZeroDisplay trailingZeroDisplay) const { + Precision result(*this); // copy constructor + result.fTrailingZeroDisplay = trailingZeroDisplay; + return result; +} + IncrementPrecision Precision::increment(double roundingIncrement) { if (roundingIncrement > 0.0) { return constructIncrement(roundingIncrement, 0); @@ -205,10 +211,32 @@ CurrencyPrecision Precision::currency(UCurrencyUsage currencyUsage) { return constructCurrency(currencyUsage); } +Precision FractionPrecision::withSignificantDigits( + int32_t minSignificantDigits, + int32_t maxSignificantDigits, + UNumberRoundingPriority priority) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + if (minSignificantDigits >= 1 && + maxSignificantDigits >= 
minSignificantDigits && + maxSignificantDigits <= kMaxIntFracSig) { + return constructFractionSignificant( + *this, + minSignificantDigits, + maxSignificantDigits, + priority); + } else { + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; + } +} + Precision FractionPrecision::withMinDigits(int32_t minSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { - return constructFractionSignificant(*this, minSignificantDigits, -1); + return constructFractionSignificant( + *this, + 1, + minSignificantDigits, + UNUM_ROUNDING_PRIORITY_RELAXED); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -217,7 +245,10 @@ Precision FractionPrecision::withMinDigits(int32_t minSignificantDigits) const { Precision FractionPrecision::withMaxDigits(int32_t maxSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { - return constructFractionSignificant(*this, -1, maxSignificantDigits); + return constructFractionSignificant(*this, + 1, + maxSignificantDigits, + UNUM_ROUNDING_PRIORITY_STRICT); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -231,11 +262,11 @@ Precision Precision::withCurrency(const CurrencyUnit ¤cy, UErrorCode &stat double increment = ucurr_getRoundingIncrementForUsage(isoCode, fUnion.currencyUsage, &status); int32_t minMaxFrac = ucurr_getDefaultFractionDigitsForUsage( isoCode, fUnion.currencyUsage, &status); - if (increment != 0.0) { - return constructIncrement(increment, minMaxFrac); - } else { - return constructFraction(minMaxFrac, minMaxFrac); - } + Precision retval = (increment != 0.0) + ? 
static_cast(constructIncrement(increment, minMaxFrac)) + : static_cast(constructFraction(minMaxFrac, minMaxFrac)); + retval.fTrailingZeroDisplay = fTrailingZeroDisplay; + return retval; } // Public method on CurrencyPrecision subclass @@ -280,10 +311,15 @@ Precision Precision::constructSignificant(int32_t minSig, int32_t maxSig) { } Precision -Precision::constructFractionSignificant(const FractionPrecision &base, int32_t minSig, int32_t maxSig) { +Precision::constructFractionSignificant( + const FractionPrecision &base, + int32_t minSig, + int32_t maxSig, + UNumberRoundingPriority priority) { FractionSignificantSettings settings = base.fUnion.fracSig; settings.fMinSig = static_cast(minSig); settings.fMaxSig = static_cast(maxSig); + settings.fPriority = priority; PrecisionUnion union_; union_.fracSig = settings; return {RND_FRACTION_SIGNIFICANT, union_}; @@ -383,6 +419,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const if (fPassThrough) { return; } + int32_t resolvedMinFraction = 0; switch (fPrecision.fType) { case Precision::RND_BOGUS: case Precision::RND_ERROR: @@ -399,8 +436,8 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac), fRoundingMode, status); - value.setMinFraction( - uprv_max(0, -getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac))); + resolvedMinFraction = + uprv_max(0, -getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac)); break; case Precision::RND_SIGNIFICANT: @@ -408,8 +445,8 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const getRoundingMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMaxSig), fRoundingMode, status); - value.setMinFraction( - uprv_max(0, -getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig))); + resolvedMinFraction = + uprv_max(0, -getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig)); // Make sure 
that digits are displayed on zero. if (value.isZeroish() && fPrecision.fUnion.fracSig.fMinSig > 0) { value.setMinInteger(1); @@ -417,23 +454,21 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const break; case Precision::RND_FRACTION_SIGNIFICANT: { - int32_t displayMag = getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac); - int32_t roundingMag = getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac); - if (fPrecision.fUnion.fracSig.fMinSig == -1) { - // Max Sig override - int32_t candidate = getRoundingMagnitudeSignificant( - value, - fPrecision.fUnion.fracSig.fMaxSig); - roundingMag = uprv_max(roundingMag, candidate); + int32_t roundingMag1 = getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac); + int32_t roundingMag2 = getRoundingMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMaxSig); + int32_t roundingMag; + if (fPrecision.fUnion.fracSig.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + roundingMag = uprv_min(roundingMag1, roundingMag2); } else { - // Min Sig override - int32_t candidate = getDisplayMagnitudeSignificant( - value, - fPrecision.fUnion.fracSig.fMinSig); - roundingMag = uprv_min(roundingMag, candidate); + roundingMag = uprv_max(roundingMag1, roundingMag2); } value.roundToMagnitude(roundingMag, fRoundingMode, status); - value.setMinFraction(uprv_max(0, -displayMag)); + + int32_t displayMag1 = getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac); + int32_t displayMag2 = getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig); + int32_t displayMag = uprv_min(displayMag1, displayMag2); + resolvedMinFraction = uprv_max(0, -displayMag); + break; } @@ -442,7 +477,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const fPrecision.fUnion.increment.fIncrement, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case 
Precision::RND_INCREMENT_ONE: @@ -450,7 +485,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const -fPrecision.fUnion.increment.fMaxFrac, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case Precision::RND_INCREMENT_FIVE: @@ -458,7 +493,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const -fPrecision.fUnion.increment.fMaxFrac, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case Precision::RND_CURRENCY: @@ -468,10 +503,17 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const default: UPRV_UNREACHABLE; } + + if (fPrecision.fTrailingZeroDisplay == UNUM_TRAILING_ZERO_AUTO || + // PLURAL_OPERAND_T returns fraction digits as an integer + value.getPluralOperand(PLURAL_OPERAND_T) != 0) { + value.setMinFraction(resolvedMinFraction); + } } void RoundingImpl::apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode /*status*/) { // This method is intended for the one specific purpose of helping print "00.000E0". + // Question: Is it useful to look at trailingZeroDisplay here? 
U_ASSERT(isSignificantDigits()); U_ASSERT(value.isZeroish()); value.setMinFraction(fPrecision.fUnion.fracSig.fMinSig - minInt); diff --git a/deps/icu-small/source/i18n/number_roundingutils.h b/deps/icu-small/source/i18n/number_roundingutils.h index e85cbae9fdd2ea..06fadd29fd544e 100644 --- a/deps/icu-small/source/i18n/number_roundingutils.h +++ b/deps/icu-small/source/i18n/number_roundingutils.h @@ -104,6 +104,45 @@ getRoundingDirection(bool isEven, bool isNegative, Section section, RoundingMode } break; + case RoundingMode::UNUM_ROUND_HALF_ODD: + switch (section) { + case SECTION_MIDPOINT: + return !isEven; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALF_CEILING: + switch (section) { + case SECTION_MIDPOINT: + return isNegative; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALF_FLOOR: + switch (section) { + case SECTION_MIDPOINT: + return !isNegative; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + default: break; } @@ -204,7 +243,7 @@ class RoundingImpl { * - see blueprint_helpers::parseIncrementOption(). * * Referencing MacroProps means needing to pull in the .o files that have the - * destructors for the SymbolsWrapper, Usage, and Scale classes. + * destructors for the SymbolsWrapper, StringProp, and Scale classes. 
*/ void parseIncrementOption(const StringSegment &segment, Precision &outPrecision, UErrorCode &status); diff --git a/deps/icu-small/source/i18n/number_skeletons.cpp b/deps/icu-small/source/i18n/number_skeletons.cpp index 028525a589db91..97d74303a4358e 100644 --- a/deps/icu-small/source/i18n/number_skeletons.cpp +++ b/deps/icu-small/source/i18n/number_skeletons.cpp @@ -68,6 +68,9 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"rounding-mode-down", STEM_ROUNDING_MODE_DOWN, status); b.add(u"rounding-mode-up", STEM_ROUNDING_MODE_UP, status); b.add(u"rounding-mode-half-even", STEM_ROUNDING_MODE_HALF_EVEN, status); + b.add(u"rounding-mode-half-odd", STEM_ROUNDING_MODE_HALF_ODD, status); + b.add(u"rounding-mode-half-ceiling", STEM_ROUNDING_MODE_HALF_CEILING, status); + b.add(u"rounding-mode-half-floor", STEM_ROUNDING_MODE_HALF_FLOOR, status); b.add(u"rounding-mode-half-down", STEM_ROUNDING_MODE_HALF_DOWN, status); b.add(u"rounding-mode-half-up", STEM_ROUNDING_MODE_HALF_UP, status); b.add(u"rounding-mode-unnecessary", STEM_ROUNDING_MODE_UNNECESSARY, status); @@ -91,6 +94,8 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"sign-accounting-always", STEM_SIGN_ACCOUNTING_ALWAYS, status); b.add(u"sign-except-zero", STEM_SIGN_EXCEPT_ZERO, status); b.add(u"sign-accounting-except-zero", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status); + b.add(u"sign-negative", STEM_SIGN_NEGATIVE, status); + b.add(u"sign-accounting-negative", STEM_SIGN_ACCOUNTING_NEGATIVE, status); b.add(u"decimal-auto", STEM_DECIMAL_AUTO, status); b.add(u"decimal-always", STEM_DECIMAL_ALWAYS, status); if (U_FAILURE(status)) { return; } @@ -121,6 +126,8 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"()!", STEM_SIGN_ACCOUNTING_ALWAYS, status); b.add(u"+?", STEM_SIGN_EXCEPT_ZERO, status); b.add(u"()?", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status); + b.add(u"+-", STEM_SIGN_NEGATIVE, status); + b.add(u"()-", STEM_SIGN_ACCOUNTING_NEGATIVE, status); if 
(U_FAILURE(status)) { return; } // Build the CharsTrie @@ -213,6 +220,12 @@ UNumberFormatRoundingMode stem_to_object::roundingMode(skeleton::StemEnum stem) return UNUM_ROUND_UP; case STEM_ROUNDING_MODE_HALF_EVEN: return UNUM_ROUND_HALFEVEN; + case STEM_ROUNDING_MODE_HALF_ODD: + return UNUM_ROUND_HALF_ODD; + case STEM_ROUNDING_MODE_HALF_CEILING: + return UNUM_ROUND_HALF_CEILING; + case STEM_ROUNDING_MODE_HALF_FLOOR: + return UNUM_ROUND_HALF_FLOOR; case STEM_ROUNDING_MODE_HALF_DOWN: return UNUM_ROUND_HALFDOWN; case STEM_ROUNDING_MODE_HALF_UP: @@ -278,6 +291,10 @@ UNumberSignDisplay stem_to_object::signDisplay(skeleton::StemEnum stem) { return UNUM_SIGN_EXCEPT_ZERO; case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: return UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; + case STEM_SIGN_NEGATIVE: + return UNUM_SIGN_NEGATIVE; + case STEM_SIGN_ACCOUNTING_NEGATIVE: + return UNUM_SIGN_ACCOUNTING_NEGATIVE; default: return UNUM_SIGN_COUNT; // for objects, throw; for enums, return COUNT } @@ -312,6 +329,15 @@ void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeS case UNUM_ROUND_HALFEVEN: sb.append(u"rounding-mode-half-even", -1); break; + case UNUM_ROUND_HALF_ODD: + sb.append(u"rounding-mode-half-odd", -1); + break; + case UNUM_ROUND_HALF_CEILING: + sb.append(u"rounding-mode-half-ceiling", -1); + break; + case UNUM_ROUND_HALF_FLOOR: + sb.append(u"rounding-mode-half-floor", -1); + break; case UNUM_ROUND_HALFDOWN: sb.append(u"rounding-mode-half-down", -1); break; @@ -399,6 +425,12 @@ void enum_to_stem_string::signDisplay(UNumberSignDisplay value, UnicodeString& s case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: sb.append(u"sign-accounting-except-zero", -1); break; + case UNUM_SIGN_NEGATIVE: + sb.append(u"sign-negative", -1); + break; + case UNUM_SIGN_ACCOUNTING_NEGATIVE: + sb.append(u"sign-accounting-negative", -1); + break; default: UPRV_UNREACHABLE; } @@ -584,7 +616,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case u'@': CHECK_NULL(seen, 
precision, status); blueprint_helpers::parseDigitsStem(segment, macros, status); - return STATE_NULL; + return STATE_PRECISION; case u'E': CHECK_NULL(seen, notation, status); blueprint_helpers::parseScientificStem(segment, macros, status); @@ -650,7 +682,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_PRECISION_INTEGER: return STATE_FRACTION_PRECISION; // allows for "precision-integer/@##" default: - return STATE_NULL; + return STATE_PRECISION; } case STEM_ROUNDING_MODE_CEILING: @@ -658,6 +690,9 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_ROUNDING_MODE_DOWN: case STEM_ROUNDING_MODE_UP: case STEM_ROUNDING_MODE_HALF_EVEN: + case STEM_ROUNDING_MODE_HALF_ODD: + case STEM_ROUNDING_MODE_HALF_CEILING: + case STEM_ROUNDING_MODE_HALF_FLOOR: case STEM_ROUNDING_MODE_HALF_DOWN: case STEM_ROUNDING_MODE_HALF_UP: case STEM_ROUNDING_MODE_UNNECESSARY: @@ -697,6 +732,8 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_SIGN_ACCOUNTING_ALWAYS: case STEM_SIGN_EXCEPT_ZERO: case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: + case STEM_SIGN_NEGATIVE: + case STEM_SIGN_ACCOUNTING_NEGATIVE: CHECK_NULL(seen, sign, status); macros.sign = stem_to_object::signDisplay(stem); return STATE_NULL; @@ -776,7 +813,7 @@ ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, return STATE_NULL; case STATE_INCREMENT_PRECISION: blueprint_helpers::parseIncrementOption(segment, macros, status); - return STATE_NULL; + return STATE_PRECISION; case STATE_INTEGER_WIDTH: blueprint_helpers::parseIntegerWidthOption(segment, macros, status); return STATE_NULL; @@ -816,6 +853,22 @@ ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, switch (stem) { case STATE_FRACTION_PRECISION: if (blueprint_helpers::parseFracSigOption(segment, macros, status)) { + return STATE_PRECISION; + } + if (U_FAILURE(status)) { + return {}; + } + // If the 
fracSig option was not found, try normal precision options. + stem = STATE_PRECISION; + break; + default: + break; + } + + // Trailing zeros option + switch (stem) { + case STATE_PRECISION: + if (blueprint_helpers::parseTrailingZeroOption(segment, macros, status)) { return STATE_NULL; } if (U_FAILURE(status)) { @@ -890,6 +943,10 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& status = U_UNSUPPORTED_ERROR; return; } + if (macros.unitDisplayCase.isSet()) { + status = U_UNSUPPORTED_ERROR; + return; + } if (macros.affixProvider != nullptr) { status = U_UNSUPPORTED_ERROR; return; @@ -1041,37 +1098,12 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); ErrorCode internalStatus; - auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus); + macros.unit = MeasureUnit::forIdentifier(buffer.toStringPiece(), internalStatus); if (internalStatus.isFailure()) { // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } - - // Mixed units can only be represented by full MeasureUnit instances, so we - // don't split the denominator into macros.perUnit. - if (fullUnit.complexity == UMEASURE_UNIT_MIXED) { - macros.unit = std::move(fullUnit).build(status); - return; - } - - // When we have a built-in unit (e.g. meter-per-second), we don't split it up - MeasureUnit testBuiltin = fullUnit.copy(status).build(status); - if (uprv_strcmp(testBuiltin.getType(), "") != 0) { - macros.unit = std::move(testBuiltin); - return; - } - - // TODO(ICU-20941): Clean this up. 
- for (int32_t i = 0; i < fullUnit.units.length(); i++) { - SingleUnitImpl* subUnit = fullUnit.units[i]; - if (subUnit->dimensionality > 0) { - macros.unit = macros.unit.product(subUnit->build(status), status); - } else { - subUnit->dimensionality *= -1; - macros.perUnit = macros.perUnit.product(subUnit->build(status), status); - } - } } void blueprint_helpers::parseUnitUsageOption(const StringSegment &segment, MacroProps ¯os, @@ -1226,6 +1258,7 @@ void blueprint_helpers::parseScientificStem(const StringSegment& segment, MacroP } else if (segment.charAt(offset) == u'?') { signDisplay = UNUM_SIGN_EXCEPT_ZERO; } else { + // NOTE: Other sign displays are not included because they aren't useful in this context goto fail; } offset++; @@ -1284,21 +1317,14 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr break; } } - // For the frac-sig option, there must be minSig or maxSig but not both. - // Valid: @+, @@+, @@@+ - // Valid: @#, @##, @### - // Invalid: @, @@, @@@ - // Invalid: @@#, @@##, @@@# if (offset < segment.length()) { if (isWildcardChar(segment.charAt(offset))) { + // @+, @@+, @@@+ maxSig = -1; offset++; - } else if (minSig > 1) { - // @@#, @@##, @@@# - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return false; } else { + // @#, @##, @### + // @@#, @@##, @@@# maxSig = minSig; for (; offset < segment.length(); offset++) { if (segment.charAt(offset) == u'#') { @@ -1310,25 +1336,56 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr } } else { // @, @@, @@@ - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return false; + maxSig = minSig; } + UNumberRoundingPriority priority; if (offset < segment.length()) { - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); + if (maxSig == -1) 
{ + // The wildcard character is not allowed with the priority annotation + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return false; + } + if (segment.codePointAt(offset) == u'r') { + priority = UNUM_ROUNDING_PRIORITY_RELAXED; + offset++; + } else if (segment.codePointAt(offset) == u's') { + priority = UNUM_ROUNDING_PRIORITY_STRICT; + offset++; + } else { + U_ASSERT(offset < segment.length()); + } + if (offset < segment.length()) { + // Invalid digits option for fraction rounder + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return false; + } + } else if (maxSig == -1) { + // withMinDigits + maxSig = minSig; + minSig = 1; + priority = UNUM_ROUNDING_PRIORITY_RELAXED; + } else if (minSig == 1) { + // withMaxDigits + priority = UNUM_ROUNDING_PRIORITY_STRICT; + } else { + // Digits options with both min and max sig require the priority option status = U_NUMBER_SKELETON_SYNTAX_ERROR; return false; } auto& oldPrecision = static_cast(macros.precision); - if (maxSig == -1) { - macros.precision = oldPrecision.withMinDigits(minSig); - } else { - macros.precision = oldPrecision.withMaxDigits(maxSig); - } + macros.precision = oldPrecision.withSignificantDigits(minSig, maxSig, priority); return true; } +bool blueprint_helpers::parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) { + if (segment == u"w") { + macros.precision = macros.precision.trailingZeroDisplay(UNUM_TRAILING_ZERO_HIDE_IF_WHOLE); + return true; + } + return false; +} + void blueprint_helpers::parseIncrementOption(const StringSegment &segment, MacroProps ¯os, UErrorCode &status) { number::impl::parseIncrementOption(segment, macros.precision, status); @@ -1537,7 +1594,7 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& /* status */) { if (macros.usage.isSet()) { sb.append(u"usage/", -1); - sb.append(UnicodeString(macros.usage.fUsage, -1, US_INV)); + 
sb.append(UnicodeString(macros.usage.fValue, -1, US_INV)); return true; } return false; @@ -1556,10 +1613,11 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status); sb.append(u'/'); - if (impl.fMinSig == -1) { - blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status); + blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status); + if (impl.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + sb.append(u'r'); } else { - blueprint_helpers::generateDigitsStem(impl.fMinSig, -1, sb, status); + sb.append(u's'); } } else if (macros.precision.fType == Precision::RND_INCREMENT || macros.precision.fType == Precision::RND_INCREMENT_ONE @@ -1583,6 +1641,10 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE return false; } + if (macros.precision.fTrailingZeroDisplay == UNUM_TRAILING_ZERO_HIDE_IF_WHOLE) { + sb.append(u"/w", -1); + } + // NOTE: Always return true for rounding because the default value depends on other options. 
return true; } diff --git a/deps/icu-small/source/i18n/number_skeletons.h b/deps/icu-small/source/i18n/number_skeletons.h index 201267e635cd6a..af636504283059 100644 --- a/deps/icu-small/source/i18n/number_skeletons.h +++ b/deps/icu-small/source/i18n/number_skeletons.h @@ -42,6 +42,7 @@ enum ParseState { STATE_SCIENTIFIC, STATE_FRACTION_PRECISION, + STATE_PRECISION, // Section 2: An option is required: @@ -85,6 +86,9 @@ enum StemEnum { STEM_ROUNDING_MODE_DOWN, STEM_ROUNDING_MODE_UP, STEM_ROUNDING_MODE_HALF_EVEN, + STEM_ROUNDING_MODE_HALF_ODD, + STEM_ROUNDING_MODE_HALF_CEILING, + STEM_ROUNDING_MODE_HALF_FLOOR, STEM_ROUNDING_MODE_HALF_DOWN, STEM_ROUNDING_MODE_HALF_UP, STEM_ROUNDING_MODE_UNNECESSARY, @@ -108,6 +112,8 @@ enum StemEnum { STEM_SIGN_ACCOUNTING_ALWAYS, STEM_SIGN_EXCEPT_ZERO, STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, + STEM_SIGN_NEGATIVE, + STEM_SIGN_ACCOUNTING_NEGATIVE, STEM_DECIMAL_AUTO, STEM_DECIMAL_ALWAYS, @@ -273,6 +279,9 @@ void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCo /** @return Whether we successfully found and parsed a frac-sig option. */ bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); +/** @return Whether we successfully found and parsed a trailing zero option. 
*/ +bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); + void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void diff --git a/deps/icu-small/source/i18n/number_usageprefs.cpp b/deps/icu-small/source/i18n/number_usageprefs.cpp index 0d9cb06c50a2ab..ed426da086e327 100644 --- a/deps/icu-small/source/i18n/number_usageprefs.cpp +++ b/deps/icu-small/source/i18n/number_usageprefs.cpp @@ -28,116 +28,124 @@ using icu::StringSegment; using icu::units::ConversionRates; // Copy constructor -Usage::Usage(const Usage &other) : Usage() { +StringProp::StringProp(const StringProp &other) : StringProp() { this->operator=(other); } // Copy assignment operator -Usage &Usage::operator=(const Usage &other) { +StringProp &StringProp::operator=(const StringProp &other) { + if (this == &other) { return *this; } // self-assignment: no-op fLength = 0; fError = other.fError; - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } - if (other.fUsage == nullptr) { + if (other.fValue == nullptr) { return *this; } if (U_FAILURE(other.fError)) { // We don't bother trying to allocating memory if we're in any case busy - // copying an errored Usage. + // copying an errored StringProp. 
return *this; } - fUsage = (char *)uprv_malloc(other.fLength + 1); - if (fUsage == nullptr) { + fValue = (char *)uprv_malloc(other.fLength + 1); + if (fValue == nullptr) { fError = U_MEMORY_ALLOCATION_ERROR; return *this; } fLength = other.fLength; - uprv_strncpy(fUsage, other.fUsage, fLength + 1); + uprv_strncpy(fValue, other.fValue, fLength + 1); return *this; } // Move constructor -Usage::Usage(Usage &&src) U_NOEXCEPT : fUsage(src.fUsage), fLength(src.fLength), fError(src.fError) { +StringProp::StringProp(StringProp &&src) U_NOEXCEPT : fValue(src.fValue), + fLength(src.fLength), + fError(src.fError) { // Take ownership away from src if necessary - src.fUsage = nullptr; + src.fValue = nullptr; } // Move assignment operator -Usage &Usage::operator=(Usage &&src) U_NOEXCEPT { +StringProp &StringProp::operator=(StringProp &&src) U_NOEXCEPT { if (this == &src) { return *this; } - if (fUsage != nullptr) { - uprv_free(fUsage); + if (fValue != nullptr) { + uprv_free(fValue); } - fUsage = src.fUsage; + fValue = src.fValue; fLength = src.fLength; fError = src.fError; // Take ownership away from src if necessary - src.fUsage = nullptr; + src.fValue = nullptr; return *this; } -Usage::~Usage() { - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; +StringProp::~StringProp() { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } } -void Usage::set(StringPiece value) { - if (fUsage != nullptr) { - uprv_free(fUsage); - fUsage = nullptr; +void StringProp::set(StringPiece value) { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; } fLength = value.length(); - fUsage = (char *)uprv_malloc(fLength + 1); - if (fUsage == nullptr) { + fValue = (char *)uprv_malloc(fLength + 1); + if (fValue == nullptr) { fLength = 0; fError = U_MEMORY_ALLOCATION_ERROR; return; } - uprv_strncpy(fUsage, value.data(), fLength); - fUsage[fLength] = 0; + uprv_strncpy(fValue, value.data(), fLength); + fValue[fLength] = 0; } // Populates micros.mixedMeasures 
and modifies quantity, based on the values in // measures. void mixedMeasuresToMicros(const MaybeStackVector &measures, DecimalQuantity *quantity, MicroProps *micros, UErrorCode status) { - micros->mixedMeasuresCount = measures.length() - 1; - if (micros->mixedMeasuresCount > 0) { -#ifdef U_DEBUG - U_ASSERT(micros->outputUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); - U_ASSERT(U_SUCCESS(status)); - // Check that we received measurements with the expected MeasureUnits: - MeasureUnitImpl temp; - const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(micros->outputUnit, temp, status); - U_ASSERT(U_SUCCESS(status)); - U_ASSERT(measures.length() == impl.units.length()); - for (int32_t i = 0; i < measures.length(); i++) { - U_ASSERT(measures[i]->getUnit() == impl.units[i]->build(status)); - } - (void)impl; -#endif - // Mixed units: except for the last value, we pass all values to the - // LongNameHandler via micros->mixedMeasures. - if (micros->mixedMeasures.getCapacity() < micros->mixedMeasuresCount) { - if (micros->mixedMeasures.resize(micros->mixedMeasuresCount) == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } + micros->mixedMeasuresCount = measures.length(); + + if (micros->mixedMeasures.getCapacity() < micros->mixedMeasuresCount) { + if (micros->mixedMeasures.resize(micros->mixedMeasuresCount) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } - for (int32_t i = 0; i < micros->mixedMeasuresCount; i++) { + } + + for (int32_t i = 0; i < micros->mixedMeasuresCount; i++) { + switch (measures[i]->getNumber().getType()) { + case Formattable::kInt64: micros->mixedMeasures[i] = measures[i]->getNumber().getInt64(); + break; + + case Formattable::kDouble: + U_ASSERT(micros->indexOfQuantity < 0); + quantity->setToDouble(measures[i]->getNumber().getDouble()); + micros->indexOfQuantity = i; + break; + + default: + U_ASSERT(0 == "Found a Measure Number which is neither a double nor an int"); + UPRV_UNREACHABLE; + break; } - } else { - 
micros->mixedMeasuresCount = 0; + + if (U_FAILURE(status)) { + return; + } + } + + if (micros->indexOfQuantity < 0) { + // There is no quantity. + status = U_INTERNAL_PROGRAM_ERROR; } - // The last value (potentially the only value) gets passed on via quantity. - quantity->setToDouble(measures[measures.length() - 1]->getNumber().getDouble()); } UsagePrefsHandler::UsagePrefsHandler(const Locale &locale, @@ -170,22 +178,20 @@ void UsagePrefsHandler::processQuantity(DecimalQuantity &quantity, MicroProps &m mixedMeasuresToMicros(routedMeasures, &quantity, &micros, status); } -UnitConversionHandler::UnitConversionHandler(const MeasureUnit &inputUnit, const MeasureUnit &outputUnit, +UnitConversionHandler::UnitConversionHandler(const MeasureUnit &targetUnit, const MicroPropsGenerator *parent, UErrorCode &status) - : fOutputUnit(outputUnit), fParent(parent) { + : fOutputUnit(targetUnit), fParent(parent) { MeasureUnitImpl tempInput, tempOutput; - const MeasureUnitImpl &inputUnitImpl = MeasureUnitImpl::forMeasureUnit(inputUnit, tempInput, status); - const MeasureUnitImpl &outputUnitImpl = - MeasureUnitImpl::forMeasureUnit(outputUnit, tempOutput, status); - // TODO: this should become an initOnce thing? Review with other - // ConversionRates usages. 
ConversionRates conversionRates(status); if (U_FAILURE(status)) { return; } + + const MeasureUnitImpl &targetUnitImpl = + MeasureUnitImpl::forMeasureUnit(targetUnit, tempOutput, status); fUnitConverter.adoptInsteadAndCheckErrorCode( - new ComplexUnitsConverter(inputUnitImpl, outputUnitImpl, conversionRates, status), status); + new ComplexUnitsConverter(targetUnitImpl, conversionRates, status), status); } void UnitConversionHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros, diff --git a/deps/icu-small/source/i18n/number_usageprefs.h b/deps/icu-small/source/i18n/number_usageprefs.h index 9e8bd936bd7248..70547225a00761 100644 --- a/deps/icu-small/source/i18n/number_usageprefs.h +++ b/deps/icu-small/source/i18n/number_usageprefs.h @@ -97,14 +97,15 @@ class U_I18N_API UnitConversionHandler : public MicroPropsGenerator, public UMem /** * Constructor. * - * @param inputUnit Specifies the input MeasureUnit. Mixed units are not - * supported as input (because input is just a single decimal quantity). - * @param outputUnit Specifies the output MeasureUnit. + * @param targetUnit Specifies the output MeasureUnit. The input MeasureUnit + * is derived from it: in case of a mixed unit, the biggest unit is + * taken as the input unit. If not a mixed unit, the input unit will be + * the same as the output unit and no unit conversion takes place. * @param parent The parent MicroPropsGenerator. * @param status Receives status. */ - UnitConversionHandler(const MeasureUnit &inputUnit, const MeasureUnit &outputUnit, - const MicroPropsGenerator *parent, UErrorCode &status); + UnitConversionHandler(const MeasureUnit &targetUnit, const MicroPropsGenerator *parent, + UErrorCode &status); /** * Obtains the appropriate output values from the Unit Converter. 
diff --git a/deps/icu-small/source/i18n/number_utypes.h b/deps/icu-small/source/i18n/number_utypes.h index d97eadc5cdb96e..50c861787f4ed9 100644 --- a/deps/icu-small/source/i18n/number_utypes.h +++ b/deps/icu-small/source/i18n/number_utypes.h @@ -42,6 +42,9 @@ class U_I18N_API UFormattedNumberData : public FormattedValueStringBuilderImpl { // TODO(units,hugovdm): populate this correctly for the general case - it's // currently only implemented for the .usage() use case. MeasureUnit outputUnit; + + // The gender of the formatted output. + const char *gender = ""; }; diff --git a/deps/icu-small/source/i18n/numfmt.cpp b/deps/icu-small/source/i18n/numfmt.cpp index bf78179bcddefe..bffefa5e3998b8 100644 --- a/deps/icu-small/source/i18n/numfmt.cpp +++ b/deps/icu-small/source/i18n/numfmt.cpp @@ -13,7 +13,7 @@ * Date Name Description * 02/19/97 aliu Converted from java. * 03/18/97 clhuang Implemented with C++ APIs. -* 04/17/97 aliu Enlarged MAX_INTEGER_DIGITS to fully accomodate the +* 04/17/97 aliu Enlarged MAX_INTEGER_DIGITS to fully accommodate the * largest double, by default. * Changed DigitCount to int per code review. 
* 07/20/98 stephen Changed operator== to check for grouping diff --git a/deps/icu-small/source/i18n/numrange_fluent.cpp b/deps/icu-small/source/i18n/numrange_fluent.cpp index d9286d1d713d2c..f1060b3c21d45e 100644 --- a/deps/icu-small/source/i18n/numrange_fluent.cpp +++ b/deps/icu-small/source/i18n/numrange_fluent.cpp @@ -245,6 +245,7 @@ LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(NFS&& src) U_N } LocalizedNumberRangeFormatter& LocalizedNumberRangeFormatter::operator=(const LNF& other) { + if (this == &other) { return *this; } // self-assignment: no-op NFS::operator=(static_cast&>(other)); // Do not steal; just clear delete fAtomicFormatter.exchange(nullptr); diff --git a/deps/icu-small/source/i18n/numrange_impl.cpp b/deps/icu-small/source/i18n/numrange_impl.cpp index 3aae5c23548349..aa713f1398b502 100644 --- a/deps/icu-small/source/i18n/numrange_impl.cpp +++ b/deps/icu-small/source/i18n/numrange_impl.cpp @@ -305,7 +305,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, // INNER MODIFIER collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner); - // All done checking for collapsability. + // All done checking for collapsibility. break; } @@ -328,6 +328,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, #define UPRV_INDEX_1 (lengthPrefix + length1) #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix) #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2) + #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix) int32_t lengthRange = SimpleModifier::formatTwoArgPattern( fRangeFormatter, @@ -367,31 +368,38 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, // TODO: Support padding? 
if (collapseInner) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseMiddle) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseOuter) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modOuter->apply(string, 
UPRV_INDEX_2, UPRV_INDEX_4, status); } + + // Now that all pieces are added, save the span info. + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status); + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status); } diff --git a/deps/icu-small/source/i18n/olsontz.cpp b/deps/icu-small/source/i18n/olsontz.cpp index cb142f7b9e080f..67aa1f7af81d68 100644 --- a/deps/icu-small/source/i18n/olsontz.cpp +++ b/deps/icu-small/source/i18n/olsontz.cpp @@ -274,6 +274,7 @@ OlsonTimeZone::OlsonTimeZone(const OlsonTimeZone& other) : * Assignment operator */ OlsonTimeZone& OlsonTimeZone::operator=(const OlsonTimeZone& other) { + if (this == &other) { return *this; } // self-assignment: no-op canonicalID = other.canonicalID; transitionTimesPre32 = other.transitionTimesPre32; @@ -399,9 +400,9 @@ void OlsonTimeZone::getOffset(UDate date, UBool local, int32_t& rawoff, } } -void -OlsonTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const { +void OlsonTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const { if (U_FAILURE(ec)) { return; } @@ -812,7 +813,7 @@ OlsonTimeZone::initTransitionRules(UErrorCode& status) { if (finalZone->useDaylightTime()) { /* * Note: When an OlsonTimeZone is constructed, we should set the final year - * as the start year of finalZone. However, the bounday condition used for + * as the start year of finalZone. However, the boundary condition used for * getting offset from finalZone has some problems. * For now, we do not set the valid start year when the construction time * and create a clone and set the start year when extracting rules. 
diff --git a/deps/icu-small/source/i18n/olsontz.h b/deps/icu-small/source/i18n/olsontz.h index 643a695369ef48..6bedb8792b0cd4 100644 --- a/deps/icu-small/source/i18n/olsontz.h +++ b/deps/icu-small/source/i18n/olsontz.h @@ -187,8 +187,10 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { /** * BasicTimeZone API. */ - virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const; + virtual void getOffsetFromLocal( + UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; /** * TimeZone API. This method has no effect since objects of this diff --git a/deps/icu-small/source/i18n/plurrule.cpp b/deps/icu-small/source/i18n/plurrule.cpp index 884db7c4f59311..bc106acce23aef 100644 --- a/deps/icu-small/source/i18n/plurrule.cpp +++ b/deps/icu-small/source/i18n/plurrule.cpp @@ -60,6 +60,7 @@ static const UChar PK_VAR_I[]={LOW_I,0}; static const UChar PK_VAR_F[]={LOW_F,0}; static const UChar PK_VAR_T[]={LOW_T,0}; static const UChar PK_VAR_E[]={LOW_E,0}; +static const UChar PK_VAR_C[]={LOW_C,0}; static const UChar PK_VAR_V[]={LOW_V,0}; static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0}; @@ -421,7 +422,6 @@ getSamplesFromString(const UnicodeString &samples, double *destDbl, destFd[sampleCount++] = fixed; } } else { - FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); double rangeLo = fixedLo.source; @@ -514,6 +514,7 @@ PluralRules::getSamples(const UnicodeString &keyword, FixedDecimal *dest, if (rc == nullptr) { return 0; } + int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, nullptr, dest, destCapacity, status); if (numSamples == 0) { numSamples = 
getSamplesFromString(rc->fDecimalSamples, nullptr, dest, destCapacity, status); @@ -706,6 +707,7 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr case tVariableF: case tVariableT: case tVariableE: + case tVariableC: case tVariableV: U_ASSERT(curAndConstraint != nullptr); curAndConstraint->digitsType = type; @@ -1092,6 +1094,8 @@ static UnicodeString tokenString(tokenType tok) { s.append(LOW_T); break; case tVariableE: s.append(LOW_E); break; + case tVariableC: + s.append(LOW_C); break; default: s.append(TILDE); } @@ -1269,6 +1273,7 @@ PluralRuleParser::checkSyntax(UErrorCode &status) case tVariableF: case tVariableT: case tVariableE: + case tVariableC: case tVariableV: if (type != tIs && type != tMod && type != tIn && type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { @@ -1286,6 +1291,7 @@ PluralRuleParser::checkSyntax(UErrorCode &status) type == tVariableF || type == tVariableT || type == tVariableE || + type == tVariableC || type == tVariableV || type == tAt)) { status = U_UNEXPECTED_TOKEN; @@ -1318,6 +1324,7 @@ PluralRuleParser::checkSyntax(UErrorCode &status) type != tVariableF && type != tVariableT && type != tVariableE && + type != tVariableC && type != tVariableV) { status = U_UNEXPECTED_TOKEN; } @@ -1497,6 +1504,8 @@ PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) keyType = tVariableT; } else if (0 == token.compare(PK_VAR_E, 1)) { keyType = tVariableE; + } else if (0 == token.compare(PK_VAR_C, 1)) { + keyType = tVariableC; } else if (0 == token.compare(PK_VAR_V, 1)) { keyType = tVariableV; } else if (0 == token.compare(PK_IS, 2)) { @@ -1596,11 +1605,17 @@ PluralOperand tokenTypeToPluralOperand(tokenType tt) { return PLURAL_OPERAND_T; case tVariableE: return PLURAL_OPERAND_E; + case tVariableC: + return PLURAL_OPERAND_E; default: UPRV_UNREACHABLE; // unexpected. 
} } +FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c) { + init(n, v, f, e, c); +} + FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f, int32_t e) { init(n, v, f, e); // check values. TODO make into unit test. @@ -1642,16 +1657,30 @@ FixedDecimal::FixedDecimal() { FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { CharString cs; int32_t parsedExponent = 0; + int32_t parsedCompactExponent = 0; int32_t exponentIdx = num.indexOf(u'e'); if (exponentIdx < 0) { exponentIdx = num.indexOf(u'E'); } + int32_t compactExponentIdx = num.indexOf(u'c'); + if (compactExponentIdx < 0) { + compactExponentIdx = num.indexOf(u'C'); + } + if (exponentIdx >= 0) { cs.appendInvariantChars(num.tempSubString(0, exponentIdx), status); int32_t expSubstrStart = exponentIdx + 1; parsedExponent = ICU_Utility::parseAsciiInteger(num, expSubstrStart); } + else if (compactExponentIdx >= 0) { + cs.appendInvariantChars(num.tempSubString(0, compactExponentIdx), status); + int32_t expSubstrStart = compactExponentIdx + 1; + parsedCompactExponent = ICU_Utility::parseAsciiInteger(num, expSubstrStart); + + parsedExponent = parsedCompactExponent; + exponentIdx = compactExponentIdx; + } else { cs.appendInvariantChars(num, status); } @@ -1706,13 +1735,20 @@ void FixedDecimal::init(double n, int32_t v, int64_t f) { init(n, v, f, exponent); } - void FixedDecimal::init(double n, int32_t v, int64_t f, int32_t e) { + // Currently, `c` is an alias for `e` + init(n, v, f, e, e); +} + +void FixedDecimal::init(double n, int32_t v, int64_t f, int32_t e, int32_t c) { isNegative = n < 0.0; source = fabs(n); _isNaN = uprv_isNaN(source); _isInfinite = uprv_isInfinite(source); exponent = e; + if (exponent == 0) { + exponent = c; + } if (_isNaN || _isInfinite) { v = 0; f = 0; @@ -1843,6 +1879,7 @@ double FixedDecimal::getPluralOperand(PluralOperand operand) const { case PLURAL_OPERAND_T: return static_cast(decimalDigitsWithoutTrailingZeros); case PLURAL_OPERAND_V: 
return visibleDecimalDigitCount; case PLURAL_OPERAND_E: return exponent; + case PLURAL_OPERAND_C: return exponent; default: UPRV_UNREACHABLE; // unexpected. } @@ -1876,12 +1913,12 @@ bool FixedDecimal::operator==(const FixedDecimal &other) const { UnicodeString FixedDecimal::toString() const { char pattern[15]; char buffer[20]; - if (exponent == 0) { - snprintf(pattern, sizeof(pattern), "%%.%df", visibleDecimalDigitCount); - snprintf(buffer, sizeof(buffer), pattern, source); - } else { + if (exponent != 0) { snprintf(pattern, sizeof(pattern), "%%.%dfe%%d", visibleDecimalDigitCount); snprintf(buffer, sizeof(buffer), pattern, source, exponent); + } else { + snprintf(pattern, sizeof(pattern), "%%.%df", visibleDecimalDigitCount); + snprintf(buffer, sizeof(buffer), pattern, source); } return UnicodeString(buffer, -1, US_INV); } diff --git a/deps/icu-small/source/i18n/plurrule_impl.h b/deps/icu-small/source/i18n/plurrule_impl.h index 8849e67e571da6..69d44f83d4a0fa 100644 --- a/deps/icu-small/source/i18n/plurrule_impl.h +++ b/deps/icu-small/source/i18n/plurrule_impl.h @@ -145,6 +145,7 @@ enum tokenType { tVariableV, tVariableT, tVariableE, + tVariableC, tDecimal, tInteger, tEOF @@ -222,11 +223,20 @@ enum PluralOperand { PLURAL_OPERAND_W, /** - * Suppressed exponent for compact notation (exponent needed in - * scientific notation with compact notation to approximate i). + * Suppressed exponent for scientific notation (exponent needed in + * scientific notation to approximate i). */ PLURAL_OPERAND_E, + /** + * This operand is currently treated as an alias for `PLURAL_OPERAND_E`. + * In the future, it will represent: + * + * Suppressed exponent for compact notation (exponent needed in + * compact notation to approximate i). + */ + PLURAL_OPERAND_C, + /** * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. * @@ -280,8 +290,10 @@ class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { * @param n the number, e.g. 
12.345 * @param v The number of visible fraction digits, e.g. 3 * @param f The fraction digits, e.g. 345 - * @param e The exponent, e.g. 7 in 1.2e7 (for compact/scientific) + * @param e The exponent, e.g. 7 in 1.2e7, for scientific notation + * @param c Currently: an alias for param `e`. */ + FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c); FixedDecimal(double n, int32_t v, int64_t f, int32_t e); FixedDecimal(double n, int32_t v, int64_t f); FixedDecimal(double n, int32_t); @@ -302,6 +314,7 @@ class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { int32_t getVisibleFractionDigitCount() const; + void init(double n, int32_t v, int64_t f, int32_t e, int32_t c); void init(double n, int32_t v, int64_t f, int32_t e); void init(double n, int32_t v, int64_t f); void init(double n); diff --git a/deps/icu-small/source/i18n/rbt.cpp b/deps/icu-small/source/i18n/rbt.cpp index 02d0ce6ceb20f8..65199478449014 100644 --- a/deps/icu-small/source/i18n/rbt.cpp +++ b/deps/icu-small/source/i18n/rbt.cpp @@ -101,7 +101,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with no filter. + * Convenience constructor with no filter. */ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, @@ -114,7 +114,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with no filter and FORWARD direction. + * Convenience constructor with no filter and FORWARD direction. */ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, @@ -126,7 +126,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with FORWARD direction. + * Convenience constructor with FORWARD direction. 
*/ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, diff --git a/deps/icu-small/source/i18n/rbt.h b/deps/icu-small/source/i18n/rbt.h index 6c34824181fec3..4d9991c48f8b72 100644 --- a/deps/icu-small/source/i18n/rbt.h +++ b/deps/icu-small/source/i18n/rbt.h @@ -80,7 +80,7 @@ class RuleBasedTransliterator : public Transliterator { UErrorCode& status);*/ /** - * Covenience constructor with no filter. + * Convenience constructor with no filter. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ /*RuleBasedTransliterator(const UnicodeString& id, @@ -89,7 +89,7 @@ class RuleBasedTransliterator : public Transliterator { UErrorCode& status);*/ /** - * Covenience constructor with no filter and FORWARD direction. + * Convenience constructor with no filter and FORWARD direction. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ /*RuleBasedTransliterator(const UnicodeString& id, @@ -97,7 +97,7 @@ class RuleBasedTransliterator : public Transliterator { UErrorCode& status);*/ /** - * Covenience constructor with FORWARD direction. + * Convenience constructor with FORWARD direction. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ /*RuleBasedTransliterator(const UnicodeString& id, @@ -108,7 +108,7 @@ class RuleBasedTransliterator : public Transliterator { friend class TransliteratorRegistry; // to access TransliterationRuleData convenience ctor /** - * Covenience constructor. + * Convenience constructor. * @param id the id for the transliterator. * @param theData the rule data for the transliterator. 
* @param adoptedFilter the filter for the transliterator diff --git a/deps/icu-small/source/i18n/rbt_pars.cpp b/deps/icu-small/source/i18n/rbt_pars.cpp index 802d65d0ac9bbe..69465aecb9b142 100644 --- a/deps/icu-small/source/i18n/rbt_pars.cpp +++ b/deps/icu-small/source/i18n/rbt_pars.cpp @@ -945,7 +945,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, if (c == RULE_COMMENT_CHAR) { pos = rule.indexOf((UChar)0x000A /*\n*/, pos) + 1; if (pos == 0) { - break; // No "\n" found; rest of rule is a commnet + break; // No "\n" found; rest of rule is a comment } continue; // Either fall out or restart with next line } @@ -1159,7 +1159,7 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end, UErrorCo /** * Assert that the given character is NOT within the variable range. - * If it is, return FALSE. This is neccesary to ensure that the + * If it is, return FALSE. This is necessary to ensure that the * variable range does not overlap characters used in a rule. */ UBool TransliteratorParser::checkVariableRange(UChar32 ch) const { diff --git a/deps/icu-small/source/i18n/rbt_pars.h b/deps/icu-small/source/i18n/rbt_pars.h index 3e6517cfeb9078..9336d410d351e5 100644 --- a/deps/icu-small/source/i18n/rbt_pars.h +++ b/deps/icu-small/source/i18n/rbt_pars.h @@ -210,7 +210,7 @@ class TransliteratorParser : public UMemory { /** * Assert that the given character is NOT within the variable range. - * If it is, return false. This is neccesary to ensure that the + * If it is, return false. This is necessary to ensure that the * variable range does not overlap characters used in a rule. * @param ch the given character. * @return True, if the given character is NOT within the variable range. 
diff --git a/deps/icu-small/source/i18n/rbtz.cpp b/deps/icu-small/source/i18n/rbtz.cpp index 2c3747abdafeaf..9d8eea9263a280 100644 --- a/deps/icu-small/source/i18n/rbtz.cpp +++ b/deps/icu-small/source/i18n/rbtz.cpp @@ -403,9 +403,9 @@ RuleBasedTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, getOffsetInternal(date, local, kFormer, kLatter, rawOffset, dstOffset, status); } -void -RuleBasedTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { +void RuleBasedTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { getOffsetInternal(date, TRUE, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, status); } diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index 7b67ce82b5db12..6c3a9e10ba12f2 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -557,7 +557,7 @@ UBool RegexCompile::doParseActions(int32_t action) // // Note: Addition of transparent input regions, with the need to // restore the original regions when failing out of a lookahead - // block, complicated this sequence. Some conbined opcodes + // block, complicated this sequence. Some combined opcodes // might make sense - or might not, lookahead aren't that common. // // Caution: min match length optimization knows about this @@ -2397,7 +2397,7 @@ void RegexCompile::compileSet(UnicodeSet *theSet) } // Remove any strings from the set. // There shoudn't be any, but just in case. - // (Case Closure can add them; if we had a simple case closure avaialble that + // (Case Closure can add them; if we had a simple case closure available that // ignored strings, that would be better.) 
theSet->removeAllStrings(); int32_t setSize = theSet->size(); @@ -2485,7 +2485,7 @@ void RegexCompile::compileInterval(int32_t InitOp, int32_t LoopOp) fRXPat->fCompiledPat->setElementAt(fIntervalLow, topOfBlock+2); fRXPat->fCompiledPat->setElementAt(fIntervalUpper, topOfBlock+3); - // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op. + // Append the CTR_LOOP op. The operand is the location of the CTR_INIT op. // Goes at end of the block being looped over, so just append to the code so far. appendOp(LoopOp, topOfBlock); @@ -3475,6 +3475,9 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { // value may be longer than the actual maximum; it must // never be shorter. // +// start, end: the range of the pattern to check. +// end is inclusive. +// //------------------------------------------------------------------------------ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { if (U_FAILURE(*fStatus)) { @@ -3720,14 +3723,14 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { // Look-behind. Scan forward until the matching look-around end, // without processing the look-behind block. int32_t dataLoc = URX_VAL(op); - for (loc = loc + 1; loc < end; ++loc) { + for (loc = loc + 1; loc <= end; ++loc) { op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); int32_t opType = URX_TYPE(op); if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) { break; } } - U_ASSERT(loc < end); + U_ASSERT(loc <= end); } break; diff --git a/deps/icu-small/source/i18n/reldatefmt.cpp b/deps/icu-small/source/i18n/reldatefmt.cpp index 5d43606169c8a8..d41ff22b9c212a 100644 --- a/deps/icu-small/source/i18n/reldatefmt.cpp +++ b/deps/icu-small/source/i18n/reldatefmt.cpp @@ -95,7 +95,7 @@ class RelativeDateTimeCacheData: public SharedObject { const UnicodeString emptyString; - // Mappping from source to target styles for alias fallback. + // Mapping from source to target styles for alias fallback. 
int32_t fallBackCache[UDAT_STYLE_COUNT]; void adoptCombinedDateAndTime(SimpleFormatter *fmtToAdopt) { diff --git a/deps/icu-small/source/i18n/reldtfmt.cpp b/deps/icu-small/source/i18n/reldtfmt.cpp index c74c30c20ca383..2bc59c5128b43a 100644 --- a/deps/icu-small/source/i18n/reldtfmt.cpp +++ b/deps/icu-small/source/i18n/reldtfmt.cpp @@ -334,7 +334,7 @@ UDate RelativeDateFormat::parse( const UnicodeString& text, ParsePosition& pos) const { // redefined here because the other parse() function hides this function's - // cunterpart on DateFormat + // counterpart on DateFormat return DateFormat::parse(text, pos); } diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index 653ef4d6c1f8d1..e358dbd1e983f8 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -3913,7 +3913,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; if (lbStartIdx > 0) { - // move index to a code point boudary, if it's not on one already. + // move index to a code point boundary, if it's not on one already. UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx); lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); } @@ -3999,7 +3999,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; if (lbStartIdx > 0) { - // move index to a code point boudary, if it's not on one already. + // move index to a code point boundary, if it's not on one already. 
UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx); lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); } diff --git a/deps/icu-small/source/i18n/scriptset.h b/deps/icu-small/source/i18n/scriptset.h index a41ab737a6dc38..b770995832872d 100644 --- a/deps/icu-small/source/i18n/scriptset.h +++ b/deps/icu-small/source/i18n/scriptset.h @@ -51,7 +51,7 @@ class U_I18N_API ScriptSet: public UMemory { ScriptSet &reset(UScriptCode script, UErrorCode &status); ScriptSet &intersect(const ScriptSet &other); ScriptSet &intersect(UScriptCode script, UErrorCode &status); - UBool intersects(const ScriptSet &other) const; // Sets contain at least one script in commmon. + UBool intersects(const ScriptSet &other) const; // Sets contain at least one script in common. UBool contains(const ScriptSet &other) const; // All set bits in other are also set in this. ScriptSet &setAll(); diff --git a/deps/icu-small/source/i18n/simpletz.cpp b/deps/icu-small/source/i18n/simpletz.cpp index 12c220595cd2c1..0007d4aec8977e 100644 --- a/deps/icu-small/source/i18n/simpletz.cpp +++ b/deps/icu-small/source/i18n/simpletz.cpp @@ -42,7 +42,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleTimeZone) // Use only for decodeStartRule() and decodeEndRule() where the year is not -// available. Set February to 29 days to accomodate rules with that date +// available. Set February to 29 days to accommodate rules with that date // and day-of-week-on-or-before-that-date mode (DOW_LE_DOM_MODE). // The compareToRule() method adjusts to February 28 in non-leap years. 
// @@ -509,8 +509,10 @@ SimpleTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, } void -SimpleTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawOffsetGMT, int32_t& savingsDST, UErrorCode& status) const { +SimpleTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffsetGMT, + int32_t& savingsDST, UErrorCode& status) const +{ if (U_FAILURE(status)) { return; } diff --git a/deps/icu-small/source/i18n/smpdtfmt.cpp b/deps/icu-small/source/i18n/smpdtfmt.cpp index 4717899cf38d7d..a3ec7cb026591c 100644 --- a/deps/icu-small/source/i18n/smpdtfmt.cpp +++ b/deps/icu-small/source/i18n/smpdtfmt.cpp @@ -231,6 +231,13 @@ static const int32_t gFieldRangeBias[] = { static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000; static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000; +/** + * Maximum range for detecting daylight offset of a time zone when parsed time zone + * string indicates it's daylight saving time, but the detected time zone does not + * observe daylight saving time at the parsed date. + */ +static const double MAX_DAYLIGHT_DETECTION_RANGE = 30*365*24*60*60*1000.0; + static UMutex LOCK; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat) @@ -2575,51 +2582,47 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& } else { // tztype == TZTYPE_DST if (dst == 0) { if (btz != NULL) { - UDate time = localMillis + raw; - // We use the nearest daylight saving time rule. 
- TimeZoneTransition beforeTrs, afterTrs; - UDate beforeT = time, afterT = time; - int32_t beforeSav = 0, afterSav = 0; - UBool beforeTrsAvail, afterTrsAvail; - - // Search for DST rule before or on the time - while (TRUE) { - beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs); - if (!beforeTrsAvail) { + // This implementation resolves daylight saving time offset + // closest rule after the given time. + UDate baseTime = localMillis + raw; + UDate time = baseTime; + UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE; + TimeZoneTransition trs; + UBool trsAvail; + + // Search for DST rule after the given time + while (time < limit) { + trsAvail = btz->getNextTransition(time, FALSE, trs); + if (!trsAvail) { break; } - beforeT = beforeTrs.getTime() - 1; - beforeSav = beforeTrs.getFrom()->getDSTSavings(); - if (beforeSav != 0) { + resolvedSavings = trs.getTo()->getDSTSavings(); + if (resolvedSavings != 0) { break; } + time = trs.getTime(); } - // Search for DST rule after the time - while (TRUE) { - afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs); - if (!afterTrsAvail) { - break; + if (resolvedSavings == 0) { + // If no DST rule after the given time was found, search for + // DST rule before. 
+ time = baseTime; + limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE; + while (time > limit) { + trsAvail = btz->getPreviousTransition(time, TRUE, trs); + if (!trsAvail) { + break; + } + resolvedSavings = trs.getFrom()->getDSTSavings(); + if (resolvedSavings != 0) { + break; + } + time = trs.getTime() - 1; } - afterT = afterTrs.getTime(); - afterSav = afterTrs.getTo()->getDSTSavings(); - if (afterSav != 0) { - break; - } - } - if (beforeTrsAvail && afterTrsAvail) { - if (time - beforeT > afterT - time) { - resolvedSavings = afterSav; - } else { - resolvedSavings = beforeSav; + if (resolvedSavings == 0) { + resolvedSavings = btz->getDSTSavings(); } - } else if (beforeTrsAvail && beforeSav != 0) { - resolvedSavings = beforeSav; - } else if (afterTrsAvail && afterSav != 0) { - resolvedSavings = afterSav; - } else { - resolvedSavings = btz->getDSTSavings(); } } else { resolvedSavings = tz.getDSTSavings(); diff --git a/deps/icu-small/source/i18n/sortkey.cpp b/deps/icu-small/source/i18n/sortkey.cpp index fb030c499083e6..430fd5d3500948 100644 --- a/deps/icu-small/source/i18n/sortkey.cpp +++ b/deps/icu-small/source/i18n/sortkey.cpp @@ -20,7 +20,7 @@ // // 6/20/97 helena Java class name change. // 6/23/97 helena Added comments to make code more readable. -// 6/26/98 erm Canged to use byte arrays instead of UnicodeString +// 6/26/98 erm Changed to use byte arrays instead of UnicodeString // 7/31/98 erm hashCode: minimum inc should be 2 not 1, // Cleaned up operator= // 07/12/99 helena HPUX 11 CC port. 
diff --git a/deps/icu-small/source/i18n/standardplural.cpp b/deps/icu-small/source/i18n/standardplural.cpp index 0391034b3e4a8d..5a6069bf7ddc47 100644 --- a/deps/icu-small/source/i18n/standardplural.cpp +++ b/deps/icu-small/source/i18n/standardplural.cpp @@ -23,7 +23,7 @@ U_NAMESPACE_BEGIN static const char *gKeywords[StandardPlural::COUNT] = { - "zero", "one", "two", "few", "many", "other" + "zero", "one", "two", "few", "many", "other", "=0", "=1" }; const char *StandardPlural::getKeyword(Form p) { @@ -60,21 +60,55 @@ int32_t StandardPlural::indexOrNegativeFromString(const char *keyword) { return ZERO; } break; + case '=': + if (uprv_strcmp(keyword, "0") == 0) { + return EQ_0; + } else if (uprv_strcmp(keyword, "1") == 0) { + return EQ_1; + } + break; + // Also allow "0" and "1" + case '0': + if (*keyword == 0) { + return EQ_0; + } + break; + case '1': + if (*keyword == 0) { + return EQ_1; + } + break; default: break; } return -1; } -static const UChar gZero[] = { 0x7A, 0x65, 0x72, 0x6F }; -static const UChar gOne[] = { 0x6F, 0x6E, 0x65 }; -static const UChar gTwo[] = { 0x74, 0x77, 0x6F }; -static const UChar gFew[] = { 0x66, 0x65, 0x77 }; -static const UChar gMany[] = { 0x6D, 0x61, 0x6E, 0x79 }; -static const UChar gOther[] = { 0x6F, 0x74, 0x68, 0x65, 0x72 }; +static const UChar gZero[] = u"zero"; +static const UChar gOne[] = u"one"; +static const UChar gTwo[] = u"two"; +static const UChar gFew[] = u"few"; +static const UChar gMany[] = u"many"; +static const UChar gOther[] = u"other"; +static const UChar gEq0[] = u"=0"; +static const UChar gEq1[] = u"=1"; int32_t StandardPlural::indexOrNegativeFromString(const UnicodeString &keyword) { switch (keyword.length()) { + case 1: + if (keyword.charAt(0) == '0') { + return EQ_0; + } else if (keyword.charAt(0) == '1') { + return EQ_1; + } + break; + case 2: + if (keyword.compare(gEq0, 2) == 0) { + return EQ_0; + } else if (keyword.compare(gEq1, 2) == 0) { + return EQ_1; + } + break; case 3: if (keyword.compare(gOne, 3) == 
0) { return ONE; diff --git a/deps/icu-small/source/i18n/standardplural.h b/deps/icu-small/source/i18n/standardplural.h index 33e1d605f6856b..16593065c8aa91 100644 --- a/deps/icu-small/source/i18n/standardplural.h +++ b/deps/icu-small/source/i18n/standardplural.h @@ -35,6 +35,8 @@ class U_I18N_API StandardPlural { FEW, MANY, OTHER, + EQ_0, + EQ_1, COUNT }; diff --git a/deps/icu-small/source/i18n/stsearch.cpp b/deps/icu-small/source/i18n/stsearch.cpp index 32481a14004075..003d86b64016f0 100644 --- a/deps/icu-small/source/i18n/stsearch.cpp +++ b/deps/icu-small/source/i18n/stsearch.cpp @@ -184,7 +184,7 @@ StringSearch::clone() const { // operator overloading --------------------------------------------- StringSearch & StringSearch::operator=(const StringSearch &that) { - if ((*this) != that) { + if (this != &that) { UErrorCode status = U_ZERO_ERROR; m_text_ = that.m_text_; m_breakiterator_ = that.m_breakiterator_; diff --git a/deps/icu-small/source/i18n/timezone.cpp b/deps/icu-small/source/i18n/timezone.cpp index 1461b80494e109..fe564e6530e7b1 100644 --- a/deps/icu-small/source/i18n/timezone.cpp +++ b/deps/icu-small/source/i18n/timezone.cpp @@ -1678,7 +1678,7 @@ TimeZone::getIDForWindowsID(const UnicodeString& winid, const char* region, Unic winidKey[winKeyLen] = 0; ures_getByKey(zones, winidKey, zones, &tmperr); // use tmperr, because windows mapping might not - // be avaiable by design + // be available by design if (U_FAILURE(tmperr)) { ures_close(zones); return id; diff --git a/deps/icu-small/source/i18n/translit.cpp b/deps/icu-small/source/i18n/translit.cpp index a2ade1b4fe8578..c2a15837bedb0d 100644 --- a/deps/icu-small/source/i18n/translit.cpp +++ b/deps/icu-small/source/i18n/translit.cpp @@ -170,6 +170,7 @@ Transliterator* Transliterator::clone() const { * Assignment operator. 
*/ Transliterator& Transliterator::operator=(const Transliterator& other) { + if (this == &other) { return *this; } // self-assignment: no-op ID = other.ID; // NUL-terminate the ID string ID.getTerminatedBuffer(); diff --git a/deps/icu-small/source/i18n/transreg.h b/deps/icu-small/source/i18n/transreg.h index 04ed3fb501059c..0a0698862be1ac 100644 --- a/deps/icu-small/source/i18n/transreg.h +++ b/deps/icu-small/source/i18n/transreg.h @@ -144,7 +144,7 @@ class TransliteratorRegistry : public UMemory { public: /** - * Contructor + * Constructor * @param status Output param set to success/failure code. */ TransliteratorRegistry(UErrorCode& status); diff --git a/deps/icu-small/source/i18n/tzfmt.cpp b/deps/icu-small/source/i18n/tzfmt.cpp index 267d507aa7eca6..e70005a384085e 100644 --- a/deps/icu-small/source/i18n/tzfmt.cpp +++ b/deps/icu-small/source/i18n/tzfmt.cpp @@ -1873,7 +1873,7 @@ TimeZoneFormat::parseOffsetFieldsWithPattern(const UnicodeString& text, int32_t // When TimeZoneFormat parse() is called from SimpleDateFormat, // leading space characters might be truncated. If the first pattern text // starts with such character (e.g. Bidi control), then we need to - // skip the leading space charcters. + // skip the leading space characters. if (idx < text.length() && !PatternProps::isWhiteSpace(text.char32At(idx))) { while (len > 0) { UChar32 ch; diff --git a/deps/icu-small/source/i18n/ucal.cpp b/deps/icu-small/source/i18n/ucal.cpp index 275ef7ea87e273..39a9508ca93443 100644 --- a/deps/icu-small/source/i18n/ucal.cpp +++ b/deps/icu-small/source/i18n/ucal.cpp @@ -33,8 +33,8 @@ U_NAMESPACE_USE static TimeZone* _createTimeZone(const UChar* zoneID, int32_t len, UErrorCode* ec) { - TimeZone* zone = NULL; - if (ec != NULL && U_SUCCESS(*ec)) { + TimeZone* zone = nullptr; + if (ec != nullptr && U_SUCCESS(*ec)) { // Note that if zoneID is invalid, we get back GMT. This odd // behavior is by design and goes back to the JDK. 
The only // failure we will see is a memory allocation failure. @@ -42,7 +42,7 @@ _createTimeZone(const UChar* zoneID, int32_t len, UErrorCode* ec) { UnicodeString zoneStrID; zoneStrID.setTo((UBool)(len < 0), zoneID, l); /* temporary read-only alias */ zone = TimeZone::createTimeZone(zoneStrID); - if (zone == NULL) { + if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } } @@ -58,20 +58,20 @@ ucal_openTimeZoneIDEnumeration(USystemTimeZoneType zoneType, const char* region, U_CAPI UEnumeration* U_EXPORT2 ucal_openTimeZones(UErrorCode* ec) { - return uenum_openFromStringEnumeration(TimeZone::createEnumeration(), ec); + return ucal_openTimeZoneIDEnumeration(UCAL_ZONE_TYPE_ANY, nullptr, nullptr, ec); } U_CAPI UEnumeration* U_EXPORT2 ucal_openCountryTimeZones(const char* country, UErrorCode* ec) { - return uenum_openFromStringEnumeration(TimeZone::createEnumeration(country), ec); + return ucal_openTimeZoneIDEnumeration(UCAL_ZONE_TYPE_ANY, country, nullptr, ec); } U_CAPI int32_t U_EXPORT2 ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { int32_t len = 0; - if (ec != NULL && U_SUCCESS(*ec)) { + if (ec != nullptr && U_SUCCESS(*ec)) { TimeZone* zone = TimeZone::createDefault(); - if (zone == NULL) { + if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } else { UnicodeString id; @@ -86,7 +86,7 @@ ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { U_CAPI void U_EXPORT2 ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec) { TimeZone* zone = _createTimeZone(zoneID, -1, ec); - if (zone != NULL) { + if (zone != nullptr) { TimeZone::adoptDefault(zone); } } @@ -94,9 +94,9 @@ ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec) { U_CAPI int32_t U_EXPORT2 ucal_getHostTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { int32_t len = 0; - if (ec != NULL && U_SUCCESS(*ec)) { + if (ec != nullptr && U_SUCCESS(*ec)) { TimeZone *zone = TimeZone::detectHostTimeZone(); - if (zone == NULL) { + 
if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } else { UnicodeString id; @@ -114,7 +114,7 @@ ucal_getDSTSavings(const UChar* zoneID, UErrorCode* ec) { TimeZone* zone = _createTimeZone(zoneID, -1, ec); if (U_SUCCESS(*ec)) { SimpleTimeZone* stz = dynamic_cast<SimpleTimeZone*>(zone); - if (stz != NULL) { + if (stz != nullptr) { result = stz->getDSTSavings(); } else { // Since there is no getDSTSavings on TimeZone, we use a @@ -219,10 +219,10 @@ ucal_setTimeZone( UCalendar* cal, if(U_FAILURE(*status)) return; - TimeZone* zone = (zoneID==NULL) ? TimeZone::createDefault() + TimeZone* zone = (zoneID==nullptr) ? TimeZone::createDefault() : _createTimeZone(zoneID, len, status); - if (zone != NULL) { + if (zone != nullptr) { ((Calendar*)cal)->adoptTimeZone(zone); } } @@ -255,8 +255,8 @@ ucal_getTimeZoneDisplayName(const UCalendar* cal, const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); UnicodeString id; - if(!(result==NULL && resultLength==0)) { - // NULL destination for pure preflighting: empty dummy string + if (!(result == nullptr && resultLength == 0)) { + // Null destination for pure preflighting: empty dummy string // otherwise, alias the destination buffer id.setTo(result, 0, resultLength); } @@ -298,12 +298,12 @@ ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode) { } Calendar *cpp_cal = (Calendar *)cal; GregorianCalendar *gregocal = dynamic_cast<GregorianCalendar *>(cpp_cal); - // Not if(gregocal == NULL) { + // Not if(gregocal == nullptr) { // because we really want to work only with a GregorianCalendar, not with // its subclasses like BuddhistCalendar. - if (cpp_cal == NULL) { - // We normally don't check "this" pointers for NULL, but this here avoids - // compiler-generated exception-throwing code in case cal == NULL. + if (cpp_cal == nullptr) { + // We normally don't check "this" pointers for nullptr, but this here avoids + // compiler-generated exception-throwing code in case cal == nullptr. 
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } @@ -321,11 +321,11 @@ ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode) { } const Calendar *cpp_cal = (const Calendar *)cal; const GregorianCalendar *gregocal = dynamic_cast<const GregorianCalendar *>(cpp_cal); - // Not if(gregocal == NULL) { + // Not if(gregocal == nullptr) { // see comments in ucal_setGregorianChange(). - if (cpp_cal == NULL) { - // We normally don't check "this" pointers for NULL, but this here avoids - // compiler-generated exception-throwing code in case cal == NULL. + if (cpp_cal == nullptr) { + // We normally don't check "this" pointers for nullptr, but this here avoids + // compiler-generated exception-throwing code in case cal == nullptr. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return (UDate)0; } @@ -572,11 +572,11 @@ ucal_getLimit( const UCalendar* cal, U_CAPI const char * U_EXPORT2 ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status) { - if (cal == NULL) { + if (cal == nullptr) { if (U_SUCCESS(*status)) { *status = U_ILLEGAL_ARGUMENT_ERROR; } - return NULL; + return nullptr; } return ((Calendar*)cal)->getLocaleID(type, *status); } @@ -617,7 +617,7 @@ U_CAPI const char * U_EXPORT2 ucal_getType(const UCalendar *cal, UErrorCode* status) { if (U_FAILURE(*status)) { - return NULL; + return nullptr; } return ((Calendar*)cal)->getType(); } @@ -662,8 +662,8 @@ ucal_getFieldDifference(UCalendar* cal, UDate target, static const UEnumeration defaultKeywordValues = { - NULL, - NULL, + nullptr, + nullptr, ulist_close_keyword_values_iterator, ulist_count_keyword_values, uenum_unextDefault, @@ -690,7 +690,7 @@ static const char * const CAL_TYPES[] = { "islamic-umalqura", "islamic-tbla", "islamic-rgsa", - NULL + nullptr }; U_CAPI UEnumeration* U_EXPORT2 @@ -700,16 +700,16 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool (void)ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status); // Read preferred calendar 
values from supplementalData calendarPreference - UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", status); + UResourceBundle *rb = ures_openDirect(nullptr, "supplementalData", status); ures_getByKey(rb, "calendarPreferenceData", rb, status); - UResourceBundle *order = ures_getByKey(rb, prefRegion, NULL, status); - if (*status == U_MISSING_RESOURCE_ERROR && rb != NULL) { + UResourceBundle *order = ures_getByKey(rb, prefRegion, nullptr, status); + if (*status == U_MISSING_RESOURCE_ERROR && rb != nullptr) { *status = U_ZERO_ERROR; - order = ures_getByKey(rb, "001", NULL, status); + order = ures_getByKey(rb, "001", nullptr, status); } // Create a list of calendar type strings - UList *values = NULL; + UList *values = nullptr; if (U_SUCCESS(*status)) { values = ulist_createEmptyList(status); if (U_SUCCESS(*status)) { @@ -717,7 +717,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool int32_t len; const UChar *type = ures_getStringByIndex(order, i, &len, status); char *caltype = (char*)uprv_malloc(len + 1); - if (caltype == NULL) { + if (caltype == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; break; } @@ -732,7 +732,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool if (U_SUCCESS(*status) && !commonlyUsed) { // If not commonlyUsed, add other available values - for (int32_t i = 0; CAL_TYPES[i] != NULL; i++) { + for (int32_t i = 0; CAL_TYPES[i] != nullptr; i++) { if (!ulist_containsString(values, CAL_TYPES[i], (int32_t)uprv_strlen(CAL_TYPES[i]))) { ulist_addItemEndList(values, CAL_TYPES[i], FALSE, status); if (U_FAILURE(*status)) { @@ -743,7 +743,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool } if (U_FAILURE(*status)) { ulist_deleteList(values); - values = NULL; + values = nullptr; } } } @@ -751,16 +751,16 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool ures_close(order); ures_close(rb); - if (U_FAILURE(*status) 
|| values == NULL) { - return NULL; + if (U_FAILURE(*status) || values == nullptr) { + return nullptr; } // Create string enumeration UEnumeration *en = (UEnumeration*)uprv_malloc(sizeof(UEnumeration)); - if (en == NULL) { + if (en == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; ulist_deleteList(values); - return NULL; + return nullptr; } ulist_resetList(values); memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); @@ -778,7 +778,7 @@ ucal_getTimeZoneTransitionDate(const UCalendar* cal, UTimeZoneTransitionType typ UDate base = ((Calendar*)cal)->getTime(*status); const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); const BasicTimeZone * btz = dynamic_cast<const BasicTimeZone *>(&tz); - if (btz != NULL && U_SUCCESS(*status)) { + if (btz != nullptr && U_SUCCESS(*status)) { TimeZoneTransition tzt; UBool inclusive = (type == UCAL_TZ_TRANSITION_NEXT_INCLUSIVE || type == UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE); UBool result = (type == UCAL_TZ_TRANSITION_NEXT || type == UCAL_TZ_TRANSITION_NEXT_INCLUSIVE)? @@ -828,4 +828,28 @@ ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* regi return resultLen; } +U_CAPI void U_EXPORT2 ucal_getTimeZoneOffsetFromLocal( + const UCalendar* cal, + UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t* rawOffset, int32_t* dstOffset, UErrorCode* status) +{ + if (U_FAILURE(*status)) { + return; + } + UDate date = ((Calendar*)cal)->getTime(*status); + if (U_FAILURE(*status)) { + return; + } + const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); + const BasicTimeZone* btz = dynamic_cast<const BasicTimeZone *>(&tz); + if (btz == nullptr) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + btz->getOffsetFromLocal( + date, nonExistingTimeOpt, duplicatedTimeOpt, + *rawOffset, *dstOffset, *status); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/uni2name.cpp b/deps/icu-small/source/i18n/uni2name.cpp index 9df3924ae5f147..729a1e5fa82a62 100644 --- 
a/deps/icu-small/source/i18n/uni2name.cpp +++ b/deps/icu-small/source/i18n/uni2name.cpp @@ -81,7 +81,7 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos return; } - // Accomodate the longest possible name plus padding + // Accommodate the longest possible name plus padding char* buf = (char*) uprv_malloc(maxLen); if (buf == NULL) { offsets.start = offsets.limit; diff --git a/deps/icu-small/source/i18n/unicode/basictz.h b/deps/icu-small/source/i18n/unicode/basictz.h index 7199fb341deec8..ea8720ed13cf41 100644 --- a/deps/icu-small/source/i18n/unicode/basictz.h +++ b/deps/icu-small/source/i18n/unicode/basictz.h @@ -152,6 +152,17 @@ class U_I18N_API BasicTimeZone: public TimeZone { virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) const; +#ifndef U_FORCE_HIDE_DRAFT_API + /** + * Get time zone offsets from local wall time. + * @draft ICU 69 + */ + virtual void getOffsetFromLocal( + UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& status) const; + +#endif /* U_FORCE_HIDE_DRAFT_API */ #ifndef U_HIDE_INTERNAL_API /** @@ -161,17 +172,17 @@ class U_I18N_API BasicTimeZone: public TimeZone { enum { kStandard = 0x01, kDaylight = 0x03, - kFormer = 0x04, - kLatter = 0x0C + kFormer = 0x04, /* UCAL_TZ_LOCAL_FORMER */ + kLatter = 0x0C /* UCAL_TZ_LOCAL_LATTER */ }; -#endif /* U_HIDE_INTERNAL_API */ /** * Get time zone offsets from local wall time. 
* @internal */ - virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ protected: diff --git a/deps/icu-small/source/i18n/unicode/calendar.h b/deps/icu-small/source/i18n/unicode/calendar.h index cc84bb274dee04..be774ab26fb619 100644 --- a/deps/icu-small/source/i18n/unicode/calendar.h +++ b/deps/icu-small/source/i18n/unicode/calendar.h @@ -47,6 +47,8 @@ U_NAMESPACE_BEGIN class ICUServiceFactory; +// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, +// it is a return type for a virtual method (@internal) /** * @internal */ @@ -1849,7 +1851,7 @@ class U_I18N_API Calendar : public UObject { * @param startValue starting (least max) value of field * @param endValue ending (greatest max) value of field * @param status return type - * @internal + * @internal (private) */ int32_t getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const; diff --git a/deps/icu-small/source/i18n/unicode/datefmt.h b/deps/icu-small/source/i18n/unicode/datefmt.h index 21217e567acf7d..bbba0785edaccc 100644 --- a/deps/icu-small/source/i18n/unicode/datefmt.h +++ b/deps/icu-small/source/i18n/unicode/datefmt.h @@ -139,7 +139,7 @@ template class U_I18N_API EnumSet * You can also use forms of the parse and format methods with ParsePosition and * FieldPosition to allow you to diff --git a/deps/icu-small/source/i18n/unicode/dcfmtsym.h b/deps/icu-small/source/i18n/unicode/dcfmtsym.h index b2c39a0236e4aa..d0f844a51a369f 100644 --- a/deps/icu-small/source/i18n/unicode/dcfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dcfmtsym.h @@ -446,7 +446,7 @@ class U_I18N_API DecimalFormatSymbols : public UObject { inline const UnicodeString& getConstDigitSymbol(int32_t digit) const; /** - * Returns that pattern 
stored in currecy info. Internal API for use by NumberFormat API. + * Returns that pattern stored in currency info. Internal API for use by NumberFormat API. * @internal */ inline const char16_t* getCurrencyPattern(void) const; diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index 357a25d9a8e197..dab7dcfb3dc61a 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -492,7 +492,6 @@ class U_I18N_API DateTimePatternGenerator : public UObject { #if !UCONFIG_NO_FORMATTING -#ifndef U_HIDE_DRAFT_API /** * Get the default hour cycle for a locale. Uses the locale that the * DateTimePatternGenerator was initially created with. @@ -503,10 +502,9 @@ class U_I18N_API DateTimePatternGenerator : public UObject { * which must not indicate a failure before the function call. * Set to U_UNSUPPORTED_ERROR if used on an empty instance. * @return the default hour cycle. - * @draft ICU 67 + * @stable ICU 67 */ UDateFormatHourCycle getDefaultHourCycle(UErrorCode& status) const; -#endif /* U_HIDE_DRAFT_API */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/listformatter.h b/deps/icu-small/source/i18n/unicode/listformatter.h index eddb5dab6701b3..3cc750c8387499 100644 --- a/deps/icu-small/source/i18n/unicode/listformatter.h +++ b/deps/icu-small/source/i18n/unicode/listformatter.h @@ -198,22 +198,6 @@ class U_I18N_API ListFormatter : public UObject{ static ListFormatter* createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode); -#ifndef U_HIDE_INTERNAL_API - /** - * Creates a ListFormatter appropriate for a locale and style. - * - * TODO(ICU-20888): Remove this in ICU 68. - * - * @param locale The locale. - * @param style the style, either "standard", "or", "unit", "unit-narrow", or "unit-short" - * @param errorCode ICU error code, set if no data available for the given locale. 
- * @return A ListFormatter object created from internal data derived from - * CLDR data. - * @internal - */ - static ListFormatter* createInstance(const Locale& locale, const char* style, UErrorCode& errorCode); -#endif /* U_HIDE_INTERNAL_API */ - /** * Destructor. * @@ -273,6 +257,15 @@ class U_I18N_API ListFormatter : public UObject{ #endif /* U_HIDE_INTERNAL_API */ private: + + /** + * Creates a ListFormatter appropriate for a locale and style. + * + * @param locale The locale. + * @param style the style, either "standard", "or", "unit", "unit-narrow", or "unit-short" + */ + static ListFormatter* createInstance(const Locale& locale, const char* style, UErrorCode& errorCode); + static void initializeHash(UErrorCode& errorCode); static const ListFormatInternal* getListFormatInternal(const Locale& locale, const char *style, UErrorCode& errorCode); struct ListPatternsSink; diff --git a/deps/icu-small/source/i18n/unicode/measfmt.h b/deps/icu-small/source/i18n/unicode/measfmt.h index f95f39f0d5e6a2..f48dada2abf02a 100644 --- a/deps/icu-small/source/i18n/unicode/measfmt.h +++ b/deps/icu-small/source/i18n/unicode/measfmt.h @@ -91,7 +91,8 @@ class DateFormat; /** *

IMPORTANT: New users are strongly encouraged to see if * numberformatter.h fits their use case. Although not deprecated, this header - * is provided for backwards compatibility only. + * is provided for backwards compatibility only, and has much more limited + * capabilities. * * @see Format * @author Alan Liu diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index ed8773c7710f3e..0985ba0706eaa4 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -30,201 +30,333 @@ U_NAMESPACE_BEGIN class StringEnumeration; -struct MeasureUnitImpl; +class MeasureUnitImpl; + +namespace number { +namespace impl { +class LongNameHandler; +} +} // namespace number -#ifndef U_HIDE_DRAFT_API /** * Enumeration for unit complexity. There are three levels: * - * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, - * square-kilometer, kilojoule, per-second. + * - SINGLE: A single unit, optionally with a power and/or SI or binary prefix. + * Examples: hectare, square-kilometer, kilojoule, per-second, mebibyte. * - COMPOUND: A unit composed of the product of multiple single units. Examples: * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. * - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch, * hour+minute+second, degree+arcminute+arcsecond. * * The complexity determines which operations are available. For example, you cannot set the power - * or SI prefix of a compound unit. + * or prefix of a compound unit. * - * @draft ICU 67 + * @stable ICU 67 */ enum UMeasureUnitComplexity { /** * A single unit, like kilojoule. * - * @draft ICU 67 + * @stable ICU 67 */ UMEASURE_UNIT_SINGLE, /** * A compound unit, like meter-per-second. * - * @draft ICU 67 + * @stable ICU 67 */ UMEASURE_UNIT_COMPOUND, /** * A mixed unit, like hour+minute. 
* - * @draft ICU 67 + * @stable ICU 67 */ UMEASURE_UNIT_MIXED }; + +#ifndef U_HIDE_DRAFT_API /** - * Enumeration for SI prefixes, such as "kilo". + * Enumeration for SI and binary prefixes, e.g. "kilo-", "nano-", "mebi-". + * + * Enum values should be treated as opaque: use umeas_getPrefixPower() and + * umeas_getPrefixBase() to find their corresponding values. * - * @draft ICU 67 + * @draft ICU 69 + * @see umeas_getPrefixBase + * @see umeas_getPrefixPower */ -typedef enum UMeasureSIPrefix { +typedef enum UMeasurePrefix { + /** + * The absence of an SI or binary prefix. + * + * The integer representation of this enum value is an arbitrary + * implementation detail and should not be relied upon: use + * umeas_getPrefixPower() to obtain meaningful values. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_ONE = 30 + 0, /** * SI prefix: yotta, 10^24. * - * @draft ICU 67 + * @draft ICU 69 + */ + UMEASURE_PREFIX_YOTTA = UMEASURE_PREFIX_ONE + 24, + + /** + * ICU use only. + * Used to determine the set of base-10 SI prefixes. + * @internal */ - UMEASURE_SI_PREFIX_YOTTA = 24, + UMEASURE_PREFIX_INTERNAL_MAX_SI = UMEASURE_PREFIX_YOTTA, /** * SI prefix: zetta, 10^21. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_ZETTA = 21, + UMEASURE_PREFIX_ZETTA = UMEASURE_PREFIX_ONE + 21, /** * SI prefix: exa, 10^18. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_EXA = 18, + UMEASURE_PREFIX_EXA = UMEASURE_PREFIX_ONE + 18, /** * SI prefix: peta, 10^15. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_PETA = 15, + UMEASURE_PREFIX_PETA = UMEASURE_PREFIX_ONE + 15, /** * SI prefix: tera, 10^12. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_TERA = 12, + UMEASURE_PREFIX_TERA = UMEASURE_PREFIX_ONE + 12, /** * SI prefix: giga, 10^9. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_GIGA = 9, + UMEASURE_PREFIX_GIGA = UMEASURE_PREFIX_ONE + 9, /** * SI prefix: mega, 10^6. 
* - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_MEGA = 6, + UMEASURE_PREFIX_MEGA = UMEASURE_PREFIX_ONE + 6, /** * SI prefix: kilo, 10^3. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_KILO = 3, + UMEASURE_PREFIX_KILO = UMEASURE_PREFIX_ONE + 3, /** * SI prefix: hecto, 10^2. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_HECTO = 2, + UMEASURE_PREFIX_HECTO = UMEASURE_PREFIX_ONE + 2, /** * SI prefix: deka, 10^1. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_DEKA = 1, - - /** - * The absence of an SI prefix. - * - * @draft ICU 67 - */ - UMEASURE_SI_PREFIX_ONE = 0, + UMEASURE_PREFIX_DEKA = UMEASURE_PREFIX_ONE + 1, /** * SI prefix: deci, 10^-1. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_DECI = -1, + UMEASURE_PREFIX_DECI = UMEASURE_PREFIX_ONE + -1, /** * SI prefix: centi, 10^-2. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_CENTI = -2, + UMEASURE_PREFIX_CENTI = UMEASURE_PREFIX_ONE + -2, /** * SI prefix: milli, 10^-3. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_MILLI = -3, + UMEASURE_PREFIX_MILLI = UMEASURE_PREFIX_ONE + -3, /** * SI prefix: micro, 10^-6. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_MICRO = -6, + UMEASURE_PREFIX_MICRO = UMEASURE_PREFIX_ONE + -6, /** * SI prefix: nano, 10^-9. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_NANO = -9, + UMEASURE_PREFIX_NANO = UMEASURE_PREFIX_ONE + -9, /** * SI prefix: pico, 10^-12. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_PICO = -12, + UMEASURE_PREFIX_PICO = UMEASURE_PREFIX_ONE + -12, /** * SI prefix: femto, 10^-15. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_FEMTO = -15, + UMEASURE_PREFIX_FEMTO = UMEASURE_PREFIX_ONE + -15, /** * SI prefix: atto, 10^-18. * - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_ATTO = -18, + UMEASURE_PREFIX_ATTO = UMEASURE_PREFIX_ONE + -18, /** * SI prefix: zepto, 10^-21. 
* - * @draft ICU 67 + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_ZEPTO = -21, + UMEASURE_PREFIX_ZEPTO = UMEASURE_PREFIX_ONE + -21, /** * SI prefix: yocto, 10^-24. * - * @draft ICU 67 + * @draft ICU 69 + */ + UMEASURE_PREFIX_YOCTO = UMEASURE_PREFIX_ONE + -24, + +#ifndef U_HIDE_INTERNAL_API + /** + * ICU use only. + * Used to determine the set of base-10 SI prefixes. + * @internal + */ + UMEASURE_PREFIX_INTERNAL_MIN_SI = UMEASURE_PREFIX_YOCTO, +#endif // U_HIDE_INTERNAL_API + + // Cannot conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // used in definitions of non-internal enum values + /** + * ICU use only. + * Sets the arbitrary offset of the base-1024 binary prefixes' enum values. + * @internal + */ + UMEASURE_PREFIX_INTERNAL_ONE_BIN = -60, + + /** + * Binary prefix: kibi, 1024^1. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_KIBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 1, + +#ifndef U_HIDE_INTERNAL_API + /** + * ICU use only. + * Used to determine the set of base-1024 binary prefixes. + * @internal + */ + UMEASURE_PREFIX_INTERNAL_MIN_BIN = UMEASURE_PREFIX_KIBI, +#endif // U_HIDE_INTERNAL_API + + /** + * Binary prefix: mebi, 1024^2. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_MEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 2, + + /** + * Binary prefix: gibi, 1024^3. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_GIBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 3, + + /** + * Binary prefix: tebi, 1024^4. + * + * @draft ICU 69 */ - UMEASURE_SI_PREFIX_YOCTO = -24 -} UMeasureSIPrefix; + UMEASURE_PREFIX_TEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 4, + + /** + * Binary prefix: pebi, 1024^5. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_PEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 5, + + /** + * Binary prefix: exbi, 1024^6. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_EXBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 6, + + /** + * Binary prefix: zebi, 1024^7. + * + * @draft ICU 69 + */ + UMEASURE_PREFIX_ZEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 7, + + /** + * Binary prefix: yobi, 1024^8. 
+ * + * @draft ICU 69 + */ + UMEASURE_PREFIX_YOBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 8, + +#ifndef U_HIDE_INTERNAL_API + /** + * ICU use only. + * Used to determine the set of base-1024 binary prefixes. + * @internal + */ + UMEASURE_PREFIX_INTERNAL_MAX_BIN = UMEASURE_PREFIX_YOBI, +#endif // U_HIDE_INTERNAL_API +} UMeasurePrefix; + +/** + * Returns the base of the factor associated with the given unit prefix: the + * base is 10 for SI prefixes (kilo, micro) and 1024 for binary prefixes (kibi, + * mebi). + * + * @draft ICU 69 + */ +U_CAPI int32_t U_EXPORT2 umeas_getPrefixBase(UMeasurePrefix unitPrefix); + +/** + * Returns the exponent of the factor associated with the given unit prefix, for + * example 3 for kilo, -6 for micro, 1 for kibi, 2 for mebi, 3 for gibi. + * + * @draft ICU 69 + */ +U_CAPI int32_t U_EXPORT2 umeas_getPrefixPower(UMeasurePrefix unitPrefix); + #endif // U_HIDE_DRAFT_API /** @@ -250,27 +382,26 @@ class U_I18N_API MeasureUnit: public UObject { */ MeasureUnit(const MeasureUnit &other); -#ifndef U_HIDE_DRAFT_API /** * Move constructor. - * @draft ICU 67 + * @stable ICU 67 */ MeasureUnit(MeasureUnit &&other) noexcept; /** - * Construct a MeasureUnit from a CLDR Unit Identifier, defined in UTS 35. - * Validates and canonicalizes the identifier. + * Construct a MeasureUnit from a CLDR Core Unit Identifier, defined in UTS + * 35. (Core unit identifiers and mixed unit identifiers are supported, long + * unit identifiers are not.) Validates and canonicalizes the identifier. * *

      * MeasureUnit example = MeasureUnit::forIdentifier("furlong-per-nanosecond")
      * 
* - * @param identifier The CLDR Unit Identifier + * @param identifier The CLDR Unit Identifier. * @param status Set if the identifier is invalid. - * @draft ICU 67 + * @stable ICU 67 */ static MeasureUnit forIdentifier(StringPiece identifier, UErrorCode& status); -#endif // U_HIDE_DRAFT_API /** * Copy assignment operator. @@ -278,13 +409,11 @@ class U_I18N_API MeasureUnit: public UObject { */ MeasureUnit &operator=(const MeasureUnit &other); -#ifndef U_HIDE_DRAFT_API /** * Move assignment operator. - * @draft ICU 67 + * @stable ICU 67 */ MeasureUnit &operator=(MeasureUnit &&other) noexcept; -#endif // U_HIDE_DRAFT_API /** * Returns a polymorphic clone of this object. The result will @@ -333,12 +462,11 @@ class U_I18N_API MeasureUnit: public UObject { */ const char *getSubtype() const; -#ifndef U_HIDE_DRAFT_API /** - * Get the CLDR Unit Identifier for this MeasureUnit, as defined in UTS 35. + * Get CLDR Unit Identifier for this MeasureUnit, as defined in UTS 35. * * @return The string form of this unit, owned by this MeasureUnit. - * @draft ICU 67 + * @stable ICU 67 */ const char* getIdentifier() const; @@ -347,38 +475,43 @@ class U_I18N_API MeasureUnit: public UObject { * * @param status Set if an error occurs. * @return The unit complexity. - * @draft ICU 67 + * @stable ICU 67 */ UMeasureUnitComplexity getComplexity(UErrorCode& status) const; +#ifndef U_HIDE_DRAFT_API /** - * Creates a MeasureUnit which is this SINGLE unit augmented with the specified SI prefix. - * For example, UMEASURE_SI_PREFIX_KILO for "kilo". + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified prefix. + * For example, UMEASURE_PREFIX_KILO for "kilo", or UMEASURE_PREFIX_KIBI for "kibi". * - * There is sufficient locale data to format all standard SI prefixes. + * There is sufficient locale data to format all standard prefixes. * * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. 
For more information, see UMeasureUnitComplexity. * - * @param prefix The SI prefix, from UMeasureSIPrefix. + * @param prefix The prefix, from UMeasurePrefix. * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. - * @draft ICU 67 + * @draft ICU 69 */ - MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; + MeasureUnit withPrefix(UMeasurePrefix prefix, UErrorCode& status) const; /** - * Gets the current SI prefix of this SINGLE unit. For example, if the unit has the SI prefix - * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. + * Returns the current SI or binary prefix of this SINGLE unit. For example, + * if the unit has the prefix "kilo", then UMEASURE_PREFIX_KILO is + * returned. * * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * * @param status Set if this is not a SINGLE unit or if another error occurs. - * @return The SI prefix of this SINGLE unit, from UMeasureSIPrefix. - * @draft ICU 67 + * @return The prefix of this SINGLE unit, from UMeasurePrefix. + * @see umeas_getPrefixBase + * @see umeas_getPrefixPower + * @draft ICU 69 */ - UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; + UMeasurePrefix getPrefix(UErrorCode& status) const; +#endif // U_HIDE_DRAFT_API /** * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality @@ -392,7 +525,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param dimensionality The dimensionality (power). * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. - * @draft ICU 67 + * @stable ICU 67 */ MeasureUnit withDimensionality(int32_t dimensionality, UErrorCode& status) const; @@ -407,7 +540,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @param status Set if this is not a SINGLE unit or if another error occurs. 
* @return The dimensionality (power) of this simple unit. - * @draft ICU 67 + * @stable ICU 67 */ int32_t getDimensionality(UErrorCode& status) const; @@ -421,7 +554,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @param status Set if this is a MIXED unit or if another error occurs. * @return The reciprocal of the target unit. - * @draft ICU 67 + * @stable ICU 67 */ MeasureUnit reciprocal(UErrorCode& status) const; @@ -440,10 +573,9 @@ class U_I18N_API MeasureUnit: public UObject { * @param other The MeasureUnit to multiply with the target. * @param status Set if this or other is a MIXED unit or if another error occurs. * @return The product of the target unit with the provided unit. - * @draft ICU 67 + * @stable ICU 67 */ MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DRAFT_API /** @@ -547,7 +679,7 @@ class U_I18N_API MeasureUnit: public UObject { // the "End generated createXXX methods" comment is auto generated code // and must not be edited manually. For instructions on how to correctly // update this code, refer to: -// http://site.icu-project.org/design/formatting/measureformat/updating-measure-unit +// docs/processes/release/tasks/updating-measure-unit.md // // Start generated createXXX methods @@ -839,6 +971,24 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getKarat(); +#ifndef U_HIDE_DRAFT_API + /** + * Returns by pointer, unit of concentr: milligram-ofglucose-per-deciliter. + * Caller owns returned value and must free it. + * Also see {@link #getMilligramOfglucosePerDeciliter()}. + * @param status ICU error code. + * @draft ICU 69 + */ + static MeasureUnit *createMilligramOfglucosePerDeciliter(UErrorCode &status); + + /** + * Returns by value, unit of concentr: milligram-ofglucose-per-deciliter. + * Also see {@link #createMilligramOfglucosePerDeciliter()}. 
+ * @draft ICU 69 + */ + static MeasureUnit getMilligramOfglucosePerDeciliter(); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns by pointer, unit of concentr: milligram-per-deciliter. * Caller owns returned value and must free it. @@ -3519,7 +3669,6 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getTeaspoon(); - // End generated createXXX methods protected: @@ -3569,10 +3718,14 @@ class U_I18N_API MeasureUnit: public UObject { /** Internal version of public API */ LocalArray splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const; - friend struct MeasureUnitImpl; + friend class MeasureUnitImpl; + + // For access to findBySubType + friend class number::impl::LongNameHandler; }; -#ifndef U_HIDE_DRAFT_API // @draft ICU 68 +#ifndef U_HIDE_DRAFT_API +// inline impl of @draft ICU 68 method inline std::pair, int32_t> MeasureUnit::splitToSingleUnits(UErrorCode& status) const { int32_t length; diff --git a/deps/icu-small/source/i18n/unicode/msgfmt.h b/deps/icu-small/source/i18n/unicode/msgfmt.h index 2d9bc8f2e2b0f8..14b57a114dc3a5 100644 --- a/deps/icu-small/source/i18n/unicode/msgfmt.h +++ b/deps/icu-small/source/i18n/unicode/msgfmt.h @@ -132,7 +132,7 @@ class NumberFormat; *
  • messageText can contain quoted literal strings including syntax characters. * A quoted literal string begins with an ASCII apostrophe and a syntax character * (usually a {curly brace}) and continues until the next single apostrophe. - * A double ASCII apostrohpe inside or outside of a quoted string represents + * A double ASCII apostrophe inside or outside of a quoted string represents * one literal apostrophe. *
  • Quotable syntax characters are the {curly braces} in all messageText parts, * plus the '#' sign in a messageText immediately inside a pluralStyle, diff --git a/deps/icu-small/source/i18n/unicode/numberformatter.h b/deps/icu-small/source/i18n/unicode/numberformatter.h index 06329b8e7aa024..b987e64b937455 100644 --- a/deps/icu-small/source/i18n/unicode/numberformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberformatter.h @@ -28,10 +28,9 @@ /** * \file - * \brief C++ API: Library for localized number formatting introduced in ICU 60. + * \brief C++ API: All-in-one formatter for localized numbers, currencies, and units. * - * This library was introduced in ICU 60 to simplify the process of formatting localized number strings. - * Basic usage examples: + * For a full list of options, see icu::number::NumberFormatterSettings. * *
      * // Most basic usage:
    @@ -347,15 +346,15 @@ class U_I18N_API Notation : public UMemory {
     
         union NotationUnion {
             // For NTN_SCIENTIFIC
    -        /** @internal */
    +        /** @internal (private) */
             struct ScientificSettings {
    -            /** @internal */
    +            /** @internal (private) */
                 int8_t fEngineeringInterval;
    -            /** @internal */
    +            /** @internal (private) */
                 bool fRequireMinInt;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMinExponentDigits;
    -            /** @internal */
    +            /** @internal (private) */
                 UNumberSignDisplay fExponentSignDisplay;
             } scientific;
     
    @@ -660,6 +659,17 @@ class U_I18N_API Precision : public UMemory {
          */
         static CurrencyPrecision currency(UCurrencyUsage currencyUsage);
     
    +#ifndef U_HIDE_DRAFT_API
    +    /**
    +     * Configure how trailing zeros are displayed on numbers. For example, to hide trailing zeros
    +     * when the number is an integer, use UNUM_TRAILING_ZERO_HIDE_IF_WHOLE.
    +     *
    +     * @param trailingZeroDisplay Option to configure the display of trailing zeros.
    +     * @draft ICU 69
    +     */
    +    Precision trailingZeroDisplay(UNumberTrailingZeroDisplay trailingZeroDisplay) const;
    +#endif // U_HIDE_DRAFT_API
    +
       private:
         enum PrecisionType {
             RND_BOGUS,
    @@ -684,32 +694,36 @@ class U_I18N_API Precision : public UMemory {
         } fType;
     
         union PrecisionUnion {
    -        /** @internal */
    +        /** @internal (private) */
             struct FractionSignificantSettings {
                 // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMinFrac;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMaxFrac;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMinSig;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMaxSig;
    +            /** @internal (private) */
    +            UNumberRoundingPriority fPriority;
             } fracSig;
    -        /** @internal */
    +        /** @internal (private) */
             struct IncrementSettings {
                 // For RND_INCREMENT, RND_INCREMENT_ONE, and RND_INCREMENT_FIVE
    -            /** @internal */
    +            /** @internal (private) */
                 double fIncrement;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMinFrac;
    -            /** @internal */
    +            /** @internal (private) */
                 impl::digits_t fMaxFrac;
             } increment;
             UCurrencyUsage currencyUsage; // For RND_CURRENCY
             UErrorCode errorCode; // For RND_ERROR
         } fUnion;
     
    +    UNumberTrailingZeroDisplay fTrailingZeroDisplay = UNUM_TRAILING_ZERO_AUTO;
    +
         typedef PrecisionUnion::FractionSignificantSettings FractionSignificantSettings;
         typedef PrecisionUnion::IncrementSettings IncrementSettings;
     
    @@ -741,8 +755,11 @@ class U_I18N_API Precision : public UMemory {
     
         static Precision constructSignificant(int32_t minSig, int32_t maxSig);
     
    -    static Precision
    -    constructFractionSignificant(const FractionPrecision &base, int32_t minSig, int32_t maxSig);
    +    static Precision constructFractionSignificant(
    +        const FractionPrecision &base,
    +        int32_t minSig,
    +        int32_t maxSig,
    +        UNumberRoundingPriority priority);
     
         static IncrementPrecision constructIncrement(double increment, int32_t minFrac);
     
    @@ -784,16 +801,38 @@ class U_I18N_API Precision : public UMemory {
      */
     class U_I18N_API FractionPrecision : public Precision {
       public:
    +#ifndef U_HIDE_DRAFT_API
         /**
    -     * Ensure that no less than this number of significant digits are retained when rounding according to fraction
    -     * rules.
    +     * Override maximum fraction digits with maximum significant digits depending on the magnitude
    +     * of the number. See UNumberRoundingPriority.
          *
    -     * 

    - * For example, with integer rounding, the number 3.141 becomes "3". However, with minimum figures set to 2, 3.141 - * becomes "3.1" instead. + * @param minSignificantDigits + * Pad trailing zeros to achieve this minimum number of significant digits. + * @param maxSignificantDigits + * Round the number to achieve this maximum number of significant digits. + * @param priority + * How to disambiguate between fraction digits and significant digits. + * @return A precision for chaining or passing to the NumberFormatter precision() setter. * - *

    - * This setting does not affect the number of trailing zeros. For example, 3.01 would print as "3", not "3.0". + * @draft ICU 69 + */ + Precision withSignificantDigits( + int32_t minSignificantDigits, + int32_t maxSignificantDigits, + UNumberRoundingPriority priority) const; +#endif // U_HIDE_DRAFT_API + + /** + * Ensure that no less than this number of significant digits are retained when rounding + * according to fraction rules. + * + * For example, with integer rounding, the number 3.141 becomes "3". However, with minimum + * figures set to 2, 3.141 becomes "3.1" instead. + * + * This setting does not affect the number of trailing zeros. For example, 3.01 would print as + * "3", not "3.0". + * + * This is equivalent to `withSignificantDigits(1, minSignificantDigits, RELAXED)`. * * @param minSignificantDigits * The number of significant figures to guarantee. @@ -803,16 +842,16 @@ class U_I18N_API FractionPrecision : public Precision { Precision withMinDigits(int32_t minSignificantDigits) const; /** - * Ensure that no more than this number of significant digits are retained when rounding according to fraction - * rules. + * Ensure that no more than this number of significant digits are retained when rounding + * according to fraction rules. * - *

    - * For example, with integer rounding, the number 123.4 becomes "123". However, with maximum figures set to 2, 123.4 - * becomes "120" instead. + * For example, with integer rounding, the number 123.4 becomes "123". However, with maximum + * figures set to 2, 123.4 becomes "120" instead. * - *

    - * This setting does not affect the number of trailing zeros. For example, with fixed fraction of 2, 123.4 would - * become "120.00". + * This setting does not affect the number of trailing zeros. For example, with fixed fraction + * of 2, 123.4 would become "120.00". + * + * This is equivalent to `withSignificantDigits(1, maxSignificantDigits, STRICT)`. * * @param maxSignificantDigits * Round the number to no more than this number of significant figures. @@ -1131,33 +1170,35 @@ class U_I18N_API Scale : public UMemory { namespace impl { -// Do not enclose entire Usage with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +// Do not enclose entire StringProp with #ifndef U_HIDE_INTERNAL_API, needed for a protected field /** * Manages NumberFormatterSettings::usage()'s char* instance on the heap. * @internal */ -class U_I18N_API Usage : public UMemory { +class U_I18N_API StringProp : public UMemory { #ifndef U_HIDE_INTERNAL_API public: /** @internal */ - Usage(const Usage& other); + StringProp(const StringProp &other); /** @internal */ - Usage& operator=(const Usage& other); + StringProp &operator=(const StringProp &other); /** @internal */ - Usage(Usage &&src) U_NOEXCEPT; + StringProp(StringProp &&src) U_NOEXCEPT; /** @internal */ - Usage& operator=(Usage&& src) U_NOEXCEPT; + StringProp &operator=(StringProp &&src) U_NOEXCEPT; /** @internal */ - ~Usage(); + ~StringProp(); /** @internal */ - int16_t length() const { return fLength; } + int16_t length() const { + return fLength; + } /** @internal * Makes a copy of value. Set to "" to unset. 
@@ -1165,18 +1206,21 @@ class U_I18N_API Usage : public UMemory { void set(StringPiece value); /** @internal */ - bool isSet() const { return fLength > 0; } + bool isSet() const { + return fLength > 0; + } #endif // U_HIDE_INTERNAL_API private: - char *fUsage; + char *fValue; int16_t fLength; UErrorCode fError; - Usage() : fUsage(nullptr), fLength(0), fError(U_ZERO_ERROR) {} + StringProp() : fValue(nullptr), fLength(0), fError(U_ZERO_ERROR) { + } - /** @internal */ + /** @internal (private) */ UBool copyErrorTo(UErrorCode &status) const { if (U_FAILURE(fError)) { status = fError; @@ -1185,7 +1229,7 @@ class U_I18N_API Usage : public UMemory { return false; } - // Allow NumberFormatterImpl to access fUsage. + // Allow NumberFormatterImpl to access fValue. friend class impl::NumberFormatterImpl; // Allow skeleton generation code to access private members. @@ -1480,7 +1524,10 @@ struct U_I18N_API MacroProps : public UMemory { Scale scale; // = Scale(); (benign value) /** @internal */ - Usage usage; // = Usage(); (no usage) + StringProp usage; // = StringProp(); (no usage) + + /** @internal */ + StringProp unitDisplayCase; // = StringProp(); (nominative) /** @internal */ const AffixPatternProvider* affixProvider = nullptr; // no ownership @@ -1503,7 +1550,8 @@ struct U_I18N_API MacroProps : public UMemory { bool copyErrorTo(UErrorCode &status) const { return notation.copyErrorTo(status) || precision.copyErrorTo(status) || padder.copyErrorTo(status) || integerWidth.copyErrorTo(status) || - symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status); + symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status) || + unitDisplayCase.copyErrorTo(status); } }; @@ -2171,6 +2219,25 @@ class U_I18N_API NumberFormatterSettings { Derived usage(StringPiece usage) &&; #endif // U_HIDE_DRAFT_API +#ifndef U_HIDE_DRAFT_API +#ifndef U_HIDE_INTERNAL_API + /** + * Specifies the desired case for a unit formatter's output (e.g. 
+ * accusative, dative, genitive). + * + * @internal ICU 69 technology preview + */ + Derived unitDisplayCase(StringPiece unitDisplayCase) const &; + + /** + * Overload of unitDisplayCase() for use on an rvalue reference. + * + * @internal ICU 69 technology preview + */ + Derived unitDisplayCase(StringPiece unitDisplayCase) &&; +#endif // U_HIDE_INTERNAL_API +#endif // U_HIDE_DRAFT_API + #ifndef U_HIDE_INTERNAL_API /** @@ -2223,6 +2290,9 @@ class U_I18N_API NumberFormatterSettings { * The returned skeleton is in normalized form, such that two number formatters with equivalent * behavior should produce the same skeleton. * + * For more information on number skeleton strings, see: + * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html + * * @return A number skeleton string with behavior corresponding to this number formatter. * @stable ICU 62 */ @@ -2658,6 +2728,14 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { * @draft ICU 68 */ MeasureUnit getOutputUnit(UErrorCode& status) const; + + /** + * Gets the gender of the formatted output. Returns "" when the gender is + * unknown, or for ungendered languages. + * + * @internal ICU 69 technology preview. + */ + const char *getGender(UErrorCode& status) const; #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_INTERNAL_API @@ -2685,7 +2763,7 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { /** * Internal constructor from data type. Adopts the data pointer. 
- * @internal + * @internal (private) */ explicit FormattedNumber(impl::UFormattedNumberData *results) : fData(results), fErrorCode(U_ZERO_ERROR) {} @@ -2702,8 +2780,6 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { friend struct impl::UFormattedNumberImpl; }; -#ifndef U_HIDE_DRAFT_API -// Note: This is draft ICU 65 template StringClass FormattedNumber::toDecimalNumber(UErrorCode& status) const { StringClass result; @@ -2711,7 +2787,6 @@ StringClass FormattedNumber::toDecimalNumber(UErrorCode& status) const { toDecimalNumber(sink, status); return result; } -#endif // U_HIDE_DRAFT_API /** * See the main description in numberformatter.h for documentation and examples. @@ -2747,6 +2822,9 @@ class U_I18N_API NumberFormatter final { * It is possible for an error to occur while parsing. See the overload of this method if you are * interested in the location of a possible parse error. * + * For more information on number skeleton strings, see: + * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html + * * @param skeleton * The skeleton string off of which to base this NumberFormatter. * @param status @@ -2763,6 +2841,9 @@ class U_I18N_API NumberFormatter final { * If an error occurs while parsing the skeleton string, the offset into the skeleton string at * which the error occurred will be saved into the UParseError, if provided. * + * For more information on number skeleton strings, see: + * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html + * * @param skeleton * The skeleton string off of which to base this NumberFormatter. 
* @param perror diff --git a/deps/icu-small/source/i18n/unicode/numberrangeformatter.h b/deps/icu-small/source/i18n/unicode/numberrangeformatter.h index 67339bb6e68da8..432f2f6095dedf 100644 --- a/deps/icu-small/source/i18n/unicode/numberrangeformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberrangeformatter.h @@ -73,7 +73,7 @@ struct UFormattedNumberRangeImpl; * Export an explicit template instantiation. See datefmt.h * (When building DLLs for Windows this is required.) */ -#if U_PLATFORM == U_PF_WINDOWS && !defined(U_IN_DOXYGEN) +#if U_PLATFORM == U_PF_WINDOWS && !defined(U_IN_DOXYGEN) && !defined(U_STATIC_IMPLEMENTATION) } // namespace icu::number U_NAMESPACE_END @@ -608,49 +608,6 @@ class U_I18N_API FormattedNumberRange : public UMemory, public FormattedValue { /** @copydoc FormattedValue::nextPosition() */ UBool nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const U_OVERRIDE; -#ifndef U_HIDE_DEPRECATED_API - /** - * Export the first formatted number as a decimal number. This endpoint - * is useful for obtaining the exact number being printed after scaling - * and rounding have been applied by the number range formatting pipeline. - * - * The syntax of the unformatted number is a "numeric string" - * as defined in the Decimal Arithmetic Specification, available at - * http://speleotrove.com/decimal - * - * TODO(ICU-21275): This function will be removed in ICU 69. - * Use getDecimalNumbers() instead. - * - * @param status Set if an error occurs. - * @return A decimal representation of the first formatted number. - * @deprecated ICU 68 Use getDecimalNumbers instead. - * @see NumberRangeFormatter - * @see #getSecondDecimal - */ - UnicodeString getFirstDecimal(UErrorCode& status) const; - - /** - * Export the second formatted number as a decimal number. This endpoint - * is useful for obtaining the exact number being printed after scaling - * and rounding have been applied by the number range formatting pipeline. 
- * - * The syntax of the unformatted number is a "numeric string" - * as defined in the Decimal Arithmetic Specification, available at - * http://speleotrove.com/decimal - * - * TODO(ICU-21275): This function will be removed in ICU 69. - * Use getDecimalNumbers() instead. - * - * @param status Set if an error occurs. - * @return A decimal representation of the second formatted number. - * @deprecated ICU 68 Use getDecimalNumbers instead. - * @see NumberRangeFormatter - * @see #getFirstDecimal - */ - UnicodeString getSecondDecimal(UErrorCode& status) const; -#endif // U_HIDE_DEPRECATED_API - - #ifndef U_HIDE_DRAFT_API /** * Extracts the formatted range as a pair of decimal numbers. This endpoint @@ -749,7 +706,7 @@ class U_I18N_API FormattedNumberRange : public UMemory, public FormattedValue { }; #ifndef U_HIDE_DRAFT_API -// Note: This is draft ICU 68 +// inline impl of @draft ICU 68 method template std::pair FormattedNumberRange::getDecimalNumbers(UErrorCode& status) const { StringClass str1; diff --git a/deps/icu-small/source/i18n/unicode/rbnf.h b/deps/icu-small/source/i18n/unicode/rbnf.h index 13bb7385b19921..ce60b9bec6853a 100644 --- a/deps/icu-small/source/i18n/unicode/rbnf.h +++ b/deps/icu-small/source/i18n/unicode/rbnf.h @@ -543,7 +543,7 @@ enum URBNFRuleSetTag { * names in this array will be treated as public rule set names by the API. Each subsequent * element is an array of localizations of these names. The first element of one of these * subarrays is the locale name, and the remaining elements are localizations of the - * public rule set names, in the same order as they were listed in the first arrray.

    + * public rule set names, in the same order as they were listed in the first array.

    *

    In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used * to separate elements of an array. Whitespace is ignored, unless quoted.

    *

    For example:

    @@ -653,7 +653,7 @@ class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
     
       /**
        * Creates a RuleBasedNumberFormat from a predefined ruleset.  The selector
    -   * code choosed among three possible predefined formats: spellout, ordinal,
    +   * code chooses among three possible predefined formats: spellout, ordinal,
        * and duration.
        * @param tag A selector code specifying which kind of formatter to create for that
        * locale.  There are four legal values: URBNF_SPELLOUT, which creates a formatter that
    diff --git a/deps/icu-small/source/i18n/unicode/rbtz.h b/deps/icu-small/source/i18n/unicode/rbtz.h
    index f7b45fb8ed540d..9fc0fd4657e7b8 100644
    --- a/deps/icu-small/source/i18n/unicode/rbtz.h
    +++ b/deps/icu-small/source/i18n/unicode/rbtz.h
    @@ -107,7 +107,7 @@ class U_I18N_API RuleBasedTimeZone : public BasicTimeZone {
     
         /**
          * Makes the TimeZoneRule ready to handle actual timezone
    -     * calcuation APIs.  This method collects time zone rules specified
    +     * calculation APIs.  This method collects time zone rules specified
          * by the caller via the constructor and addTransitionRule() and
          * builds internal structure for making the object ready to support
          * time zone APIs such as getOffset(), getNextTransition() and others.
    @@ -302,12 +302,16 @@ class U_I18N_API RuleBasedTimeZone : public BasicTimeZone {
         virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
             const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const;
     
    +#ifndef U_FORCE_HIDE_DRAFT_API
         /**
          * Get time zone offsets from local wall time.
    -     * @internal
    +     * @draft ICU 69
          */
    -    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
    +    virtual void getOffsetFromLocal(
    +        UDate date, UTimeZoneLocalOption nonExistingTimeOpt,
    +        UTimeZoneLocalOption duplicatedTimeOpt,
             int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const;
    +#endif /* U_FORCE_HIDE_DRAFT_API */
     
     private:
         void deleteRules(void);
    diff --git a/deps/icu-small/source/i18n/unicode/simpletz.h b/deps/icu-small/source/i18n/unicode/simpletz.h
    index 8e5a877dabaddb..eb888cea675ea0 100644
    --- a/deps/icu-small/source/i18n/unicode/simpletz.h
    +++ b/deps/icu-small/source/i18n/unicode/simpletz.h
    @@ -620,12 +620,16 @@ class U_I18N_API SimpleTimeZone: public BasicTimeZone {
         virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
                                int32_t& dstOffset, UErrorCode& ec) const;
     
    +#ifndef U_FORCE_HIDE_DRAFT_API
         /**
          * Get time zone offsets from local wall time.
    -     * @internal
    +     * @draft ICU 69
          */
    -    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
    +    virtual void getOffsetFromLocal(
    +        UDate date, UTimeZoneLocalOption nonExistingTimeOpt,
    +        UTimeZoneLocalOption duplicatedTimeOpt,
             int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const;
    +#endif /* U_FORCE_HIDE_DRAFT_API */
     
         /**
          * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
    diff --git a/deps/icu-small/source/i18n/unicode/tblcoll.h b/deps/icu-small/source/i18n/unicode/tblcoll.h
    index a004dd6644c394..1709e836dd4e98 100644
    --- a/deps/icu-small/source/i18n/unicode/tblcoll.h
    +++ b/deps/icu-small/source/i18n/unicode/tblcoll.h
    @@ -112,7 +112,7 @@ class UVector64;
      * Note, RuleBasedCollator is not to be subclassed.
      * @see        Collator
      */
    -class U_I18N_API RuleBasedCollator : public Collator {
    +class U_I18N_API RuleBasedCollator U_FINAL : public Collator {
     public:
         /**
          * RuleBasedCollator constructor. This takes the table rules and builds a
    diff --git a/deps/icu-small/source/i18n/unicode/translit.h b/deps/icu-small/source/i18n/unicode/translit.h
    index 2aa02c39f13446..0556b740fab792 100644
    --- a/deps/icu-small/source/i18n/unicode/translit.h
    +++ b/deps/icu-small/source/i18n/unicode/translit.h
    @@ -375,7 +375,7 @@ class TransliteratorIDParser;
      *
      * 

    It is also possible to match the beginning or the end of the text using a UnicodeSet. * This is done by including a virtual anchor character '$' at the end of the - * set pattern. Although this is usually the match chafacter for the end anchor, the set will + * set pattern. Although this is usually the match character for the end anchor, the set will * match either the beginning or the end of the text, depending on its placement. For * example: * @@ -683,8 +683,8 @@ class U_I18N_API Transliterator : public UObject { * unambiguous transliterations. After the last call to this * method, there may be untransliterated text that is waiting for * more input to resolve an ambiguity. In order to perform these - * pending transliterations, clients should call {@link - * #finishTransliteration } after the last call to this + * pending transliterations, clients should call + * {@link #finishTransliteration } after the last call to this * method has been made. * * @param text the buffer holding transliterated and untransliterated text @@ -741,8 +741,7 @@ class U_I18N_API Transliterator : public UObject { /** * Transliterates the portion of the text buffer that can be * transliterated unambiguosly. This is a convenience method; see - * {@link - * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } + * {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } * for details. * @param text the buffer holding transliterated and * untransliterated text @@ -761,8 +760,7 @@ class U_I18N_API Transliterator : public UObject { * transliterate(). * @param text the buffer holding transliterated and * untransliterated text. 
- * @param index the array of indices previously passed to {@link - * #transliterate } + * @param index the array of indices previously passed to {@link #transliterate } * @stable ICU 2.0 */ virtual void finishTransliteration(Replaceable& text, @@ -883,7 +881,7 @@ class U_I18N_API Transliterator : public UObject { * @param text the text to be transliterated * @param index the position indices * @param incremental if true, then assume more characters may be inserted - * at index.limit, and postpone processing to accomodate future incoming + * at index.limit, and postpone processing to accommodate future incoming * characters * @stable ICU 2.4 */ @@ -913,7 +911,7 @@ class U_I18N_API Transliterator : public UObject { * @param text the text to be transliterated * @param index the position indices * @param incremental if true, then assume more characters may be inserted - * at index.limit, and postpone processing to accomodate future incoming + * at index.limit, and postpone processing to accommodate future incoming * characters * @param rollback if true and if incremental is true, then perform special * incremental processing, as described above, and undo partial @@ -968,8 +966,8 @@ class U_I18N_API Transliterator : public UObject { /** * Returns a name for this transliterator that is appropriate for - * display to the user in the default locale. See {@link - * #getDisplayName } for details. + * display to the user in the default locale. See {@link #getDisplayName } + * for details. * @param ID the string identifier for this transliterator * @param result Output param to receive the display name * @return A reference to 'result'. @@ -1168,8 +1166,8 @@ class U_I18N_API Transliterator : public UObject { * input text by this Transliterator. This incorporates this * object's current filter; if the filter is changed, the return * value of this function will change. The default implementation - * returns an empty set. 
Some subclasses may override {@link - * #handleGetSourceSet } to return a more precise result. The + * returns an empty set. Some subclasses may override + * {@link #handleGetSourceSet } to return a more precise result. The * return result is approximate in any case and is intended for * use by tests, tools, or utilities. * @param result receives result set; previous contents lost diff --git a/deps/icu-small/source/i18n/unicode/tznames.h b/deps/icu-small/source/i18n/unicode/tznames.h index 2e20eff6089302..19858cd7e2cf6d 100644 --- a/deps/icu-small/source/i18n/unicode/tznames.h +++ b/deps/icu-small/source/i18n/unicode/tznames.h @@ -193,7 +193,7 @@ class U_I18N_API TimeZoneNames : public UObject { /** * Returns an enumeration of all available meta zone IDs used by the given time zone. - * @param tzID The canoical tiem zone ID. + * @param tzID The canonical time zone ID. * @param status Receives the status. * @return an enumeration object, owned by the caller. * @stable ICU 50 diff --git a/deps/icu-small/source/i18n/unicode/tzrule.h b/deps/icu-small/source/i18n/unicode/tzrule.h index c6d6b9631fcd69..2a983ef58ac448 100644 --- a/deps/icu-small/source/i18n/unicode/tzrule.h +++ b/deps/icu-small/source/i18n/unicode/tzrule.h @@ -372,7 +372,7 @@ class U_I18N_API InitialTimeZoneRule : public TimeZoneRule { /** * AnnualTimeZoneRule is a class used for representing a time zone - * rule which takes effect annually. The calenday system used for the rule is + * rule which takes effect annually. 
The calendar system used for the rule is * is based on Gregorian calendar * * @stable ICU 3.8 diff --git a/deps/icu-small/source/i18n/unicode/ucal.h b/deps/icu-small/source/i18n/unicode/ucal.h index d491f5d610f95a..04c4a25de99951 100644 --- a/deps/icu-small/source/i18n/unicode/ucal.h +++ b/deps/icu-small/source/i18n/unicode/ucal.h @@ -1617,6 +1617,109 @@ U_CAPI int32_t U_EXPORT2 ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* region, UChar* id, int32_t idCapacity, UErrorCode* status); +#ifndef U_FORCE_HIDE_DRAFT_API +/** + * Options used by ucal_getTimeZoneOffsetFromLocal and BasicTimeZone::getOffsetFromLocal() + * to specify how to interpret an input time when it does not exist, or when it is ambiguous, + * around a time zone transition. + * @draft ICU 69 + */ +enum UTimeZoneLocalOption { +#ifndef U_HIDE_DRAFT_API + /** + * An input time is always interpreted as local time before + * a time zone transition. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_FORMER = 0x04, + /** + * An input time is always interpreted as local time after + * a time zone transition. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_LATTER = 0x0C, + /** + * An input time is interpreted as standard time when local + * time is switched to/from daylight saving time. When both + * sides of a time zone transition are standard time, + * or daylight saving time, the local time before the + * transition is used. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_STANDARD_FORMER = UCAL_TZ_LOCAL_FORMER | 0x01, + /** + * An input time is interpreted as standard time when local + * time is switched to/from daylight saving time. When both + * sides of a time zone transition are standard time, + * or daylight saving time, the local time after the + * transition is used. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_STANDARD_LATTER = UCAL_TZ_LOCAL_LATTER | 0x01, + /** + * An input time is interpreted as daylight saving time when + * local time is switched to/from standard time. 
When both + * sides of a time zone transition are standard time, + * or daylight saving time, the local time before the + * transition is used. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_DAYLIGHT_FORMER = UCAL_TZ_LOCAL_FORMER | 0x03, + /** + * An input time is interpreted as daylight saving time when + * local time is switched to/from standard time. When both + * sides of a time zone transition are standard time, + * or daylight saving time, the local time after the + * transition is used. + * @draft ICU 69 + */ + UCAL_TZ_LOCAL_DAYLIGHT_LATTER = UCAL_TZ_LOCAL_LATTER | 0x03, +#else /* U_HIDE_DRAFT_API */ + /** + * Dummy value to prevent empty enum if U_HIDE_DRAFT_API. + * This will go away when draft conditionals are removed. + * @internal + */ + UCAL_TZ_LOCAL_NONE = 0, +#endif /* U_HIDE_DRAFT_API */ +}; +typedef enum UTimeZoneLocalOption UTimeZoneLocalOption; /**< @draft ICU 69 */ + +/** +* Returns the time zone raw and GMT offset for the given moment +* in time. Upon return, local-millis = GMT-millis + rawOffset + +* dstOffset. All computations are performed in the proleptic +* Gregorian calendar. +* +* @param cal The UCalendar which specify the local date and time value to query. +* @param nonExistingTimeOpt The option to indicate how to interpret the date and +* time in the calendar represent a local time that skipped at a positive time +* zone transitions (e.g. when the daylight saving time starts or the time zone +* offset is increased due to a time zone rule change). +* @param duplicatedTimeOpt The option to indicate how to interpret the date and +* time in the calendar represent a local time that repeating multiple times at a +* negative time zone transition (e.g. when the daylight saving time ends or the +* time zone offset is decreased due to a time zone rule change) +* @param rawOffset output parameter to receive the raw offset, that +* is, the offset not including DST adjustments. 
+* If the status is set to one of the error code, the value set is unspecified. +* @param dstOffset output parameter to receive the DST offset, +* that is, the offset to be added to `rawOffset' to obtain the +* total offset between local and GMT time. If DST is not in +* effect, this value is zero; otherwise it is a positive value, +* typically one hour. +* If the status is set to one of the error code, the value set is unspecified. +* @param status A pointer to a UErrorCode to receive any errors. +* @draft ICU 69 +*/ +U_CAPI void U_EXPORT2 +ucal_getTimeZoneOffsetFromLocal( + const UCalendar* cal, + UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t* rawOffset, int32_t* dstOffset, UErrorCode* status); +#endif /* U_FORCE_HIDE_DRAFT_API */ + #endif /* #if !UCONFIG_NO_FORMATTING */ #endif diff --git a/deps/icu-small/source/i18n/unicode/ucol.h b/deps/icu-small/source/i18n/unicode/ucol.h index 83774bc8ec6769..6122cc1d59cbf3 100644 --- a/deps/icu-small/source/i18n/unicode/ucol.h +++ b/deps/icu-small/source/i18n/unicode/ucol.h @@ -83,7 +83,7 @@ typedef enum { } UCollationResult ; -/** Enum containing attribute values for controling collation behavior. +/** Enum containing attribute values for controlling collation behavior. * Here are all the allowable values. Not every attribute can take every value. The only * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined * value for that locale diff --git a/deps/icu-small/source/i18n/unicode/udat.h b/deps/icu-small/source/i18n/unicode/udat.h index ec25eba8085223..2963e5506c821d 100644 --- a/deps/icu-small/source/i18n/unicode/udat.h +++ b/deps/icu-small/source/i18n/unicode/udat.h @@ -976,37 +976,35 @@ udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute att U_CAPI void U_EXPORT2 udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool newValue, UErrorCode* status); -#ifndef U_HIDE_DRAFT_API /** * Hour Cycle. 
- * @draft ICU 67 + * @stable ICU 67 */ typedef enum UDateFormatHourCycle { /** * Hour in am/pm (0~11) - * @draft ICU 67 + * @stable ICU 67 */ UDAT_HOUR_CYCLE_11, /** * Hour in am/pm (1~12) - * @draft ICU 67 + * @stable ICU 67 */ UDAT_HOUR_CYCLE_12, /** * Hour in day (0~23) - * @draft ICU 67 + * @stable ICU 67 */ UDAT_HOUR_CYCLE_23, /** * Hour in day (1~24) - * @draft ICU 67 + * @stable ICU 67 */ UDAT_HOUR_CYCLE_24 } UDateFormatHourCycle; -#endif /* U_HIDE_DRAFT_API */ #if U_SHOW_CPLUSPLUS_API diff --git a/deps/icu-small/source/i18n/unicode/udateintervalformat.h b/deps/icu-small/source/i18n/unicode/udateintervalformat.h index a840ed595b45cb..9ed53a87d68904 100644 --- a/deps/icu-small/source/i18n/unicode/udateintervalformat.h +++ b/deps/icu-small/source/i18n/unicode/udateintervalformat.h @@ -252,7 +252,6 @@ udtitvfmt_format(const UDateIntervalFormat* formatter, UErrorCode* status); -#ifndef U_HIDE_DRAFT_API /** * Formats a date/time range using the conventions established for the * UDateIntervalFormat object. @@ -267,7 +266,7 @@ udtitvfmt_format(const UDateIntervalFormat* formatter, * formatting operation. * @param status * A pointer to a UErrorCode to receive any errors. - * @draft ICU 67 + * @stable ICU 67 */ U_CAPI void U_EXPORT2 udtitvfmt_formatToResult( @@ -291,7 +290,7 @@ udtitvfmt_formatToResult( * formatting operation. * @param status * A pointer to a UErrorCode to receive any errors. 
- * @draft ICU 67 + * @stable ICU 67 */ U_CAPI void U_EXPORT2 @@ -301,7 +300,6 @@ udtitvfmt_formatCalendarToResult( UCalendar* toCalendar, UFormattedDateInterval* result, UErrorCode* status); -#endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_DRAFT_API /** diff --git a/deps/icu-small/source/i18n/unicode/udatpg.h b/deps/icu-small/source/i18n/unicode/udatpg.h index 9fe267a3f488f4..893e1e6831ae34 100644 --- a/deps/icu-small/source/i18n/unicode/udatpg.h +++ b/deps/icu-small/source/i18n/unicode/udatpg.h @@ -657,7 +657,6 @@ udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg, #if !UCONFIG_NO_FORMATTING -#ifndef U_HIDE_DRAFT_API /** * Return the default hour cycle for a locale. Uses the locale that the * UDateTimePatternGenerator was initially created with. @@ -669,11 +668,10 @@ udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg, * failure before the function call. Set to U_UNSUPPORTED_ERROR * if used on an empty instance. * @return the default hour cycle. - * @draft ICU 67 + * @stable ICU 67 */ U_CAPI UDateFormatHourCycle U_EXPORT2 udatpg_getDefaultHourCycle(const UDateTimePatternGenerator *dtpg, UErrorCode* pErrorCode); -#endif /* U_HIDE_DRAFT_API */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/uformattedvalue.h b/deps/icu-small/source/i18n/unicode/uformattedvalue.h index 3017372d8b3c58..c964b3a74dd3a1 100644 --- a/deps/icu-small/source/i18n/unicode/uformattedvalue.h +++ b/deps/icu-small/source/i18n/unicode/uformattedvalue.h @@ -93,6 +93,15 @@ typedef enum UFieldCategory { */ UFIELD_CATEGORY_DATE_INTERVAL_SPAN = 0x1000 + UFIELD_CATEGORY_DATE_INTERVAL, +#ifndef U_HIDE_DRAFT_API + /** + * Category for spans in a number range. 
+ * + * @draft ICU 69 + */ + UFIELD_CATEGORY_NUMBER_RANGE_SPAN = 0x1000 + UFIELD_CATEGORY_NUMBER, +#endif // U_HIDE_DRAFT_API + } UFieldCategory; diff --git a/deps/icu-small/source/i18n/unicode/unum.h b/deps/icu-small/source/i18n/unicode/unum.h index ce1685f129060a..76c7d151357875 100644 --- a/deps/icu-small/source/i18n/unicode/unum.h +++ b/deps/icu-small/source/i18n/unicode/unum.h @@ -302,7 +302,24 @@ typedef enum UNumberFormatRoundingMode { * ROUND_UNNECESSARY reports an error if formatted result is not exact. * @stable ICU 4.8 */ - UNUM_ROUND_UNNECESSARY + UNUM_ROUND_UNNECESSARY, +#ifndef U_HIDE_DRAFT_API + /** + * Rounds ties toward the odd number. + * @draft ICU 69 + */ + UNUM_ROUND_HALF_ODD, + /** + * Rounds ties toward +∞. + * @draft ICU 69 + */ + UNUM_ROUND_HALF_CEILING, + /** + * Rounds ties toward -∞. + * @draft ICU 69 + */ + UNUM_ROUND_HALF_FLOOR, +#endif // U_HIDE_DRAFT_API } UNumberFormatRoundingMode; /** The possible number format pad positions. @@ -692,6 +709,12 @@ unum_formatDecimal( const UNumberFormat* fmt, /** * Format a double currency amount using a UNumberFormat. * The double will be formatted according to the UNumberFormat's locale. + * + * To format an exact decimal value with a currency, use + * `unum_setTextAttribute(UNUM_CURRENCY_CODE, ...)` followed by unum_formatDecimal. + * Your UNumberFormat must be created with the UNUM_CURRENCY style. Alternatively, + * consider using unumf_openForSkeletonAndLocale. + * * @param fmt the formatter to use * @param number the number to format * @param currency the 3-letter null-terminated ISO 4217 currency code diff --git a/deps/icu-small/source/i18n/unicode/unumberformatter.h b/deps/icu-small/source/i18n/unicode/unumberformatter.h index 754987aea0923b..341d9e4ad9c628 100644 --- a/deps/icu-small/source/i18n/unicode/unumberformatter.h +++ b/deps/icu-small/source/i18n/unicode/unumberformatter.h @@ -78,6 +78,62 @@ *

    */ +#ifndef U_FORCE_HIDE_DRAFT_API +/** + * An enum declaring how to resolve conflicts between maximum fraction digits and maximum + * significant digits. + * + * There are two modes, RELAXED and STRICT: + * + * - RELAXED: Relax one of the two constraints (fraction digits or significant digits) in order + * to round the number to a higher level of precision. + * - STRICT: Enforce both constraints, resulting in the number being rounded to a lower + * level of precision. + * + * The default settings for compact notation rounding are Max-Fraction = 0 (round to the nearest + * integer), Max-Significant = 2 (round to 2 significant digits), and priority RELAXED (choose + * the constraint that results in more digits being displayed). + * + * Conflicting *minimum* fraction and significant digits are always resolved in the direction that + * results in more trailing zeros. + * + * Example 1: Consider the number 3.141, with various different settings: + * + * - Max-Fraction = 1: "3.1" + * - Max-Significant = 3: "3.14" + * + * The rounding priority determines how to resolve the conflict when both Max-Fraction and + * Max-Significant are set. With RELAXED, the less-strict setting (the one that causes more digits + * to be displayed) will be used; Max-Significant wins. With STRICT, the more-strict setting (the + * one that causes fewer digits to be displayed) will be used; Max-Fraction wins. + * + * Example 2: Consider the number 8317, with various different settings: + * + * - Max-Fraction = 1: "8317" + * - Max-Significant = 3: "8320" + * + * Here, RELAXED favors Max-Fraction and STRICT favors Max-Significant. Note that this larger + * number caused the two modes to favor the opposite result. + * + * @draft ICU 69 + */ +typedef enum UNumberRoundingPriority { + /** + * Favor greater precision by relaxing one of the rounding constraints. 
+ * + * @draft ICU 69 + */ + UNUM_ROUNDING_PRIORITY_RELAXED, + + /** + * Favor adherence to all rounding constraints by producing lower precision. + * + * @draft ICU 69 + */ + UNUM_ROUNDING_PRIORITY_STRICT, +} UNumberRoundingPriority; +#endif // U_FORCE_HIDE_DRAFT_API + /** * An enum declaring how to render units, including currencies. Example outputs when formatting 123 USD and 123 * meters in en-CA: @@ -108,7 +164,7 @@ typedef enum UNumberUnitWidth { * * @stable ICU 60 */ - UNUM_UNIT_WIDTH_NARROW, + UNUM_UNIT_WIDTH_NARROW = 0, /** * Print an abbreviated version of the unit name. Similar to NARROW, but use a slightly wider abbreviation or @@ -124,7 +180,7 @@ typedef enum UNumberUnitWidth { * * @stable ICU 60 */ - UNUM_UNIT_WIDTH_SHORT, + UNUM_UNIT_WIDTH_SHORT = 1, /** * Print the full name of the unit, without any abbreviations. @@ -135,7 +191,7 @@ typedef enum UNumberUnitWidth { * * @stable ICU 60 */ - UNUM_UNIT_WIDTH_FULL_NAME, + UNUM_UNIT_WIDTH_FULL_NAME = 2, /** * Use the three-digit ISO XXX code in place of the symbol for displaying currencies. The behavior of this @@ -146,7 +202,7 @@ typedef enum UNumberUnitWidth { * * @stable ICU 60 */ - UNUM_UNIT_WIDTH_ISO_CODE, + UNUM_UNIT_WIDTH_ISO_CODE = 3, #ifndef U_HIDE_DRAFT_API /** @@ -158,7 +214,7 @@ typedef enum UNumberUnitWidth { * * @draft ICU 68 */ - UNUM_UNIT_WIDTH_FORMAL, + UNUM_UNIT_WIDTH_FORMAL = 4, /** * Use the alternate variant of the currency symbol; for example, "TL" for the Turkish @@ -169,7 +225,7 @@ typedef enum UNumberUnitWidth { * * @draft ICU 68 */ - UNUM_UNIT_WIDTH_VARIANT, + UNUM_UNIT_WIDTH_VARIANT = 5, #endif // U_HIDE_DRAFT_API /** @@ -179,14 +235,16 @@ typedef enum UNumberUnitWidth { * * @stable ICU 60 */ - UNUM_UNIT_WIDTH_HIDDEN, + UNUM_UNIT_WIDTH_HIDDEN = 6, + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // needed for unconditionalized struct MacroProps /** * One more than the highest UNumberUnitWidth value. 
* * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ - UNUM_UNIT_WIDTH_COUNT + UNUM_UNIT_WIDTH_COUNT = 7 } UNumberUnitWidth; /** @@ -314,9 +372,12 @@ typedef enum UNumberSignDisplay { * Show the minus sign on negative numbers, and do not show the sign on positive numbers. This is the default * behavior. * + * If using this option, a sign will be displayed on negative zero, including negative numbers + * that round to zero. To hide the sign on negative zero, use the NEGATIVE option. + * * @stable ICU 60 */ - UNUM_SIGN_AUTO, + UNUM_SIGN_AUTO, /** * Show the minus sign on negative numbers and the plus sign on positive numbers, including zero. @@ -324,14 +385,14 @@ typedef enum UNumberSignDisplay { * * @stable ICU 60 */ - UNUM_SIGN_ALWAYS, + UNUM_SIGN_ALWAYS, /** * Do not show the sign on positive or negative numbers. * * @stable ICU 60 */ - UNUM_SIGN_NEVER, + UNUM_SIGN_NEVER, /** * Use the locale-dependent accounting format on negative numbers, and do not show the sign on positive numbers. @@ -347,7 +408,7 @@ typedef enum UNumberSignDisplay { * * @stable ICU 60 */ - UNUM_SIGN_ACCOUNTING, + UNUM_SIGN_ACCOUNTING, /** * Use the locale-dependent accounting format on negative numbers, and show the plus sign on @@ -357,7 +418,7 @@ typedef enum UNumberSignDisplay { * * @stable ICU 60 */ - UNUM_SIGN_ACCOUNTING_ALWAYS, + UNUM_SIGN_ACCOUNTING_ALWAYS, /** * Show the minus sign on negative numbers and the plus sign on positive numbers. Do not show a @@ -365,7 +426,7 @@ typedef enum UNumberSignDisplay { * * @stable ICU 61 */ - UNUM_SIGN_EXCEPT_ZERO, + UNUM_SIGN_EXCEPT_ZERO, /** * Use the locale-dependent accounting format on negative numbers, and show the plus sign on @@ -374,14 +435,32 @@ typedef enum UNumberSignDisplay { * * @stable ICU 61 */ - UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO, + UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO, + +#ifndef U_HIDE_DRAFT_API + /** + * Same as AUTO, but do not show the sign on negative zero. 
+ * + * @draft ICU 69 + */ + UNUM_SIGN_NEGATIVE, + + /** + * Same as ACCOUNTING, but do not show the sign on negative zero. + * + * @draft ICU 69 + */ + UNUM_SIGN_ACCOUNTING_NEGATIVE, +#endif // U_HIDE_DRAFT_API + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // needed for unconditionalized struct MacroProps /** * One more than the highest UNumberSignDisplay value. * * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420. */ - UNUM_SIGN_COUNT + UNUM_SIGN_COUNT = 9, } UNumberSignDisplay; /** @@ -411,6 +490,8 @@ typedef enum UNumberDecimalSeparatorDisplay { */ UNUM_DECIMAL_SEPARATOR_ALWAYS, + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // needed for unconditionalized struct MacroProps /** * One more than the highest UNumberDecimalSeparatorDisplay value. * @@ -419,6 +500,32 @@ typedef enum UNumberDecimalSeparatorDisplay { UNUM_DECIMAL_SEPARATOR_COUNT } UNumberDecimalSeparatorDisplay; +#ifndef U_FORCE_HIDE_DRAFT_API +/** + * An enum declaring how to render trailing zeros. + * + * - UNUM_TRAILING_ZERO_AUTO: 0.90, 1.00, 1.10 + * - UNUM_TRAILING_ZERO_HIDE_IF_WHOLE: 0.90, 1, 1.10 + * + * @draft ICU 69 + */ +typedef enum UNumberTrailingZeroDisplay { + /** + * Display trailing zeros according to the settings for minimum fraction and significant digits. + * + * @draft ICU 69 + */ + UNUM_TRAILING_ZERO_AUTO, + + /** + * Same as AUTO, but hide trailing zeros after the decimal separator if they are all zero. + * + * @draft ICU 69 + */ + UNUM_TRAILING_ZERO_HIDE_IF_WHOLE, +} UNumberTrailingZeroDisplay; +#endif // U_FORCE_HIDE_DRAFT_API + struct UNumberFormatter; /** * C-compatible version of icu::number::LocalizedNumberFormatter. @@ -449,6 +556,9 @@ typedef struct UFormattedNumber UFormattedNumber; * For more details on skeleton strings, see the documentation in numberformatter.h. For more details on * the usage of this API, see the documentation at the top of unumberformatter.h. 
* + * For more information on number skeleton strings, see: + * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html + * * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead. * * @param skeleton The skeleton string, like u"percent precision-integer" @@ -466,6 +576,9 @@ unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const * Like unumf_openForSkeletonAndLocale, but accepts a UParseError, which will be populated with the * location of a skeleton syntax error if such a syntax error exists. * + * For more information on number skeleton strings, see: + * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html + * * @param skeleton The skeleton string, like u"percent precision-integer" * @param skeletonLen The number of UChars in the skeleton string, or -1 if it is NUL-terminated. * @param locale The NUL-terminated locale ID. diff --git a/deps/icu-small/source/i18n/unicode/unumberrangeformatter.h b/deps/icu-small/source/i18n/unicode/unumberrangeformatter.h index 738bfb9f70acfd..e1c5a5760fd55a 100644 --- a/deps/icu-small/source/i18n/unicode/unumberrangeformatter.h +++ b/deps/icu-small/source/i18n/unicode/unumberrangeformatter.h @@ -354,7 +354,6 @@ unumrf_resultGetIdentityResult( UErrorCode* ec); -#ifndef U_HIDE_DRAFT_API /** * Extracts the first formatted number as a decimal number. 
This endpoint * is useful for obtaining the exact number being printed after scaling @@ -407,7 +406,6 @@ unumrf_resultGetSecondDecimalNumber( char* dest, int32_t destCapacity, UErrorCode* ec); -#endif // U_HIDE_DRAFT_API /** diff --git a/deps/icu-small/source/i18n/unicode/uspoof.h b/deps/icu-small/source/i18n/unicode/uspoof.h index 7680c687ce4daf..bf675bef51b51c 100644 --- a/deps/icu-small/source/i18n/unicode/uspoof.h +++ b/deps/icu-small/source/i18n/unicode/uspoof.h @@ -502,7 +502,7 @@ typedef enum USpoofChecks { USPOOF_ALL_CHECKS = 0xFFFF, /** - * Enable the return of auxillary (non-error) information in the + * Enable the return of auxiliary (non-error) information in the * upper bits of the check results value. * * If this "check" is not enabled, the results of {@link uspoof_check} will be diff --git a/deps/icu-small/source/i18n/unicode/vtzone.h b/deps/icu-small/source/i18n/unicode/vtzone.h index 89a79c07b3215f..c0a2a14c897077 100644 --- a/deps/icu-small/source/i18n/unicode/vtzone.h +++ b/deps/icu-small/source/i18n/unicode/vtzone.h @@ -157,7 +157,7 @@ class U_I18N_API VTimeZone : public BasicTimeZone { void write(UnicodeString& result, UErrorCode& status) const; /** - * Writes RFC2445 VTIMEZONE data for this time zone applicalbe + * Writes RFC2445 VTIMEZONE data for this time zone applicable * for dates after the specified start time. * @param start The start date. * @param result Output param to filled in with the VTIMEZONE data. @@ -167,7 +167,7 @@ class U_I18N_API VTimeZone : public BasicTimeZone { void write(UDate start, UnicodeString& result, UErrorCode& status) const; /** - * Writes RFC2445 VTIMEZONE data applicalbe for the specified date. + * Writes RFC2445 VTIMEZONE data applicable for the specified date. * Some common iCalendar implementations can only handle a single time * zone property or a pair of standard and daylight time properties using * BYDAY rule with day of week (such as BYDAY=1SUN). 
This method produce @@ -264,6 +264,17 @@ class U_I18N_API VTimeZone : public BasicTimeZone { virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const; +#ifndef U_FORCE_HIDE_DRAFT_API + /** + * Get time zone offsets from local wall time. + * @draft ICU 69 + */ + virtual void getOffsetFromLocal( + UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; +#endif /* U_FORCE_HIDE_DRAFT_API */ + /** * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add * to GMT to get local time, before taking daylight savings time into account). diff --git a/deps/icu-small/source/i18n/units_complexconverter.cpp b/deps/icu-small/source/i18n/units_complexconverter.cpp index 27f835e6dd4a29..db56f5ded9cf98 100644 --- a/deps/icu-small/source/i18n/units_complexconverter.cpp +++ b/deps/icu-small/source/i18n/units_complexconverter.cpp @@ -10,6 +10,7 @@ #include "cmemory.h" #include "number_decimalquantity.h" #include "number_roundingutils.h" +#include "putilimp.h" #include "uarrsort.h" #include "uassert.h" #include "unicode/fmtable.h" @@ -21,44 +22,73 @@ U_NAMESPACE_BEGIN namespace units { +ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &targetUnit, + const ConversionRates &ratesInfo, UErrorCode &status) + : units_(targetUnit.extractIndividualUnitsWithIndices(status)) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(units_.length() != 0); + + // Just borrowing a pointer to the instance + MeasureUnitImpl *biggestUnit = &units_[0]->unitImpl; + for (int32_t i = 1; i < units_.length(); i++) { + if (UnitsConverter::compareTwoUnits(units_[i]->unitImpl, *biggestUnit, ratesInfo, status) > 0 && + U_SUCCESS(status)) { + biggestUnit = &units_[i]->unitImpl; + } + + if (U_FAILURE(status)) { + return; + } + } + + this->init(*biggestUnit, ratesInfo, status); +} + 
+ComplexUnitsConverter::ComplexUnitsConverter(StringPiece inputUnitIdentifier, + StringPiece outputUnitsIdentifier, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + MeasureUnitImpl inputUnit = MeasureUnitImpl::forIdentifier(inputUnitIdentifier, status); + MeasureUnitImpl outputUnits = MeasureUnitImpl::forIdentifier(outputUnitsIdentifier, status); + + this->units_ = outputUnits.extractIndividualUnitsWithIndices(status); + U_ASSERT(units_.length() != 0); + + this->init(inputUnit, ConversionRates(status), status); +} ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, const MeasureUnitImpl &outputUnits, const ConversionRates &ratesInfo, UErrorCode &status) - : units_(outputUnits.extractIndividualUnits(status)) { + : units_(outputUnits.extractIndividualUnitsWithIndices(status)) { if (U_FAILURE(status)) { return; } U_ASSERT(units_.length() != 0); - // Save the desired order of output units before we sort units_ - for (int32_t i = 0; i < units_.length(); i++) { - outputUnits_.emplaceBackAndCheckErrorCode(status, units_[i]->copy(status).build(status)); - } + this->init(inputUnit, ratesInfo, status); +} - // NOTE: - // This comparator is used to sort the units in a descending order. Therefore, we return -1 if - // the left is bigger than right and so on. +void ComplexUnitsConverter::init(const MeasureUnitImpl &inputUnit, + const ConversionRates &ratesInfo, + UErrorCode &status) { + // Sorts units in descending order. Therefore, we return -1 if + // the left is bigger than right and so on. 
auto descendingCompareUnits = [](const void *context, const void *left, const void *right) { UErrorCode status = U_ZERO_ERROR; - const auto *leftPointer = static_cast(left); - const auto *rightPointer = static_cast(right); + const auto *leftPointer = static_cast(left); + const auto *rightPointer = static_cast(right); - UnitConverter fromLeftToRight(**leftPointer, // - **rightPointer, // - *static_cast(context), // - status); - - double rightFromOneLeft = fromLeftToRight.convert(1.0); - if (std::abs(rightFromOneLeft - 1.0) < 0.0000000001) { // Equals To - return 0; - } else if (rightFromOneLeft > 1.0) { // Greater Than - return -1; - } - - return 1; // Less Than + // Multiply by -1 to sort in descending order + return (-1) * UnitsConverter::compareTwoUnits((**leftPointer).unitImpl, // + (**rightPointer).unitImpl, // + *static_cast(context), // + status); }; uprv_sortArray(units_.getAlias(), // @@ -86,11 +116,11 @@ ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, // 3. then, the final result will be (6 feet and 6.74016 inches) for (int i = 0, n = units_.length(); i < n; i++) { if (i == 0) { // first element - unitConverters_.emplaceBackAndCheckErrorCode(status, inputUnit, *units_[i], ratesInfo, - status); + unitsConverters_.emplaceBackAndCheckErrorCode(status, inputUnit, units_[i]->unitImpl, + ratesInfo, status); } else { - unitConverters_.emplaceBackAndCheckErrorCode(status, *units_[i - 1], *units_[i], ratesInfo, - status); + unitsConverters_.emplaceBackAndCheckErrorCode(status, units_[i - 1]->unitImpl, + units_[i]->unitImpl, ratesInfo, status); } if (U_FAILURE(status)) { @@ -100,17 +130,17 @@ ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, } UBool ComplexUnitsConverter::greaterThanOrEqual(double quantity, double limit) const { - U_ASSERT(unitConverters_.length() > 0); + U_ASSERT(unitsConverters_.length() > 0); // First converter converts to the biggest quantity. 
- double newQuantity = unitConverters_[0]->convert(quantity); + double newQuantity = unitsConverters_[0]->convert(quantity); return newQuantity >= limit; } MaybeStackVector ComplexUnitsConverter::convert(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const { - // TODO(hugovdm): return an error for "foot-and-foot"? + // TODO: return an error for "foot-and-foot"? MaybeStackVector result; int sign = 1; if (quantity < 0) { @@ -120,133 +150,118 @@ MaybeStackVector ComplexUnitsConverter::convert(double quantity, // For N converters: // - the first converter converts from the input unit to the largest unit, - // - N-1 converters convert to bigger units for which we want integers, + // - the following N-2 converters convert to bigger units for which we want integers, // - the Nth converter (index N-1) converts to the smallest unit, for which // we keep a double. - MaybeStackArray intValues(unitConverters_.length() - 1, status); + MaybeStackArray intValues(unitsConverters_.length() - 1, status); if (U_FAILURE(status)) { return result; } - uprv_memset(intValues.getAlias(), 0, (unitConverters_.length() - 1) * sizeof(int64_t)); + uprv_memset(intValues.getAlias(), 0, (unitsConverters_.length() - 1) * sizeof(int64_t)); - for (int i = 0, n = unitConverters_.length(); i < n; ++i) { - quantity = (*unitConverters_[i]).convert(quantity); + for (int i = 0, n = unitsConverters_.length(); i < n; ++i) { + quantity = (*unitsConverters_[i]).convert(quantity); if (i < n - 1) { - // The double type has 15 decimal digits of precision. For choosing - // whether to use the current unit or the next smaller unit, we - // therefore nudge up the number with which the thresholding - // decision is made. However after the thresholding, we use the - // original values to ensure unbiased accuracy (to the extent of - // double's capabilities). 
- int64_t roundedQuantity = floor(quantity * (1 + DBL_EPSILON)); - intValues[i] = roundedQuantity; + // If quantity is at the limits of double's precision from an + // integer value, we take that integer value. + int64_t flooredQuantity = floor(quantity * (1 + DBL_EPSILON)); + if (uprv_isNaN(quantity)) { + // With clang on Linux: floor does not support NaN, resulting in + // a giant negative number. For now, we produce "0 feet, NaN + // inches". TODO(icu-units#131): revisit desired output. + flooredQuantity = 0; + } + intValues[i] = flooredQuantity; // Keep the residual of the quantity. // For example: `3.6 feet`, keep only `0.6 feet` - // - // When the calculation is near enough +/- DBL_EPSILON, we round to - // zero. (We also ensure no negative values here.) - if ((quantity - roundedQuantity) / quantity < DBL_EPSILON) { + double remainder = quantity - flooredQuantity; + if (remainder < 0) { + // Because we nudged flooredQuantity up by eps, remainder may be + // negative: we must treat such a remainder as zero. quantity = 0; } else { - quantity -= roundedQuantity; - } - } else { // LAST ELEMENT - if (rounder == nullptr) { - // Nothing to do for the last element. - break; + quantity = remainder; } + } + } - // Round the last value - // TODO(ICU-21288): get smarter about precision for mixed units. - number::impl::DecimalQuantity quant; - quant.setToDouble(quantity); - rounder->apply(quant, status); - if (U_FAILURE(status)) { - return result; - } - quantity = quant.toDouble(); - if (i == 0) { - // Last element is also the first element, so we're done - break; - } + applyRounder(intValues, quantity, rounder, status); - // Check if there's a carry, and bubble it back up the resulting intValues. 
- int64_t carry = floor(unitConverters_[i]->convertInverse(quantity) * (1 + DBL_EPSILON)); - if (carry <= 0) { - break; - } - quantity -= unitConverters_[i]->convert(carry); - intValues[i - 1] += carry; - - // We don't use the first converter: that one is for the input unit - for (int32_t j = i - 1; j > 0; j--) { - carry = floor(unitConverters_[j]->convertInverse(intValues[j]) * (1 + DBL_EPSILON)); - if (carry <= 0) { - break; - } - intValues[j] -= round(unitConverters_[j]->convert(carry)); - intValues[j - 1] += carry; - } - } + // Initialize empty result. We use a MaybeStackArray directly so we can + // assign pointers - for this privilege we have to take care of cleanup. + MaybeStackArray tmpResult(unitsConverters_.length(), status); + if (U_FAILURE(status)) { + return result; } - // Package values into Measure instances in result: - for (int i = 0, n = unitConverters_.length(); i < n; ++i) { + // Package values into temporary Measure instances in tmpResult: + for (int i = 0, n = unitsConverters_.length(); i < n; ++i) { if (i < n - 1) { Formattable formattableQuantity(intValues[i] * sign); // Measure takes ownership of the MeasureUnit* - MeasureUnit *type = new MeasureUnit(units_[i]->copy(status).build(status)); - if (result.emplaceBackAndCheckErrorCode(status, formattableQuantity, type, status) == - nullptr) { - // Ownership wasn't taken - U_ASSERT(U_FAILURE(status)); - delete type; - } - if (U_FAILURE(status)) { - return result; - } + MeasureUnit *type = new MeasureUnit(units_[i]->unitImpl.copy(status).build(status)); + tmpResult[units_[i]->index] = new Measure(formattableQuantity, type, status); } else { // LAST ELEMENT - // Add the last element, not an integer: Formattable formattableQuantity(quantity * sign); // Measure takes ownership of the MeasureUnit* - MeasureUnit *type = new MeasureUnit(units_[i]->copy(status).build(status)); - if (result.emplaceBackAndCheckErrorCode(status, formattableQuantity, type, status) == - nullptr) { - // Ownership wasn't taken 
- U_ASSERT(U_FAILURE(status)); - delete type; - } - if (U_FAILURE(status)) { - return result; - } - U_ASSERT(result.length() == i + 1); - U_ASSERT(result[i] != nullptr); + MeasureUnit *type = new MeasureUnit(units_[i]->unitImpl.copy(status).build(status)); + tmpResult[units_[i]->index] = new Measure(formattableQuantity, type, status); } } - MaybeStackVector orderedResult; - int32_t unitsCount = outputUnits_.length(); - U_ASSERT(unitsCount == units_.length()); - Measure **arr = result.getAlias(); - // O(N^2) is fine: mixed units' unitsCount is usually 2 or 3. - for (int32_t i = 0; i < unitsCount; i++) { - for (int32_t j = i; j < unitsCount; j++) { - // Find the next expected unit, and swap it into place. - U_ASSERT(result[j] != nullptr); - if (result[j]->getUnit() == *outputUnits_[i]) { - if (j != i) { - Measure *tmp = arr[j]; - arr[j] = arr[i]; - arr[i] = tmp; - } - } - } + + // Transfer values into result and return: + for(int32_t i = 0, n = unitsConverters_.length(); i < n; ++i) { + U_ASSERT(tmpResult[i] != nullptr); + result.emplaceBackAndCheckErrorCode(status, *tmpResult[i]); + delete tmpResult[i]; } return result; } +void ComplexUnitsConverter::applyRounder(MaybeStackArray &intValues, double &quantity, + icu::number::impl::RoundingImpl *rounder, + UErrorCode &status) const { + if (rounder == nullptr) { + // Nothing to do for the quantity. + return; + } + + number::impl::DecimalQuantity decimalQuantity; + decimalQuantity.setToDouble(quantity); + rounder->apply(decimalQuantity, status); + if (U_FAILURE(status)) { + return; + } + quantity = decimalQuantity.toDouble(); + + int32_t lastIndex = unitsConverters_.length() - 1; + if (lastIndex == 0) { + // Only one element, no need to bubble up the carry + return; + } + + // Check if there's a carry, and bubble it back up the resulting intValues. 
+ int64_t carry = floor(unitsConverters_[lastIndex]->convertInverse(quantity) * (1 + DBL_EPSILON)); + if (carry <= 0) { + return; + } + quantity -= unitsConverters_[lastIndex]->convert(carry); + intValues[lastIndex - 1] += carry; + + // We don't use the first converter: that one is for the input unit + for (int32_t j = lastIndex - 1; j > 0; j--) { + carry = floor(unitsConverters_[j]->convertInverse(intValues[j]) * (1 + DBL_EPSILON)); + if (carry <= 0) { + return; + } + intValues[j] -= round(unitsConverters_[j]->convert(carry)); + intValues[j - 1] += carry; + } +} + } // namespace units U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/units_complexconverter.h b/deps/icu-small/source/i18n/units_complexconverter.h index 83c5b94342f373..5c669b45ddd7df 100644 --- a/deps/icu-small/source/i18n/units_complexconverter.h +++ b/deps/icu-small/source/i18n/units_complexconverter.h @@ -24,9 +24,9 @@ U_NAMESPACE_BEGIN // Note: These need to be outside of the units namespace, or Clang will generate // a compile error. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -template class U_I18N_API MaybeStackArray; -template class U_I18N_API MemoryPool; -template class U_I18N_API MaybeStackVector; +template class U_I18N_API MaybeStackArray; +template class U_I18N_API MemoryPool; +template class U_I18N_API MaybeStackVector; template class U_I18N_API MaybeStackArray; template class U_I18N_API MemoryPool; template class U_I18N_API MaybeStackVector; @@ -42,12 +42,42 @@ namespace units { * For example, from `meter` to `foot+inch`. * * DESIGN: - * This class uses `UnitConverter` in order to perform the single converter (i.e. converters from a + * This class uses `UnitsConverter` in order to perform the single converter (i.e. converters from a * single unit to another single unit). Therefore, `ComplexUnitsConverter` class contains multiple - * instances of the `UnitConverter` to perform the conversion. + * instances of the `UnitsConverter` to perform the conversion. 
*/ class U_I18N_API ComplexUnitsConverter : public UMemory { public: + /** + * Constructs `ComplexUnitsConverter` for an `targetUnit` that could be Single, Compound or Mixed. + * In case of: + * 1- Single and Compound units, + * the conversion will not perform anything, the input will be equal to the output. + * 2- Mixed Unit + * the conversion will consider the input is the biggest unit. And will convert it to be spread + * through the target units. For example: if target unit is "inch-and-foot", and the input is 2.5. + * The converter will consider the input value in "foot", because foot is the biggest unit. + * Then, it will convert 2.5 feet to "inch-and-foot". + * + * @param targetUnit could be any units type (single, compound or mixed). + * @param ratesInfo + * @param status + */ + ComplexUnitsConverter(const MeasureUnitImpl &targetUnit, const ConversionRates &ratesInfo, + UErrorCode &status); + /** + * Constructor of `ComplexUnitsConverter`. + * NOTE: + * - inputUnit and outputUnits must be under the same category + * - e.g. meter to feet and inches --> all of them are length units. + * + * @param inputUnit represents the source unit. (should be single or compound unit). + * @param outputUnits represents the output unit. could be any type. (single, compound or mixed). + * @param status + */ + ComplexUnitsConverter(StringPiece inputUnitIdentifier, StringPiece outputUnitsIdentifier, + UErrorCode &status); + /** * Constructor of `ComplexUnitsConverter`. * NOTE: @@ -56,6 +86,7 @@ class U_I18N_API ComplexUnitsConverter : public UMemory { * * @param inputUnit represents the source unit. (should be single or compound unit). * @param outputUnits represents the output unit. could be any type. (single, compound or mixed). + * @param ratesInfo a ConversionRates instance containing the unit conversion rates. 
* @param status */ ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, const MeasureUnitImpl &outputUnits, @@ -78,11 +109,21 @@ class U_I18N_API ComplexUnitsConverter : public UMemory { convert(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; private: - MaybeStackVector unitConverters_; - // Individual units of mixed units, sorted big to small - MaybeStackVector units_; - // Individual units of mixed units, sorted in desired output order - MaybeStackVector outputUnits_; + MaybeStackVector unitsConverters_; + + // Individual units of mixed units, sorted big to small, with indices + // indicating the requested output mixed unit order. + MaybeStackVector units_; + + // Sorts units_, which must be populated before calling this, and populates + // unitsConverters_. + void init(const MeasureUnitImpl &inputUnit, const ConversionRates &ratesInfo, UErrorCode &status); + + // Applies the rounder to the quantity (last element) and bubble up any carried value to all the + // intValues. + // TODO(ICU-21288): get smarter about precision for mixed units. 
+ void applyRounder(MaybeStackArray &intValues, double &quantity, + icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; }; } // namespace units diff --git a/deps/icu-small/source/i18n/units_converter.cpp b/deps/icu-small/source/i18n/units_converter.cpp index a777d026b98756..2854ad30b3288f 100644 --- a/deps/icu-small/source/i18n/units_converter.cpp +++ b/deps/icu-small/source/i18n/units_converter.cpp @@ -26,7 +26,7 @@ void U_I18N_API Factor::multiplyBy(const Factor &rhs) { factorNum *= rhs.factorNum; factorDen *= rhs.factorDen; for (int i = 0; i < CONSTANTS_COUNT; i++) { - constants[i] += rhs.constants[i]; + constantExponents[i] += rhs.constantExponents[i]; } // NOTE @@ -39,7 +39,7 @@ void U_I18N_API Factor::divideBy(const Factor &rhs) { factorNum *= rhs.factorDen; factorDen *= rhs.factorNum; for (int i = 0; i < CONSTANTS_COUNT; i++) { - constants[i] -= rhs.constants[i]; + constantExponents[i] -= rhs.constantExponents[i]; } // NOTE @@ -51,7 +51,7 @@ void U_I18N_API Factor::divideBy(const Factor &rhs) { void U_I18N_API Factor::power(int32_t power) { // multiply all the constant by the power. 
for (int i = 0; i < CONSTANTS_COUNT; i++) { - constants[i] *= power; + constantExponents[i] *= power; } bool shouldFlip = power < 0; // This means that after applying the absolute power, we should flip @@ -66,35 +66,29 @@ void U_I18N_API Factor::power(int32_t power) { } } -void U_I18N_API Factor::flip() { - std::swap(factorNum, factorDen); - - for (int i = 0; i < CONSTANTS_COUNT; i++) { - constants[i] *= -1; - } -} - -void U_I18N_API Factor::applySiPrefix(UMeasureSIPrefix siPrefix) { - if (siPrefix == UMeasureSIPrefix::UMEASURE_SI_PREFIX_ONE) return; // No need to do anything - - double siApplied = std::pow(10.0, std::abs(siPrefix)); - - if (siPrefix < 0) { - factorDen *= siApplied; +void U_I18N_API Factor::applyPrefix(UMeasurePrefix unitPrefix) { + if (unitPrefix == UMeasurePrefix::UMEASURE_PREFIX_ONE) { + // No need to do anything return; } - factorNum *= siApplied; + int32_t prefixPower = umeas_getPrefixPower(unitPrefix); + double prefixFactor = std::pow((double)umeas_getPrefixBase(unitPrefix), (double)std::abs(prefixPower)); + if (prefixPower >= 0) { + factorNum *= prefixFactor; + } else { + factorDen *= prefixFactor; + } } void U_I18N_API Factor::substituteConstants() { for (int i = 0; i < CONSTANTS_COUNT; i++) { - if (this->constants[i] == 0) { + if (this->constantExponents[i] == 0) { continue; } - auto absPower = std::abs(this->constants[i]); - Signum powerSig = this->constants[i] < 0 ? Signum::NEGATIVE : Signum::POSITIVE; + auto absPower = std::abs(this->constantExponents[i]); + Signum powerSig = this->constantExponents[i] < 0 ? 
Signum::NEGATIVE : Signum::POSITIVE; double absConstantValue = std::pow(constantsValues[i], absPower); if (powerSig == Signum::NEGATIVE) { @@ -103,7 +97,7 @@ void U_I18N_API Factor::substituteConstants() { this->factorNum *= absConstantValue; } - this->constants[i] = 0; + this->constantExponents[i] = 0; } } @@ -221,18 +215,21 @@ Factor loadSingleFactor(StringPiece source, const ConversionRates &ratesInfo, UE } // Load Factor of a compound source unit. +// In ICU4J, this is a pair of ConversionRates.getFactorToBase() functions. Factor loadCompoundFactor(const MeasureUnitImpl &source, const ConversionRates &ratesInfo, UErrorCode &status) { Factor result; - for (int32_t i = 0, n = source.units.length(); i < n; i++) { - SingleUnitImpl singleUnit = *source.units[i]; + for (int32_t i = 0, n = source.singleUnits.length(); i < n; i++) { + SingleUnitImpl singleUnit = *source.singleUnits[i]; Factor singleFactor = loadSingleFactor(singleUnit.getSimpleUnitID(), ratesInfo, status); if (U_FAILURE(status)) return result; - // Apply SiPrefix before the power, because the power may be will flip the factor. - singleFactor.applySiPrefix(singleUnit.siPrefix); + // Prefix before power, because: + // - square-kilometer to square-meter: (1000)^2 + // - square-kilometer to square-foot (approximate): (3.28*1000)^2 + singleFactor.applyPrefix(singleUnit.unitPrefix); // Apply the power of the `dimensionality` singleFactor.power(singleUnit.dimensionality); @@ -249,6 +246,8 @@ Factor loadCompoundFactor(const MeasureUnitImpl &source, const ConversionRates & * * NOTE: * Empty unit means simple unit. + * + * In ICU4J, this is ConversionRates.checkSimpleUnit(). 
*/ UBool checkSimpleUnit(const MeasureUnitImpl &unit, UErrorCode &status) { if (U_FAILURE(status)) return false; @@ -256,14 +255,14 @@ UBool checkSimpleUnit(const MeasureUnitImpl &unit, UErrorCode &status) { if (unit.complexity != UMEASURE_UNIT_SINGLE) { return false; } - if (unit.units.length() == 0) { + if (unit.singleUnits.length() == 0) { // Empty units means simple unit. return true; } - auto singleUnit = *(unit.units[0]); + auto singleUnit = *(unit.singleUnits[0]); - if (singleUnit.dimensionality != 1 || singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { + if (singleUnit.dimensionality != 1 || singleUnit.unitPrefix != UMEASURE_PREFIX_ONE) { return false; } @@ -273,6 +272,7 @@ UBool checkSimpleUnit(const MeasureUnitImpl &unit, UErrorCode &status) { /** * Extract conversion rate from `source` to `target` */ +// In ICU4J, this function is partially inlined in the UnitsConverter constructor. void loadConversionRate(ConversionRate &conversionRate, const MeasureUnitImpl &source, const MeasureUnitImpl &target, Convertibility unitsState, const ConversionRates &ratesInfo, UErrorCode &status) { @@ -300,6 +300,7 @@ void loadConversionRate(ConversionRate &conversionRate, const MeasureUnitImpl &s conversionRate.factorNum = finalFactor.factorNum; conversionRate.factorDen = finalFactor.factorDen; + // This code corresponds to ICU4J's ConversionRates.getOffset(). // In case of simple units (such as: celsius or fahrenheit), offsets are considered. if (checkSimpleUnit(source, status) && checkSimpleUnit(target, status)) { conversionRate.sourceOffset = @@ -307,6 +308,8 @@ void loadConversionRate(ConversionRate &conversionRate, const MeasureUnitImpl &s conversionRate.targetOffset = targetToBase.offset * targetToBase.factorDen / targetToBase.factorNum; } + // TODO(icu-units#127): should we consider failure if there's an offset for + // a not-simple-unit? What about kilokelvin / kilocelsius? 
conversionRate.reciprocal = unitsState == Convertibility::RECIPROCAL; } @@ -336,8 +339,8 @@ void mergeSingleUnitWithDimension(MaybeStackVector &unitI void mergeUnitsAndDimensions(MaybeStackVector &unitIndicesWithDimension, const MeasureUnitImpl &shouldBeMerged, int32_t multiplier) { - for (int32_t unit_i = 0; unit_i < shouldBeMerged.units.length(); unit_i++) { - auto singleUnit = *shouldBeMerged.units[unit_i]; + for (int32_t unit_i = 0; unit_i < shouldBeMerged.singleUnits.length(); unit_i++) { + auto singleUnit = *shouldBeMerged.singleUnits[unit_i]; mergeSingleUnitWithDimension(unitIndicesWithDimension, singleUnit, multiplier); } } @@ -361,28 +364,32 @@ UBool checkAllDimensionsAreZeros(const MaybeStackVector & void U_I18N_API addSingleFactorConstant(StringPiece baseStr, int32_t power, Signum signum, Factor &factor, UErrorCode &status) { if (baseStr == "ft_to_m") { - factor.constants[CONSTANT_FT2M] += power * signum; + factor.constantExponents[CONSTANT_FT2M] += power * signum; } else if (baseStr == "ft2_to_m2") { - factor.constants[CONSTANT_FT2M] += 2 * power * signum; + factor.constantExponents[CONSTANT_FT2M] += 2 * power * signum; } else if (baseStr == "ft3_to_m3") { - factor.constants[CONSTANT_FT2M] += 3 * power * signum; + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; } else if (baseStr == "in3_to_m3") { - factor.constants[CONSTANT_FT2M] += 3 * power * signum; + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; factor.factorDen *= 12 * 12 * 12; } else if (baseStr == "gal_to_m3") { factor.factorNum *= 231; - factor.constants[CONSTANT_FT2M] += 3 * power * signum; + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; factor.factorDen *= 12 * 12 * 12; } else if (baseStr == "gal_imp_to_m3") { - factor.constants[CONSTANT_GAL_IMP2M3] += power * signum; + factor.constantExponents[CONSTANT_GAL_IMP2M3] += power * signum; } else if (baseStr == "G") { - factor.constants[CONSTANT_G] += power * signum; + 
factor.constantExponents[CONSTANT_G] += power * signum; } else if (baseStr == "gravity") { - factor.constants[CONSTANT_GRAVITY] += power * signum; + factor.constantExponents[CONSTANT_GRAVITY] += power * signum; } else if (baseStr == "lb_to_kg") { - factor.constants[CONSTANT_LB2KG] += power * signum; + factor.constantExponents[CONSTANT_LB2KG] += power * signum; + } else if (baseStr == "glucose_molar_mass") { + factor.constantExponents[CONSTANT_GLUCOSE_MOLAR_MASS] += power * signum; + } else if (baseStr == "item_per_mole") { + factor.constantExponents[CONSTANT_ITEM_PER_MOLE] += power * signum; } else if (baseStr == "PI") { - factor.constants[CONSTANT_PI] += power * signum; + factor.constantExponents[CONSTANT_PI] += power * signum; } else { if (signum == Signum::NEGATIVE) { factor.factorDen *= std::pow(strToDouble(baseStr, status), power); @@ -403,7 +410,7 @@ MeasureUnitImpl U_I18N_API extractCompoundBaseUnit(const MeasureUnitImpl &source MeasureUnitImpl result; if (U_FAILURE(status)) return result; - const auto &singleUnits = source.units; + const auto &singleUnits = source.singleUnits; for (int i = 0, count = singleUnits.length(); i < count; ++i) { const auto &singleUnit = *singleUnits[i]; // Extract `ConversionRateInfo` using the absolute unit. For example: in case of `square-meter`, @@ -421,11 +428,11 @@ MeasureUnitImpl U_I18N_API extractCompoundBaseUnit(const MeasureUnitImpl &source // Multiply the power of the singleUnit by the power of the baseUnit. For example, square-hectare // must be pow4-meter. 
(NOTE: hectare --> square-meter) auto baseUnits = - MeasureUnitImpl::forIdentifier(rateInfo->baseUnit.toStringPiece(), status).units; + MeasureUnitImpl::forIdentifier(rateInfo->baseUnit.toStringPiece(), status).singleUnits; for (int32_t i = 0, baseUnitsCount = baseUnits.length(); i < baseUnitsCount; i++) { baseUnits[i]->dimensionality *= singleUnit.dimensionality; // TODO: Deal with SI-prefix - result.append(*baseUnits[i], status); + result.appendSingleUnit(*baseUnits[i], status); if (U_FAILURE(status)) { return result; @@ -482,16 +489,37 @@ Convertibility U_I18N_API extractConvertibility(const MeasureUnitImpl &source, return UNCONVERTIBLE; } -UnitConverter::UnitConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, - const ConversionRates &ratesInfo, UErrorCode &status) +UnitsConverter::UnitsConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, + const ConversionRates &ratesInfo, UErrorCode &status) : conversionRate_(source.copy(status), target.copy(status)) { - if (source.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || - target.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + this->init(ratesInfo, status); +} + +UnitsConverter::UnitsConverter(StringPiece sourceIdentifier, StringPiece targetIdentifier, + UErrorCode &status) + : conversionRate_(MeasureUnitImpl::forIdentifier(sourceIdentifier, status), + MeasureUnitImpl::forIdentifier(targetIdentifier, status)) { + if (U_FAILURE(status)) { + return; + } + + ConversionRates ratesInfo(status); + this->init(ratesInfo, status); +} + +void UnitsConverter::init(const ConversionRates &ratesInfo, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + if (this->conversionRate_.source.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || + this->conversionRate_.target.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { status = U_INTERNAL_PROGRAM_ERROR; return; } - Convertibility unitsState = extractConvertibility(source, target, 
ratesInfo, status); + Convertibility unitsState = extractConvertibility(this->conversionRate_.source, + this->conversionRate_.target, ratesInfo, status); if (U_FAILURE(status)) return; if (unitsState == Convertibility::UNCONVERTIBLE) { status = U_INTERNAL_PROGRAM_ERROR; @@ -500,9 +528,57 @@ UnitConverter::UnitConverter(const MeasureUnitImpl &source, const MeasureUnitImp loadConversionRate(conversionRate_, conversionRate_.source, conversionRate_.target, unitsState, ratesInfo, status); + } -double UnitConverter::convert(double inputValue) const { +int32_t UnitsConverter::compareTwoUnits(const MeasureUnitImpl &firstUnit, + const MeasureUnitImpl &secondUnit, + const ConversionRates &ratesInfo, UErrorCode &status) { + if (U_FAILURE(status)) { + return 0; + } + + if (firstUnit.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || + secondUnit.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + + Convertibility unitsState = extractConvertibility(firstUnit, secondUnit, ratesInfo, status); + if (U_FAILURE(status)) { + return 0; + } + + if (unitsState == Convertibility::UNCONVERTIBLE || unitsState == Convertibility::RECIPROCAL) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + + // Represents the conversion factor from the firstUnit to the base + // unit that is specified in the conversion data which is considered as + // the root of the firstUnit and the secondUnit.
+ Factor firstUnitToBase = loadCompoundFactor(firstUnit, ratesInfo, status); + Factor secondUnitToBase = loadCompoundFactor(secondUnit, ratesInfo, status); + + firstUnitToBase.substituteConstants(); + secondUnitToBase.substituteConstants(); + + double firstUnitToBaseConversionRate = firstUnitToBase.factorNum / firstUnitToBase.factorDen; + double secondUnitToBaseConversionRate = secondUnitToBase.factorNum / secondUnitToBase.factorDen; + + double diff = firstUnitToBaseConversionRate - secondUnitToBaseConversionRate; + if (diff > 0) { + return 1; + } + + if (diff < 0) { + return -1; + } + + return 0; +} + +double UnitsConverter::convert(double inputValue) const { double result = inputValue + conversionRate_.sourceOffset; // Reset the input to the target zero index. // Convert the quantity to from the source scale to the target scale. @@ -523,7 +599,7 @@ double UnitConverter::convert(double inputValue) const { return result; } -double UnitConverter::convertInverse(double inputValue) const { +double UnitsConverter::convertInverse(double inputValue) const { double result = inputValue; if (conversionRate_.reciprocal) { if (result == 0) { @@ -540,6 +616,17 @@ double UnitConverter::convertInverse(double inputValue) const { return result; } +ConversionInfo UnitsConverter::getConversionInfo() const { + ConversionInfo result; + result.conversionRate = conversionRate_.factorNum / conversionRate_.factorDen; + result.offset = + (conversionRate_.sourceOffset * (conversionRate_.factorNum / conversionRate_.factorDen)) - + conversionRate_.targetOffset; + result.reciprocal = conversionRate_.reciprocal; + + return result; +} + } // namespace units U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/units_converter.h b/deps/icu-small/source/i18n/units_converter.h index 7650131b1f6975..1b83155a17b571 100644 --- a/deps/icu-small/source/i18n/units_converter.h +++ b/deps/icu-small/source/i18n/units_converter.h @@ -20,13 +20,16 @@ namespace units { /* Internal Structure */ +// Constants 
corresponding to unitConstants in CLDR's units.xml. enum Constants { - CONSTANT_FT2M, // ft2m stands for foot to meter. - CONSTANT_PI, // PI - CONSTANT_GRAVITY, // Gravity - CONSTANT_G, + CONSTANT_FT2M, // ft_to_m + CONSTANT_PI, // PI + CONSTANT_GRAVITY, // Gravity of earth (9.80665 m/s^2), "g". + CONSTANT_G, // Newtonian constant of gravitation, "G". CONSTANT_GAL_IMP2M3, // Gallon imp to m3 CONSTANT_LB2KG, // Pound to Kilogram + CONSTANT_GLUCOSE_MOLAR_MASS, + CONSTANT_ITEM_PER_MOLE, // Must be the last element. CONSTANTS_COUNT @@ -36,6 +39,7 @@ enum Constants { // resources file. A unit test checks that all constants in the resource // file are at least recognised by the code. Derived constants' values or // hard-coded derivations are not checked. +// In ICU4J, these constants live in UnitConverter.Factor.getConversionRate(). static const double constantsValues[CONSTANTS_COUNT] = { 0.3048, // CONSTANT_FT2M 411557987.0 / 131002976.0, // CONSTANT_PI @@ -43,6 +47,8 @@ static const double constantsValues[CONSTANTS_COUNT] = { 6.67408E-11, // CONSTANT_G 0.00454609, // CONSTANT_GAL_IMP2M3 0.45359237, // CONSTANT_LB2KG + 180.1557, // CONSTANT_GLUCOSE_MOLAR_MASS + 6.02214076E+23, // CONSTANT_ITEM_PER_MOLE }; typedef enum Signum { @@ -56,7 +62,9 @@ struct U_I18N_API Factor { double factorDen = 1; double offset = 0; bool reciprocal = false; - int32_t constants[CONSTANTS_COUNT] = {}; + + // Exponents for the symbolic constants + int32_t constantExponents[CONSTANTS_COUNT] = {}; void multiplyBy(const Factor &rhs); void divideBy(const Factor &rhs); @@ -64,14 +72,22 @@ struct U_I18N_API Factor { // Apply the power to the factor. void power(int32_t power); - // Flip the `Factor`, for example, factor= 2/3, flippedFactor = 3/2 - void flip(); + // Apply SI or binary prefix to the Factor. 
+ void applyPrefix(UMeasurePrefix unitPrefix); - // Apply SI prefix to the `Factor` - void applySiPrefix(UMeasureSIPrefix siPrefix); + // Does an in-place substitution of the "symbolic constants" based on + // constantExponents (resetting the exponents). + // + // In ICU4J, see UnitConverter.Factor.getConversionRate(). void substituteConstants(); }; +struct U_I18N_API ConversionInfo { + double conversionRate; + double offset; + bool reciprocal; +}; + /* * Adds a single factor element to the `Factor`. e.g "ft3m", "2.333" or "cup2m3". But not "cup2m3^3". */ @@ -127,8 +143,22 @@ Convertibility U_I18N_API extractConvertibility(const MeasureUnitImpl &source, * Only works with SINGLE and COMPOUND units. If one of the units is a * MIXED unit, an error will occur. For more information, see UMeasureUnitComplexity. */ -class U_I18N_API UnitConverter : public UMemory { +class U_I18N_API UnitsConverter : public UMemory { public: + /** + * Constructor of `UnitsConverter`. + * NOTE: + * - source and target must be under the same category + * - e.g. meter to mile --> both of them are length units. + * NOTE: + * This constructor creates an instance of `ConversionRates` internally. + * + * @param sourceIdentifier represents the source unit identifier. + * @param targetIdentifier represents the target unit identifier. + * @param status + */ + UnitsConverter(StringPiece sourceIdentifier, StringPiece targetIdentifier, UErrorCode &status); + /** * Constructor of `UnitConverter`. * NOTE: @@ -140,9 +170,19 @@ class U_I18N_API UnitConverter : public UMemory { * @param ratesInfo Contains all the needed conversion rates. * @param status */ - UnitConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, + UnitsConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, const ConversionRates &ratesInfo, UErrorCode &status); + /** + * Compares two single units and returns 1 if the first one is greater, -1 if the second + * one is greater and 0 if they are equal.
+ * + * NOTE: + * Compares only single units that are convertible. + */ + static int32_t compareTwoUnits(const MeasureUnitImpl &firstUnit, const MeasureUnitImpl &SecondUnit, + const ConversionRates &ratesInfo, UErrorCode &status); + /** * Convert a measurement expressed in the source unit to a measurement * expressed in the target unit. @@ -161,8 +201,15 @@ class U_I18N_API UnitConverter : public UMemory { */ double convertInverse(double inputValue) const; + ConversionInfo getConversionInfo() const; + private: ConversionRate conversionRate_; + + /** + * Initialises the object. + */ + void init(const ConversionRates &ratesInfo, UErrorCode &status); }; } // namespace units diff --git a/deps/icu-small/source/i18n/units_data.cpp b/deps/icu-small/source/i18n/units_data.cpp index 42bd6248b0b26d..61f537479fa908 100644 --- a/deps/icu-small/source/i18n/units_data.cpp +++ b/deps/icu-small/source/i18n/units_data.cpp @@ -282,6 +282,10 @@ int32_t getPreferenceMetadataIndex(const MaybeStackVector= 0) { return idx; } if (!foundCategory) { + // TODO: failures can happen if units::getUnitCategory returns a category + // that does not appear in unitPreferenceData. Do we want a unit test that + // checks unitPreferenceData has full coverage of categories? Or just trust + // CLDR? 
status = U_ILLEGAL_ARGUMENT_ERROR; return -1; } @@ -360,29 +364,6 @@ int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, b return cmp; } -CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status) { - CharString result; - LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); - LocalUResourceBundlePointer unitQuantities( - ures_getByKey(unitsBundle.getAlias(), "unitQuantities", NULL, &status)); - int32_t categoryLength; - if (U_FAILURE(status)) { return result; } - const UChar *uCategory = - ures_getStringByKey(unitQuantities.getAlias(), baseUnitIdentifier, &categoryLength, &status); - if (U_FAILURE(status)) { - // TODO(CLDR-13787,hugovdm): special-casing the consumption-inverse - // case. Once CLDR-13787 is clarified, this should be generalised (or - // possibly removed): - if (uprv_strcmp(baseUnitIdentifier, "meter-per-cubic-meter") == 0) { - status = U_ZERO_ERROR; - result.append("consumption-inverse", status); - return result; - } - } - result.appendInvariantChars(uCategory, categoryLength, status); - return result; -} - // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? void U_I18N_API getAllConversionRates(MaybeStackVector &result, UErrorCode &status) { LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); @@ -415,7 +396,11 @@ void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringP const UnitPreference *const *&outPreferences, int32_t &preferenceCount, UErrorCode &status) const { int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + outPreferences = nullptr; + preferenceCount = 0; + return; + } U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. 
const UnitPreferenceMetadata *m = metadata_[idx]; outPreferences = unitPrefs_.getAlias() + m->prefsOffset; diff --git a/deps/icu-small/source/i18n/units_data.h b/deps/icu-small/source/i18n/units_data.h index b6fe8e88de3c2e..2c19b9434bd02b 100644 --- a/deps/icu-small/source/i18n/units_data.h +++ b/deps/icu-small/source/i18n/units_data.h @@ -17,22 +17,6 @@ U_NAMESPACE_BEGIN namespace units { -/** - * Looks up the unit category of a base unit identifier. - * - * Only supports base units, other units must be resolved to base units before - * passing to this function. - * - * Categories are found in `unitQuantities` in the `units` resource (see - * `units.txt`). - * - * TODO(hugovdm): if we give units_data.cpp access to the functionality of - * `extractCompoundBaseUnit` which is currently in units_converter.cpp, we could - * support all units for which there is a category. Does it make sense to move - * that function to units_data.cpp? - */ -CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status); - /** * Encapsulates "convertUnits" information from units resources, specifying how * to convert from one unit to another. 
diff --git a/deps/icu-small/source/i18n/units_router.cpp b/deps/icu-small/source/i18n/units_router.cpp index 3158718fd22fba..51f66bfa892be7 100644 --- a/deps/icu-small/source/i18n/units_router.cpp +++ b/deps/icu-small/source/i18n/units_router.cpp @@ -43,8 +43,23 @@ Precision UnitsRouter::parseSkeletonToPrecision(icu::UnicodeString precisionSkel return result; } -UnitsRouter::UnitsRouter(MeasureUnit inputUnit, StringPiece region, StringPiece usage, +UnitsRouter::UnitsRouter(StringPiece inputUnitIdentifier, StringPiece region, StringPiece usage, UErrorCode &status) { + this->init(MeasureUnit::forIdentifier(inputUnitIdentifier, status), region, usage, status); +} + +UnitsRouter::UnitsRouter(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage, + UErrorCode &status) { + this->init(std::move(inputUnit), region, usage, status); +} + +void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage, + UErrorCode &status) { + + if (U_FAILURE(status)) { + return; + } + // TODO: do we want to pass in ConversionRates and UnitPreferences instead // of loading in each UnitsRouter instance? (Or make global?) 
ConversionRates conversionRates(status); @@ -53,13 +68,18 @@ UnitsRouter::UnitsRouter(MeasureUnit inputUnit, StringPiece region, StringPiece MeasureUnitImpl inputUnitImpl = MeasureUnitImpl::forMeasureUnitMaybeCopy(inputUnit, status); MeasureUnit baseUnit = (extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)).build(status); - CharString category = getUnitCategory(baseUnit.getIdentifier(), status); + CharString category = getUnitQuantity(baseUnit.getIdentifier(), status); + if (U_FAILURE(status)) { + return; + } const UnitPreference *const *unitPreferences; - int32_t preferencesCount; - prefs.getPreferencesFor(category.data(), usage, region, unitPreferences, preferencesCount, status); + int32_t preferencesCount = 0; + prefs.getPreferencesFor(category.toStringPiece(), usage, region, unitPreferences, preferencesCount, + status); for (int i = 0; i < preferencesCount; ++i) { + U_ASSERT(unitPreferences[i] != nullptr); const auto &preference = *unitPreferences[i]; MeasureUnitImpl complexTargetUnitImpl = diff --git a/deps/icu-small/source/i18n/units_router.h b/deps/icu-small/source/i18n/units_router.h index bd7a93d2d8c531..c6e4e4f5288363 100644 --- a/deps/icu-small/source/i18n/units_router.h +++ b/deps/icu-small/source/i18n/units_router.h @@ -120,7 +120,9 @@ namespace units { */ class U_I18N_API UnitsRouter { public: - UnitsRouter(MeasureUnit inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status); + UnitsRouter(StringPiece inputUnitIdentifier, StringPiece locale, StringPiece usage, + UErrorCode &status); + UnitsRouter(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status); /** * Performs locale and usage sensitive unit conversion. 
@@ -152,6 +154,8 @@ class U_I18N_API UnitsRouter { static number::Precision parseSkeletonToPrecision(icu::UnicodeString precisionSkeleton, UErrorCode &status); + + void init(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status); }; } // namespace units diff --git a/deps/icu-small/source/i18n/usearch.cpp b/deps/icu-small/source/i18n/usearch.cpp index 1b22e201e53294..47da3d7364de3f 100644 --- a/deps/icu-small/source/i18n/usearch.cpp +++ b/deps/icu-small/source/i18n/usearch.cpp @@ -4061,7 +4061,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // * do NOT require that match limit be on a breakIter boundary // Advance the match end position to the first acceptable match boundary. - // This advances the index over any combining charcters. + // This advances the index over any combining characters. mLimit = maxLimit; if (minLimit < maxLimit) { // When the last CE's low index is same with its high index, the CE is likely diff --git a/deps/icu-small/source/i18n/uspoof_conf.cpp b/deps/icu-small/source/i18n/uspoof_conf.cpp index 672b3e0a6c8023..f3f649d5d2c259 100644 --- a/deps/icu-small/source/i18n/uspoof_conf.cpp +++ b/deps/icu-small/source/i18n/uspoof_conf.cpp @@ -15,7 +15,7 @@ * created on: 2009Jan05 (refactoring earlier files) * created by: Andy Heninger * -* Internal classes for compililing confusable data into its binary (runtime) form. +* Internal classes for compiling confusable data into its binary (runtime) form. */ #include "unicode/utypes.h" diff --git a/deps/icu-small/source/i18n/uspoof_impl.h b/deps/icu-small/source/i18n/uspoof_impl.h index b0bd8cefc82ddc..e825f343da1915 100644 --- a/deps/icu-small/source/i18n/uspoof_impl.h +++ b/deps/icu-small/source/i18n/uspoof_impl.h @@ -157,7 +157,7 @@ class CheckResult : public UObject, // // String Table: // The strings table contains all of the value strings (those of length two or greater) -// concatentated together into one long UChar (UTF-16) array. 
+// concatenated together into one long UChar (UTF-16) array. // // There is no nul character or other mark between adjacent strings. // diff --git a/deps/icu-small/source/i18n/vtzone.cpp b/deps/icu-small/source/i18n/vtzone.cpp index aa2e1763966710..94fa6a4158ae1d 100644 --- a/deps/icu-small/source/i18n/vtzone.cpp +++ b/deps/icu-small/source/i18n/vtzone.cpp @@ -376,7 +376,7 @@ static void getDefaultTZName(const UnicodeString &tzid, UBool isDST, UnicodeStri * dow day of week in BYDAY, or 0 when not found * wim day of week ordinal number in BYDAY, or 0 when not found * dom an array of day of month - * domCount number of availble days in dom (domCount is specifying the size of dom on input) + * domCount number of available days in dom (domCount is specifying the size of dom on input) * until time defined by UNTIL attribute or MIN_MILLIS if not available */ static void parseRRULE(const UnicodeString& rrule, int32_t& month, int32_t& dow, int32_t& wim, @@ -1217,6 +1217,12 @@ VTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, return tz->getOffset(date, local, rawOffset, dstOffset, status); } +void VTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { + tz->getOffsetFromLocal(date, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, status); +} + void VTimeZone::setRawOffset(int32_t offsetMillis) { tz->setRawOffset(offsetMillis); @@ -2649,7 +2655,7 @@ VTimeZone::endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) cons } /* - * Write the beggining part of RRULE line + * Write the beginning part of RRULE line */ void VTimeZone::beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const { diff --git a/deps/icu-small/source/i18n/vzone.h b/deps/icu-small/source/i18n/vzone.h index 9c83c1b7e660cb..2e8433908c0330 100644 --- a/deps/icu-small/source/i18n/vzone.h +++ 
b/deps/icu-small/source/i18n/vzone.h @@ -136,7 +136,7 @@ U_CAPI void U_EXPORT2 vzone_write(VZone* zone, UChar* & result, int32_t & resultLength, UErrorCode& status); /** - * Writes RFC2445 VTIMEZONE data for this time zone applicalbe + * Writes RFC2445 VTIMEZONE data for this time zone applicable * for dates after the specified start time. * @param zone, the vzone to use * @param start The start date. @@ -148,7 +148,7 @@ U_CAPI void U_EXPORT2 vzone_writeFromStart(VZone* zone, UDate start, UChar* & result, int32_t & resultLength, UErrorCode& status); /** - * Writes RFC2445 VTIMEZONE data applicalbe for the specified date. + * Writes RFC2445 VTIMEZONE data applicable for the specified date. * Some common iCalendar implementations can only handle a single time * zone property or a pair of standard and daylight time properties using * BYDAY rule with day of week (such as BYDAY=1SUN). This method produce diff --git a/deps/icu-small/source/i18n/windtfmt.cpp b/deps/icu-small/source/i18n/windtfmt.cpp index c35adc09875724..b5cb8f367b662c 100644 --- a/deps/icu-small/source/i18n/windtfmt.cpp +++ b/deps/icu-small/source/i18n/windtfmt.cpp @@ -193,6 +193,7 @@ Win32DateFormat::~Win32DateFormat() Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other) { + if (this == &other) { return *this; } // self-assignment: no-op // The following handles fCalendar DateFormat::operator=(other); diff --git a/deps/icu-small/source/i18n/winnmfmt.cpp b/deps/icu-small/source/i18n/winnmfmt.cpp index 1ae2310123a252..2e44631c85074e 100644 --- a/deps/icu-small/source/i18n/winnmfmt.cpp +++ b/deps/icu-small/source/i18n/winnmfmt.cpp @@ -268,6 +268,7 @@ Win32NumberFormat::~Win32NumberFormat() Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other) { + if (this == &other) { return *this; } // self-assignment: no-op NumberFormat::operator=(other); this->fCurrency = other.fCurrency; diff --git a/deps/icu-small/source/i18n/wintzimpl.cpp 
b/deps/icu-small/source/i18n/wintzimpl.cpp index c55ed95fa8aea2..211d3564029db8 100644 --- a/deps/icu-small/source/i18n/wintzimpl.cpp +++ b/deps/icu-small/source/i18n/wintzimpl.cpp @@ -145,7 +145,7 @@ static UBool getWindowsTimeZoneInfo(TIME_ZONE_INFORMATION *zoneInfo, const UChar } /* - * Given the timezone icuid, fill in zoneInfo by calling auxillary functions that creates a timezone and extract the + * Given the timezone icuid, fill in zoneInfo by calling auxiliary functions that creates a timezone and extract the * information to put into zoneInfo. This includes bias and standard time date and daylight saving date. */ U_CAPI UBool U_EXPORT2 diff --git a/deps/icu-small/source/i18n/zonemeta.h b/deps/icu-small/source/i18n/zonemeta.h index 58724ea3b7d2e4..a5a446d8ccdad9 100644 --- a/deps/icu-small/source/i18n/zonemeta.h +++ b/deps/icu-small/source/i18n/zonemeta.h @@ -50,7 +50,7 @@ class U_I18N_API ZoneMeta { static const UChar* U_EXPORT2 getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status); /* - * Conveninent method returning CLDR canonical ID for the given time zone + * Convenient method returning CLDR canonical ID for the given time zone */ static const UChar* U_EXPORT2 getCanonicalCLDRID(const TimeZone& tz); diff --git a/deps/icu-small/source/tools/genrb/parse.cpp b/deps/icu-small/source/tools/genrb/parse.cpp index 18a8c76dbc5df1..5abe5d656733f9 100644 --- a/deps/icu-small/source/tools/genrb/parse.cpp +++ b/deps/icu-small/source/tools/genrb/parse.cpp @@ -1191,7 +1191,7 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star if (token == TOK_CLOSE_BRACE) { - if (!readToken) { + if (!readToken && isVerbose()) { warning(startline, "Encountered empty table"); } return table; diff --git a/deps/icu-small/source/tools/genrb/prscmnts.cpp b/deps/icu-small/source/tools/genrb/prscmnts.cpp index 5d494cd9ad3f2b..d79d59e3916bc5 100644 --- a/deps/icu-small/source/tools/genrb/prscmnts.cpp +++ 
b/deps/icu-small/source/tools/genrb/prscmnts.cpp @@ -78,7 +78,7 @@ trim(UChar *src, int32_t srcLen, UErrorCode *status){ U_CFUNC int32_t removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ srcLen = trim(source, srcLen, status); - UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line + UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the beginning of the line srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; } diff --git a/deps/icu-small/source/tools/genrb/reslist.cpp b/deps/icu-small/source/tools/genrb/reslist.cpp index 4bc6adc128ecc3..389ad19b38b88f 100644 --- a/deps/icu-small/source/tools/genrb/reslist.cpp +++ b/deps/icu-small/source/tools/genrb/reslist.cpp @@ -340,7 +340,8 @@ IntResource::~IntResource() {} IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag, const UString* comment, UErrorCode &errorCode) : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode), - fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) { + fCount(0), fSize(RESLIST_INT_VECTOR_INIT_SIZE), + fArray(new uint32_t[fSize]) { if (fArray == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; @@ -352,6 +353,17 @@ IntVectorResource::~IntVectorResource() { } void IntVectorResource::add(int32_t value, UErrorCode &errorCode) { + if (fCount == fSize) { + uint32_t* tmp = new uint32_t[2 * fSize]; + if (tmp == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(tmp, fArray, fSize * sizeof(uint32_t)); + delete[] fArray; + fArray = tmp; + fSize *= 2; + } if (U_SUCCESS(errorCode)) { fArray[fCount++] = value; } diff --git a/deps/icu-small/source/tools/genrb/reslist.h b/deps/icu-small/source/tools/genrb/reslist.h index e7b10fa0961d18..400f0a97debea4 100644 --- a/deps/icu-small/source/tools/genrb/reslist.h +++ 
b/deps/icu-small/source/tools/genrb/reslist.h @@ -21,7 +21,7 @@ #define RESLIST_H #define KEY_SPACE_SIZE 65536 -#define RESLIST_MAX_INT_VECTOR 2048 +#define RESLIST_INT_VECTOR_INIT_SIZE 2048 #include @@ -405,7 +405,8 @@ class IntVectorResource : public SResource { virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset); // TODO: UVector32 - uint32_t fCount; + size_t fCount; + size_t fSize; uint32_t *fArray; }; diff --git a/deps/icu-small/source/tools/toolutil/ucbuf.cpp b/deps/icu-small/source/tools/toolutil/ucbuf.cpp index 9b5e615d258c92..195963c78c5af9 100644 --- a/deps/icu-small/source/tools/toolutil/ucbuf.cpp +++ b/deps/icu-small/source/tools/toolutil/ucbuf.cpp @@ -531,14 +531,14 @@ ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffere /* TODO: this method will fail if at the - * begining of buffer and the uchar to unget + * beginning of buffer and the uchar to unget * is from the previous buffer. Need to implement * system to take care of that situation. */ U_CAPI void U_EXPORT2 ucbuf_ungetc(int32_t c,UCHARBUF* buf){ /* decrement currentPos pointer - * if not at the begining of buffer + * if not at the beginning of buffer */ if(buf->currentPos!=buf->buffer){ if(*(buf->currentPos-1)==c){ @@ -736,7 +736,7 @@ ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ } } /* - * Accoding to TR 13 readLine functions must interpret + * According to TR 13 readLine functions must interpret * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators */ /* Windows CR LF */ diff --git a/deps/icu-small/source/tools/toolutil/ucbuf.h b/deps/icu-small/source/tools/toolutil/ucbuf.h index 7a9b7af5cc71ac..a854150bfba95b 100644 --- a/deps/icu-small/source/tools/toolutil/ucbuf.h +++ b/deps/icu-small/source/tools/toolutil/ucbuf.h @@ -49,7 +49,7 @@ struct ULine { * Opens the UCHARBUF with the given file stream and code page for conversion * @param fileName Name of the file to open. 
* @param codepage The encoding of the file stream to convert to Unicode. - * If *codepoge is NULL on input the API will try to autodetect + * If *codepage is NULL on input the API will try to autodetect * popular Unicode encodings * @param showWarning Flag to print out warnings to STDOUT * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads diff --git a/deps/icu-small/source/tools/toolutil/xmlparser.cpp b/deps/icu-small/source/tools/toolutil/xmlparser.cpp index e3d5b42ef47a73..d421a79ead6f2b 100644 --- a/deps/icu-small/source/tools/toolutil/xmlparser.cpp +++ b/deps/icu-small/source/tools/toolutil/xmlparser.cpp @@ -79,7 +79,7 @@ UXMLParser::UXMLParser(UErrorCode &status) : // or " // TODO: we don't actually parse the DOCTYPE or internal subsets. // Some internal dtd subsets could confuse this simple-minded - // attempt at skipping over them, specifically, occcurences + // attempt at skipping over them, specifically, occurrences // of closeing square brackets. These could appear in comments, // or in parameter entity declarations, for example. mXMLDoctype(UnicodeString( @@ -243,7 +243,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { UnicodeString attValue = mAttrValue.group(2, errorCode); // Trim the quotes from the att value. These are left over from the original regex - // that parsed the attribue, which couldn't conveniently strip them. + // that parsed the attribute, which couldn't conveniently strip them. attValue.remove(0,1); // one char from the beginning attValue.truncate(attValue.length()-1); // and one from the end. @@ -498,7 +498,7 @@ UXMLParser::createElement(RegexMatcher &mEl, UErrorCode &status) { UnicodeString attValue = mAttrValue.group(2, status); // Trim the quotes from the att value. These are left over from the original regex - // that parsed the attribue, which couldn't conveniently strip them. + // that parsed the attribute, which couldn't conveniently strip them. 
attValue.remove(0,1); // one char from the beginning attValue.truncate(attValue.length()-1); // and one from the end. @@ -658,7 +658,7 @@ UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) { return (const UnicodeString *)he->key.pointer; } else { // add this new name and return its hashed key pointer - fNames.puti(s, 0, errorCode); + fNames.puti(s, 1, errorCode); he=fNames.find(s); return (const UnicodeString *)he->key.pointer; } diff --git a/tools/icu/current_ver.dep b/tools/icu/current_ver.dep index 1c7f45879621c0..4ab415d8757b24 100644 --- a/tools/icu/current_ver.dep +++ b/tools/icu/current_ver.dep @@ -1,6 +1,6 @@ [ { - "url": "/~https://github.com/unicode-org/icu/releases/download/release-68-2/icu4c-68_2-src.tgz", - "md5": "c21cbdfe31a1e325afe765a16f907d20" + "url": "/~https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-src.tgz", + "md5": "9403db682507369d0f60a25ea67014c4" } ]