-
-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathproperties.go
208 lines (200 loc) · 3.43 KB
/
properties.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
package uniseg
// The Unicode properties as used in the various parsers. Only the ones needed
// in the context of this package are included.
//
// All values except prXX are derived from iota, so the order of the entries
// in this block determines their numeric values.
//
// NOTE(review): iota counts the entries of this const block, and prXX occupies
// entry 0, so iota is 1 on the prAny line. prAny therefore evaluates to 1, not
// 0, and is not equal to prXX, contrary to what the earlier comments on these
// two entries stated ("Same as prAny", "prAny must be 0"). Confirm which of
// the two was intended before relying on either statement.
//
// NOTE(review): the names appear to follow the property value aliases of
// UAX #29 (grapheme cluster, word and sentence boundaries), UAX #14 (line
// breaking) and UAX #11 (East Asian Width) - confirm against the code point
// tables that use them.
const (
prXX = 0 // Zero value; this is what property() yields when propertySearch finds no matching range.
prAny = iota // Evaluates to 1 here (see NOTE above), even though it was documented as "must be 0".
prPrepend // Grapheme properties must come first, to reduce the number of bits stored in the state vector.
prCR
prLF
prControl
prExtend
prRegionalIndicator
prSpacingMark
prL
prV
prT
prLV
prLVT
prZWJ
prExtendedPictographic
prNewline
prWSegSpace
prDoubleQuote
prSingleQuote
prMidNumLet
prNumeric
prMidLetter
prMidNum
prExtendNumLet
prALetter
prFormat
prHebrewLetter
prKatakana
prSp
prSTerm
prClose
prSContinue
prATerm
prUpper
prLower
prSep
prOLetter
prCM
prBA
prBK
prSP
prEX
prQU
prAL
prPR
prPO
prOP
prCP
prIS
prHY
prSY
prNU
prCL
prNL
prGL
prAI
prBB
prHL
prSA
prJL
prJV
prJT
prNS
prZW
prB2
prIN
prWJ
prID
prEB
prCJ
prH2
prH3
prSG
prCB
prRI
prEM
prN
prNa
prA
prW
prH
prF
prEmojiPresentation
)
// Unicode General Categories. Only the ones needed in the context of this
// package are included.
//
// The values are assigned by iota starting at 0, so the order of the entries
// determines their numeric values. gcNone (0) is also what propertyLineBreak
// reports as the General Category when propertySearch finds no matching range,
// because the not-found result is an all-zero array.
const (
gcNone = iota // gcNone must be 0.
gcCc // Control
gcZs // Space separator
gcPo // Other punctuation
gcSc // Currency symbol
gcPs // Open punctuation
gcPe // Close punctuation
gcSm // Math symbol
gcPd // Dash punctuation
gcNd // Decimal number
gcLu // Uppercase letter
gcSk // Modifier symbol
gcPc // Connector punctuation
gcLl // Lowercase letter
gcSo // Other symbol
gcLo // Other letter
gcPi // Initial punctuation
gcCf // Format
gcNo // Other number
gcPf // Final punctuation
gcLC // Cased letter
gcLm // Modifier letter
gcMn // Nonspacing mark
gcMe // Enclosing mark
gcMc // Spacing mark
gcNl // Letter number
gcZl // Line separator
gcZp // Paragraph separator
gcCn // Unassigned
gcCs // Surrogate
gcCo // Private use
)
// Special code points: the two variation selectors that request a specific
// presentation style for the preceding character.
const (
	// vs15 is Variation Selector-15 (text presentation).
	vs15 = 0xFE0E
	// vs16 is Variation Selector-16 (emoji presentation).
	vs16 = 0xFE0F
)
// propertySearch looks up r in dictionary using a binary search. Each entry
// describes an inclusive code point range: element 0 is the first code point
// and element 1 is the last one; any remaining elements are payload.
//
// The entry whose range contains r is returned. If there is no such entry
// (including when dictionary is empty), the zero value of E, i.e. an array of
// 0's, is returned instead.
//
// Because this is a binary search, dictionary is expected to be sorted in
// ascending order with non-overlapping ranges.
func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
	codePoint := int(r)
	lo, hi := 0, len(dictionary)
	for lo < hi {
		mid := (lo + hi) / 2
		entry := dictionary[mid]
		switch {
		case codePoint < entry[0]:
			// r lies before this range: continue in the lower half.
			hi = mid
		case codePoint > entry[1]:
			// r lies after this range: continue in the upper half.
			lo = mid + 1
		default:
			return entry
		}
	}

	// Nothing matched; result still holds its zero value.
	return result
}
// property looks up r in dictionary and returns the Unicode property value
// (one of the pr* constants) stored in the third element of the matching
// entry. If no entry covers r, propertySearch yields an all-zero array and the
// result is therefore 0.
func property(dictionary [][3]int, r rune) int {
	entry := propertySearch(dictionary, r)
	return entry[2]
}
// propertyLineBreak returns, for the given code point, the Unicode property
// value and the General Category (both from the constants above) as listed in
// the line break code points table. ASCII letters and digits are answered
// directly without consulting the table.
func propertyLineBreak(r rune) (property, generalCategory int) {
	// Fast track for ASCII letters and digits.
	switch {
	case 'a' <= r && r <= 'z':
		return prAL, gcLl
	case 'A' <= r && r <= 'Z':
		return prAL, gcLu
	case '0' <= r && r <= '9':
		return prNU, gcNd
	}

	// Everything else goes through the table; element 2 holds the property
	// and element 3 the General Category.
	match := propertySearch(lineBreakCodePoints, r)
	property, generalCategory = match[2], match[3]
	return property, generalCategory
}
// propertyGraphemes returns the Unicode grapheme cluster property value of the
// given code point. ASCII code points are answered directly; all others are
// looked up in the grapheme code points table.
func propertyGraphemes(r rune) int {
	switch {
	case 0x20 <= r && r <= 0x7e:
		// Printable ASCII.
		return prAny
	case r == 0x0a:
		return prLF
	case r == 0x0d:
		return prCR
	case (0 <= r && r <= 0x1f) || r == 0x7f:
		// Remaining ASCII control characters; LF and CR were handled above.
		return prControl
	}
	return property(graphemeCodePoints, r)
}
// propertyEastAsianWidth returns the Unicode East Asian Width property value
// of the given code point. ASCII code points are answered directly; all others
// are looked up in the East Asian Width table.
func propertyEastAsianWidth(r rune) int {
	switch {
	case 0x20 <= r && r <= 0x7e:
		// Printable ASCII.
		return prNa
	case (0 <= r && r <= 0x1f) || r == 0x7f:
		// ASCII control characters.
		return prN
	}
	return property(eastAsianWidth, r)
}