Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/unicore/Name.pm |
Statements | Executed 25 statements in 278µs |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 16µs | 19µs | BEGIN@394 | charnames::
0 | 0 | 0 | 0s | 0s | code_point_to_name_special | charnames::
0 | 0 | 0 | 0s | 0s | name_to_code_point_special | charnames::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | ||||
2 | # This file is machine-generated by lib/unicore/mktables from the Unicode | ||||
3 | # database, Version 6.2.0. Any changes made here will be lost! | ||||
4 | |||||
5 | |||||
6 | # !!!!!!! INTERNAL PERL USE ONLY !!!!!!! | ||||
7 | # This file is for internal use by core Perl only. The format and even the | ||||
8 | # name or existence of this file are subject to change without notice. Don't | ||||
9 | # use it directly. | ||||
10 | |||||
11 | |||||
12 | package charnames; | ||||
13 | |||||
14 | # This module contains machine-generated tables and code for the | ||||
15 | # algorithmically-determinable Unicode character names. The following | ||||
16 | # routines can be used to translate between name and code point and vice versa | ||||
17 | |||||
18 | { # Closure | ||||
19 | |||||
20 | # Matches a legal code point: 4-6 hex digits. If there are 6, the first | ||||
21 | # two must be 10; if there are 5, the first must not be a 0. Written this | ||||
22 | # way to decrease backtracking. The first regex allows the code point to | ||||
23 | # be at the end of a word, but to work properly, the word shouldn't end | ||||
24 | # with a valid hex character. The second one won't match a code point at | ||||
25 | # the end of a word, and doesn't have the run-on issue. | ||||
26 | 2 | 18µs | 1 | 2µs | my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b)/; # spent 2µs making 1 call to charnames::CORE:qr |
27 | 1 | 4µs | 1 | 1µs | my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b))/; # spent 1µs making 1 call to charnames::CORE:qr |
28 | |||||
29 | # In the following hash, the keys are the bases of names which include | ||||
30 | # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value | ||||
31 | # of each key is another hash which is used to get the low and high ends | ||||
32 | # for each range of code points that apply to the name. | ||||
33 | 1 | 4µs | my %names_ending_in_code_point = ( | ||
34 | 'CJK COMPATIBILITY IDEOGRAPH' => | ||||
35 | { | ||||
36 | 'high' => | ||||
37 | [ | ||||
38 | 64109, | ||||
39 | 64217, | ||||
40 | 195101, | ||||
41 | ], | ||||
42 | 'low' => | ||||
43 | [ | ||||
44 | 63744, | ||||
45 | 64112, | ||||
46 | 194560, | ||||
47 | ], | ||||
48 | }, | ||||
49 | 'CJK UNIFIED IDEOGRAPH' => | ||||
50 | { | ||||
51 | 'high' => | ||||
52 | [ | ||||
53 | 19893, | ||||
54 | 40908, | ||||
55 | 173782, | ||||
56 | 177972, | ||||
57 | 178205, | ||||
58 | ], | ||||
59 | 'low' => | ||||
60 | [ | ||||
61 | 13312, | ||||
62 | 19968, | ||||
63 | 131072, | ||||
64 | 173824, | ||||
65 | 177984, | ||||
66 | ], | ||||
67 | }, | ||||
68 | |||||
69 | ); | ||||
70 | |||||
71 | # The following hash is a copy of the previous one, except that it is for | ||||
72 | # loose matching, so each name has blanks and dashes squeezed out. | ||||
73 | 1 | 3µs | my %loose_names_ending_in_code_point = ( | ||
74 | 'CJKCOMPATIBILITYIDEOGRAPH' => | ||||
75 | { | ||||
76 | 'high' => | ||||
77 | [ | ||||
78 | 64109, | ||||
79 | 64217, | ||||
80 | 195101, | ||||
81 | ], | ||||
82 | 'low' => | ||||
83 | [ | ||||
84 | 63744, | ||||
85 | 64112, | ||||
86 | 194560, | ||||
87 | ], | ||||
88 | }, | ||||
89 | 'CJKUNIFIEDIDEOGRAPH' => | ||||
90 | { | ||||
91 | 'high' => | ||||
92 | [ | ||||
93 | 19893, | ||||
94 | 40908, | ||||
95 | 173782, | ||||
96 | 177972, | ||||
97 | 178205, | ||||
98 | ], | ||||
99 | 'low' => | ||||
100 | [ | ||||
101 | 13312, | ||||
102 | 19968, | ||||
103 | 131072, | ||||
104 | 173824, | ||||
105 | 177984, | ||||
106 | ], | ||||
107 | }, | ||||
108 | |||||
109 | ); | ||||
110 | |||||
111 | # And the following array gives the inverse mapping from code points to | ||||
112 | # names. Lowest code points are first | ||||
113 | 1 | 6µs | my @code_points_ending_in_code_point = ( | ||
114 | |||||
115 | { | ||||
116 | 'high' => 19893, | ||||
117 | 'low' => 13312, | ||||
118 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
119 | }, | ||||
120 | { | ||||
121 | 'high' => 40908, | ||||
122 | 'low' => 19968, | ||||
123 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
124 | }, | ||||
125 | { | ||||
126 | 'high' => 64109, | ||||
127 | 'low' => 63744, | ||||
128 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
129 | }, | ||||
130 | { | ||||
131 | 'high' => 64217, | ||||
132 | 'low' => 64112, | ||||
133 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
134 | }, | ||||
135 | { | ||||
136 | 'high' => 173782, | ||||
137 | 'low' => 131072, | ||||
138 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
139 | }, | ||||
140 | { | ||||
141 | 'high' => 177972, | ||||
142 | 'low' => 173824, | ||||
143 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
144 | }, | ||||
145 | { | ||||
146 | 'high' => 178205, | ||||
147 | 'low' => 177984, | ||||
148 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
149 | }, | ||||
150 | { | ||||
151 | 'high' => 195101, | ||||
152 | 'low' => 194560, | ||||
153 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
154 | }, | ||||
155 | , | ||||
156 | |||||
157 | ); | ||||
158 | |||||
159 | # Convert from code point to Jamo short name for use in composing Hangul | ||||
160 | # syllable names | ||||
161 | 1 | 41µs | my %Jamo = ( | ||
162 | 4352 => 'G', | ||||
163 | 4353 => 'GG', | ||||
164 | 4354 => 'N', | ||||
165 | 4355 => 'D', | ||||
166 | 4356 => 'DD', | ||||
167 | 4357 => 'R', | ||||
168 | 4358 => 'M', | ||||
169 | 4359 => 'B', | ||||
170 | 4360 => 'BB', | ||||
171 | 4361 => 'S', | ||||
172 | 4362 => 'SS', | ||||
173 | 4363 => '', | ||||
174 | 4364 => 'J', | ||||
175 | 4365 => 'JJ', | ||||
176 | 4366 => 'C', | ||||
177 | 4367 => 'K', | ||||
178 | 4368 => 'T', | ||||
179 | 4369 => 'P', | ||||
180 | 4370 => 'H', | ||||
181 | 4449 => 'A', | ||||
182 | 4450 => 'AE', | ||||
183 | 4451 => 'YA', | ||||
184 | 4452 => 'YAE', | ||||
185 | 4453 => 'EO', | ||||
186 | 4454 => 'E', | ||||
187 | 4455 => 'YEO', | ||||
188 | 4456 => 'YE', | ||||
189 | 4457 => 'O', | ||||
190 | 4458 => 'WA', | ||||
191 | 4459 => 'WAE', | ||||
192 | 4460 => 'OE', | ||||
193 | 4461 => 'YO', | ||||
194 | 4462 => 'U', | ||||
195 | 4463 => 'WEO', | ||||
196 | 4464 => 'WE', | ||||
197 | 4465 => 'WI', | ||||
198 | 4466 => 'YU', | ||||
199 | 4467 => 'EU', | ||||
200 | 4468 => 'YI', | ||||
201 | 4469 => 'I', | ||||
202 | 4520 => 'G', | ||||
203 | 4521 => 'GG', | ||||
204 | 4522 => 'GS', | ||||
205 | 4523 => 'N', | ||||
206 | 4524 => 'NJ', | ||||
207 | 4525 => 'NH', | ||||
208 | 4526 => 'D', | ||||
209 | 4527 => 'L', | ||||
210 | 4528 => 'LG', | ||||
211 | 4529 => 'LM', | ||||
212 | 4530 => 'LB', | ||||
213 | 4531 => 'LS', | ||||
214 | 4532 => 'LT', | ||||
215 | 4533 => 'LP', | ||||
216 | 4534 => 'LH', | ||||
217 | 4535 => 'M', | ||||
218 | 4536 => 'B', | ||||
219 | 4537 => 'BS', | ||||
220 | 4538 => 'S', | ||||
221 | 4539 => 'SS', | ||||
222 | 4540 => 'NG', | ||||
223 | 4541 => 'J', | ||||
224 | 4542 => 'C', | ||||
225 | 4543 => 'K', | ||||
226 | 4544 => 'T', | ||||
227 | 4545 => 'P', | ||||
228 | 4546 => 'H', | ||||
229 | |||||
230 | ); | ||||
231 | |||||
232 | # Leading consonant (can be null) | ||||
233 | 1 | 9µs | my %Jamo_L = ( | ||
234 | '' => 11, | ||||
235 | 'B' => 7, | ||||
236 | 'BB' => 8, | ||||
237 | 'C' => 14, | ||||
238 | 'D' => 3, | ||||
239 | 'DD' => 4, | ||||
240 | 'G' => 0, | ||||
241 | 'GG' => 1, | ||||
242 | 'H' => 18, | ||||
243 | 'J' => 12, | ||||
244 | 'JJ' => 13, | ||||
245 | 'K' => 15, | ||||
246 | 'M' => 6, | ||||
247 | 'N' => 2, | ||||
248 | 'P' => 17, | ||||
249 | 'R' => 5, | ||||
250 | 'S' => 9, | ||||
251 | 'SS' => 10, | ||||
252 | 'T' => 16, | ||||
253 | |||||
254 | ); | ||||
255 | |||||
256 | # Vowel | ||||
257 | 1 | 7µs | my %Jamo_V = ( | ||
258 | 'A' => 0, | ||||
259 | 'AE' => 1, | ||||
260 | 'E' => 5, | ||||
261 | 'EO' => 4, | ||||
262 | 'EU' => 18, | ||||
263 | 'I' => 20, | ||||
264 | 'O' => 8, | ||||
265 | 'OE' => 11, | ||||
266 | 'U' => 13, | ||||
267 | 'WA' => 9, | ||||
268 | 'WAE' => 10, | ||||
269 | 'WE' => 15, | ||||
270 | 'WEO' => 14, | ||||
271 | 'WI' => 16, | ||||
272 | 'YA' => 2, | ||||
273 | 'YAE' => 3, | ||||
274 | 'YE' => 7, | ||||
275 | 'YEO' => 6, | ||||
276 | 'YI' => 19, | ||||
277 | 'YO' => 12, | ||||
278 | 'YU' => 17, | ||||
279 | |||||
280 | ); | ||||
281 | |||||
282 | # Optional trailing consonant | ||||
283 | 1 | 6µs | my %Jamo_T = ( | ||
284 | 'B' => 17, | ||||
285 | 'BS' => 18, | ||||
286 | 'C' => 23, | ||||
287 | 'D' => 7, | ||||
288 | 'G' => 1, | ||||
289 | 'GG' => 2, | ||||
290 | 'GS' => 3, | ||||
291 | 'H' => 27, | ||||
292 | 'J' => 22, | ||||
293 | 'K' => 24, | ||||
294 | 'L' => 8, | ||||
295 | 'LB' => 11, | ||||
296 | 'LG' => 9, | ||||
297 | 'LH' => 15, | ||||
298 | 'LM' => 10, | ||||
299 | 'LP' => 14, | ||||
300 | 'LS' => 12, | ||||
301 | 'LT' => 13, | ||||
302 | 'M' => 16, | ||||
303 | 'N' => 4, | ||||
304 | 'NG' => 21, | ||||
305 | 'NH' => 6, | ||||
306 | 'NJ' => 5, | ||||
307 | 'P' => 26, | ||||
308 | 'S' => 19, | ||||
309 | 'SS' => 20, | ||||
310 | 'T' => 25, | ||||
311 | |||||
312 | ); | ||||
313 | |||||
314 | # Computed re that splits up a Hangul name into LVT or LV syllables | ||||
315 | 1 | 5µs | 1 | 1µs | my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/; # spent 1µs making 1 call to charnames::CORE:qr |
316 | |||||
317 | 1 | 300ns | my $HANGUL_SYLLABLE = "HANGUL SYLLABLE "; | ||
318 | 1 | 100ns | my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; | ||
319 | |||||
320 | # These constant names and values were taken from the Unicode standard, | ||||
321 | # version 5.1, section 3.12. They are used in conjunction with Hangul | ||||
322 | # syllables. | ||||
323 | 1 | 100ns | my $SBase = 0xAC00; | ||
324 | 1 | 0s | my $LBase = 0x1100; | ||
325 | 1 | 0s | my $VBase = 0x1161; | ||
326 | 1 | 0s | my $TBase = 0x11A7; | ||
327 | 1 | 0s | my $SCount = 11172; | ||
328 | 1 | 100ns | my $LCount = 19; | ||
329 | 1 | 0s | my $VCount = 21; | ||
330 | 1 | 0s | my $TCount = 28; | ||
331 | 1 | 2µs | my $NCount = $VCount * $TCount; | ||
332 | |||||
333 | sub name_to_code_point_special { | ||||
334 | my ($name, $loose) = @_; | ||||
335 | |||||
336 | # Returns the code point for $name when it is one of the specially handled | ||||
337 | # names (a Hangul syllable, or a base name followed by a hex code point); otherwise does a bare return. | ||||
338 | # A true $loose selects loose matching; $name must then already be upper case with all blanks and dashes squeezed out. | ||||
339 | # NOTE(review): the Hangul prefix is removed by an unanchored s///, i.e. at its first occurrence anywhere in $name. | ||||
340 | |||||
341 | if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//) | ||||
342 | || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//)) | ||||
343 | { | ||||
344 | return if $name !~ qr/^$syllable_re$/; | ||||
345 | my $L = $Jamo_L{$1}; | ||||
346 | my $V = $Jamo_V{$2}; | ||||
347 | my $T = (defined $3) ? $Jamo_T{$3} : 0; | ||||
348 | return ($L * $VCount + $V) * $TCount + $T + $SBase; | ||||
349 | } | ||||
350 | |||||
351 | # Not a Hangul name: it must end in a hex code point to be handled here. Loose names use $run_on_code_point_re (no leading \b) with a non-greedy base; strict names use $code_point_re and must also have a '-' just before the code point. | ||||
352 | return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x) | ||||
353 | || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x)); | ||||
354 | |||||
355 | my $base = $1; | ||||
356 | my $code_point = CORE::hex $2; | ||||
357 | my $names_ref; | ||||
358 | |||||
359 | if ($loose) { | ||||
360 | $names_ref = \%loose_names_ending_in_code_point; | ||||
361 | } | ||||
362 | else { | ||||
363 | return if $base !~ s/-$//; | ||||
364 | $names_ref = \%names_ending_in_code_point; | ||||
365 | } | ||||
366 | |||||
367 | # The base must be a key of the selected table, i.e. one of the names that has the code point in it. | ||||
368 | return if ! $names_ref->{$base}; | ||||
369 | |||||
370 | # Walk the parallel 'low'/'high' range lists for this name to see if the code point is in one of them. | ||||
371 | # The search stops at the first 'low' above the code point, which relies on the ranges being listed in ascending order. | ||||
372 | for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) { | ||||
373 | return if $names_ref->{$base}{'low'}->[$i] > $code_point; | ||||
374 | next if $names_ref->{$base}{'high'}->[$i] < $code_point; | ||||
375 | |||||
376 | # Here, low <= code point <= high, so the code point is in this range. | ||||
377 | return $code_point; | ||||
378 | } | ||||
379 | |||||
380 | # Here, the name looked like it had a code point number in it, but | ||||
381 | # the number was above every valid range for this name. | ||||
382 | return; | ||||
383 | } | ||||
384 | |||||
385 | sub code_point_to_name_special { | ||||
386 | my $code_point = shift; | ||||
387 | |||||
388 | # Returns the name of $code_point if it is algorithmically determinable | ||||
389 | # (a Hangul syllable, or a code point inside one of the listed ideograph ranges); does a bare return if not. | ||||
390 | |||||
391 | # If in the Hangul range, split the offset from $SBase into L, V and T Jamo code points and join their short names; the T part is appended only when it differs from $TBase. | ||||
392 | # NOTE(review): $name below is assigned without 'my' and no declaration is visible in this listing, so it appears to be a package variable. The statement text of line 394 is missing from this report; its profiler note mentions integer::import, so it is presumably 'use integer;' (which would make the divisions below truncate) - confirm against the real file. | ||||
393 | if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) { | ||||
394 | 2 | 141µs | 2 | 21µs | # spent 19µs (16+2) within charnames::BEGIN@394 which was called:
# once (16µs+2µs) by charnames::BEGIN@5 at line 394 # spent 19µs making 1 call to charnames::BEGIN@394
# spent 2µs making 1 call to integer::import |
395 | my $SIndex = $code_point - $SBase; | ||||
396 | my $L = $LBase + $SIndex / $NCount; | ||||
397 | my $V = $VBase + ($SIndex % $NCount) / $TCount; | ||||
398 | my $T = $TBase + $SIndex % $TCount; | ||||
399 | $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; | ||||
400 | $name .= $Jamo{$T} if $T != $TBase; | ||||
401 | return $name; | ||||
402 | } | ||||
403 | |||||
404 | # Otherwise scan the range list (lowest code points first); a 'low' above $code_point means no later range can match. A hit is formatted as the range's name, '-', and the code point in at least 4 upper-case hex digits. | ||||
405 | foreach my $hash (@code_points_ending_in_code_point) { | ||||
406 | return if $code_point < $hash->{'low'}; | ||||
407 | if ($code_point <= $hash->{'high'}) { | ||||
408 | return sprintf("%s-%04X", $hash->{'name'}, $code_point); | ||||
409 | } | ||||
410 | } | ||||
411 | return; # None found: $code_point is above every listed range | ||||
412 | } | ||||
413 | } # End closure | ||||
414 | |||||
415 | 1 | 34µs | 1; |