| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/unicore/Name.pm |
| Statements | Executed 25 statements in 278µs |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 16µs | 19µs | charnames::BEGIN@394 |
| 0 | 0 | 0 | 0s | 0s | charnames::code_point_to_name_special |
| 0 | 0 | 0 | 0s | 0s | charnames::name_to_code_point_special |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | ||||
| 2 | # This file is machine-generated by lib/unicore/mktables from the Unicode | ||||
| 3 | # database, Version 6.2.0. Any changes made here will be lost! | ||||
| 4 | |||||
| 5 | |||||
| 6 | # !!!!!!! INTERNAL PERL USE ONLY !!!!!!! | ||||
| 7 | # This file is for internal use by core Perl only. The format and even the | ||||
| 8 | # name or existence of this file are subject to change without notice. Don't | ||||
| 9 | # use it directly. | ||||
| 10 | |||||
| 11 | |||||
| 12 | package charnames; | ||||
| 13 | |||||
| 14 | # This module contains machine-generated tables and code for the | ||||
| 15 | # algorithmically-determinable Unicode character names. The following | ||||
| 16 | # routines can be used to translate between name and code point and vice versa | ||||
| 17 | |||||
| 18 | { # Closure | ||||
| 19 | |||||
| 20 | # Matches legal code point. 4-6 hex digits. If there are 6, the first | ||||
| 21 | # two must be 10; if there are 5, the first must not be a 0. Written this | ||||
| 22 | # way to decrease backtracking. The first regex allows the code point to | ||||
| 23 | # be at the end of a word, but to work properly, the word shouldn't end | ||||
| 24 | # with a valid hex character. The second one won't match a code point at | ||||
| 25 | # the end of a word, and doesn't have the run-on issue | ||||
| 26 | 2 | 18µs | 1 | 2µs | my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b)/; # spent 2µs making 1 call to charnames::CORE:qr |
| 27 | 1 | 4µs | 1 | 1µs | my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b))/; # spent 1µs making 1 call to charnames::CORE:qr |
| 28 | |||||
| 29 | # In the following hash, the keys are the bases of names which include | ||||
| 30 | # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value | ||||
| 31 | # of each key is another hash which is used to get the low and high ends | ||||
| 32 | # for each range of code points that apply to the name. | ||||
| 33 | 1 | 4µs | my %names_ending_in_code_point = ( | ||
| 34 | 'CJK COMPATIBILITY IDEOGRAPH' => | ||||
| 35 | { | ||||
| 36 | 'high' => | ||||
| 37 | [ | ||||
| 38 | 64109, | ||||
| 39 | 64217, | ||||
| 40 | 195101, | ||||
| 41 | ], | ||||
| 42 | 'low' => | ||||
| 43 | [ | ||||
| 44 | 63744, | ||||
| 45 | 64112, | ||||
| 46 | 194560, | ||||
| 47 | ], | ||||
| 48 | }, | ||||
| 49 | 'CJK UNIFIED IDEOGRAPH' => | ||||
| 50 | { | ||||
| 51 | 'high' => | ||||
| 52 | [ | ||||
| 53 | 19893, | ||||
| 54 | 40908, | ||||
| 55 | 173782, | ||||
| 56 | 177972, | ||||
| 57 | 178205, | ||||
| 58 | ], | ||||
| 59 | 'low' => | ||||
| 60 | [ | ||||
| 61 | 13312, | ||||
| 62 | 19968, | ||||
| 63 | 131072, | ||||
| 64 | 173824, | ||||
| 65 | 177984, | ||||
| 66 | ], | ||||
| 67 | }, | ||||
| 68 | |||||
| 69 | ); | ||||
| 70 | |||||
| 71 | # The following hash is a copy of the previous one, except is for loose | ||||
| 72 | # matching, so each name has blanks and dashes squeezed out | ||||
| 73 | 1 | 3µs | my %loose_names_ending_in_code_point = ( | ||
| 74 | 'CJKCOMPATIBILITYIDEOGRAPH' => | ||||
| 75 | { | ||||
| 76 | 'high' => | ||||
| 77 | [ | ||||
| 78 | 64109, | ||||
| 79 | 64217, | ||||
| 80 | 195101, | ||||
| 81 | ], | ||||
| 82 | 'low' => | ||||
| 83 | [ | ||||
| 84 | 63744, | ||||
| 85 | 64112, | ||||
| 86 | 194560, | ||||
| 87 | ], | ||||
| 88 | }, | ||||
| 89 | 'CJKUNIFIEDIDEOGRAPH' => | ||||
| 90 | { | ||||
| 91 | 'high' => | ||||
| 92 | [ | ||||
| 93 | 19893, | ||||
| 94 | 40908, | ||||
| 95 | 173782, | ||||
| 96 | 177972, | ||||
| 97 | 178205, | ||||
| 98 | ], | ||||
| 99 | 'low' => | ||||
| 100 | [ | ||||
| 101 | 13312, | ||||
| 102 | 19968, | ||||
| 103 | 131072, | ||||
| 104 | 173824, | ||||
| 105 | 177984, | ||||
| 106 | ], | ||||
| 107 | }, | ||||
| 108 | |||||
| 109 | ); | ||||
| 110 | |||||
| 111 | # And the following array gives the inverse mapping from code points to | ||||
| 112 | # names. Lowest code points are first | ||||
| 113 | 1 | 6µs | my @code_points_ending_in_code_point = ( | ||
| 114 | |||||
| 115 | { | ||||
| 116 | 'high' => 19893, | ||||
| 117 | 'low' => 13312, | ||||
| 118 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
| 119 | }, | ||||
| 120 | { | ||||
| 121 | 'high' => 40908, | ||||
| 122 | 'low' => 19968, | ||||
| 123 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
| 124 | }, | ||||
| 125 | { | ||||
| 126 | 'high' => 64109, | ||||
| 127 | 'low' => 63744, | ||||
| 128 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
| 129 | }, | ||||
| 130 | { | ||||
| 131 | 'high' => 64217, | ||||
| 132 | 'low' => 64112, | ||||
| 133 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
| 134 | }, | ||||
| 135 | { | ||||
| 136 | 'high' => 173782, | ||||
| 137 | 'low' => 131072, | ||||
| 138 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
| 139 | }, | ||||
| 140 | { | ||||
| 141 | 'high' => 177972, | ||||
| 142 | 'low' => 173824, | ||||
| 143 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
| 144 | }, | ||||
| 145 | { | ||||
| 146 | 'high' => 178205, | ||||
| 147 | 'low' => 177984, | ||||
| 148 | 'name' => 'CJK UNIFIED IDEOGRAPH', | ||||
| 149 | }, | ||||
| 150 | { | ||||
| 151 | 'high' => 195101, | ||||
| 152 | 'low' => 194560, | ||||
| 153 | 'name' => 'CJK COMPATIBILITY IDEOGRAPH', | ||||
| 154 | }, | ||||
| 155 | , | ||||
| 156 | |||||
| 157 | ); | ||||
| 158 | |||||
| 159 | # Convert from code point to Jamo short name for use in composing Hangul | ||||
| 160 | # syllable names | ||||
| 161 | 1 | 41µs | my %Jamo = ( | ||
| 162 | 4352 => 'G', | ||||
| 163 | 4353 => 'GG', | ||||
| 164 | 4354 => 'N', | ||||
| 165 | 4355 => 'D', | ||||
| 166 | 4356 => 'DD', | ||||
| 167 | 4357 => 'R', | ||||
| 168 | 4358 => 'M', | ||||
| 169 | 4359 => 'B', | ||||
| 170 | 4360 => 'BB', | ||||
| 171 | 4361 => 'S', | ||||
| 172 | 4362 => 'SS', | ||||
| 173 | 4363 => '', | ||||
| 174 | 4364 => 'J', | ||||
| 175 | 4365 => 'JJ', | ||||
| 176 | 4366 => 'C', | ||||
| 177 | 4367 => 'K', | ||||
| 178 | 4368 => 'T', | ||||
| 179 | 4369 => 'P', | ||||
| 180 | 4370 => 'H', | ||||
| 181 | 4449 => 'A', | ||||
| 182 | 4450 => 'AE', | ||||
| 183 | 4451 => 'YA', | ||||
| 184 | 4452 => 'YAE', | ||||
| 185 | 4453 => 'EO', | ||||
| 186 | 4454 => 'E', | ||||
| 187 | 4455 => 'YEO', | ||||
| 188 | 4456 => 'YE', | ||||
| 189 | 4457 => 'O', | ||||
| 190 | 4458 => 'WA', | ||||
| 191 | 4459 => 'WAE', | ||||
| 192 | 4460 => 'OE', | ||||
| 193 | 4461 => 'YO', | ||||
| 194 | 4462 => 'U', | ||||
| 195 | 4463 => 'WEO', | ||||
| 196 | 4464 => 'WE', | ||||
| 197 | 4465 => 'WI', | ||||
| 198 | 4466 => 'YU', | ||||
| 199 | 4467 => 'EU', | ||||
| 200 | 4468 => 'YI', | ||||
| 201 | 4469 => 'I', | ||||
| 202 | 4520 => 'G', | ||||
| 203 | 4521 => 'GG', | ||||
| 204 | 4522 => 'GS', | ||||
| 205 | 4523 => 'N', | ||||
| 206 | 4524 => 'NJ', | ||||
| 207 | 4525 => 'NH', | ||||
| 208 | 4526 => 'D', | ||||
| 209 | 4527 => 'L', | ||||
| 210 | 4528 => 'LG', | ||||
| 211 | 4529 => 'LM', | ||||
| 212 | 4530 => 'LB', | ||||
| 213 | 4531 => 'LS', | ||||
| 214 | 4532 => 'LT', | ||||
| 215 | 4533 => 'LP', | ||||
| 216 | 4534 => 'LH', | ||||
| 217 | 4535 => 'M', | ||||
| 218 | 4536 => 'B', | ||||
| 219 | 4537 => 'BS', | ||||
| 220 | 4538 => 'S', | ||||
| 221 | 4539 => 'SS', | ||||
| 222 | 4540 => 'NG', | ||||
| 223 | 4541 => 'J', | ||||
| 224 | 4542 => 'C', | ||||
| 225 | 4543 => 'K', | ||||
| 226 | 4544 => 'T', | ||||
| 227 | 4545 => 'P', | ||||
| 228 | 4546 => 'H', | ||||
| 229 | |||||
| 230 | ); | ||||
| 231 | |||||
| 232 | # Leading consonant (can be null) | ||||
| 233 | 1 | 9µs | my %Jamo_L = ( | ||
| 234 | '' => 11, | ||||
| 235 | 'B' => 7, | ||||
| 236 | 'BB' => 8, | ||||
| 237 | 'C' => 14, | ||||
| 238 | 'D' => 3, | ||||
| 239 | 'DD' => 4, | ||||
| 240 | 'G' => 0, | ||||
| 241 | 'GG' => 1, | ||||
| 242 | 'H' => 18, | ||||
| 243 | 'J' => 12, | ||||
| 244 | 'JJ' => 13, | ||||
| 245 | 'K' => 15, | ||||
| 246 | 'M' => 6, | ||||
| 247 | 'N' => 2, | ||||
| 248 | 'P' => 17, | ||||
| 249 | 'R' => 5, | ||||
| 250 | 'S' => 9, | ||||
| 251 | 'SS' => 10, | ||||
| 252 | 'T' => 16, | ||||
| 253 | |||||
| 254 | ); | ||||
| 255 | |||||
| 256 | # Vowel | ||||
| 257 | 1 | 7µs | my %Jamo_V = ( | ||
| 258 | 'A' => 0, | ||||
| 259 | 'AE' => 1, | ||||
| 260 | 'E' => 5, | ||||
| 261 | 'EO' => 4, | ||||
| 262 | 'EU' => 18, | ||||
| 263 | 'I' => 20, | ||||
| 264 | 'O' => 8, | ||||
| 265 | 'OE' => 11, | ||||
| 266 | 'U' => 13, | ||||
| 267 | 'WA' => 9, | ||||
| 268 | 'WAE' => 10, | ||||
| 269 | 'WE' => 15, | ||||
| 270 | 'WEO' => 14, | ||||
| 271 | 'WI' => 16, | ||||
| 272 | 'YA' => 2, | ||||
| 273 | 'YAE' => 3, | ||||
| 274 | 'YE' => 7, | ||||
| 275 | 'YEO' => 6, | ||||
| 276 | 'YI' => 19, | ||||
| 277 | 'YO' => 12, | ||||
| 278 | 'YU' => 17, | ||||
| 279 | |||||
| 280 | ); | ||||
| 281 | |||||
| 282 | # Optional trailing consonant | ||||
| 283 | 1 | 6µs | my %Jamo_T = ( | ||
| 284 | 'B' => 17, | ||||
| 285 | 'BS' => 18, | ||||
| 286 | 'C' => 23, | ||||
| 287 | 'D' => 7, | ||||
| 288 | 'G' => 1, | ||||
| 289 | 'GG' => 2, | ||||
| 290 | 'GS' => 3, | ||||
| 291 | 'H' => 27, | ||||
| 292 | 'J' => 22, | ||||
| 293 | 'K' => 24, | ||||
| 294 | 'L' => 8, | ||||
| 295 | 'LB' => 11, | ||||
| 296 | 'LG' => 9, | ||||
| 297 | 'LH' => 15, | ||||
| 298 | 'LM' => 10, | ||||
| 299 | 'LP' => 14, | ||||
| 300 | 'LS' => 12, | ||||
| 301 | 'LT' => 13, | ||||
| 302 | 'M' => 16, | ||||
| 303 | 'N' => 4, | ||||
| 304 | 'NG' => 21, | ||||
| 305 | 'NH' => 6, | ||||
| 306 | 'NJ' => 5, | ||||
| 307 | 'P' => 26, | ||||
| 308 | 'S' => 19, | ||||
| 309 | 'SS' => 20, | ||||
| 310 | 'T' => 25, | ||||
| 311 | |||||
| 312 | ); | ||||
| 313 | |||||
| 314 | # Computed re that splits up a Hangul name into LVT or LV syllables | ||||
| 315 | 1 | 5µs | 1 | 1µs | my $syllable_re = qr/(|B|BB|C|D|DD|G|GG|H|J|JJ|K|M|N|P|R|S|SS|T)(A|AE|E|EO|EU|I|O|OE|U|WA|WAE|WE|WEO|WI|YA|YAE|YE|YEO|YI|YO|YU)(B|BS|C|D|G|GG|GS|H|J|K|L|LB|LG|LH|LM|LP|LS|LT|M|N|NG|NH|NJ|P|S|SS|T)?/; # spent 1µs making 1 call to charnames::CORE:qr |
| 316 | |||||
| 317 | 1 | 300ns | my $HANGUL_SYLLABLE = "HANGUL SYLLABLE "; | ||
| 318 | 1 | 100ns | my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE"; | ||
| 319 | |||||
| 320 | # These constant names and values were taken from the Unicode standard, | ||||
| 321 | # version 5.1, section 3.12. They are used in conjunction with Hangul | ||||
| 322 | # syllables | ||||
| 323 | 1 | 100ns | my $SBase = 0xAC00; | ||
| 324 | 1 | 0s | my $LBase = 0x1100; | ||
| 325 | 1 | 0s | my $VBase = 0x1161; | ||
| 326 | 1 | 0s | my $TBase = 0x11A7; | ||
| 327 | 1 | 0s | my $SCount = 11172; | ||
| 328 | 1 | 100ns | my $LCount = 19; | ||
| 329 | 1 | 0s | my $VCount = 21; | ||
| 330 | 1 | 0s | my $TCount = 28; | ||
| 331 | 1 | 2µs | my $NCount = $VCount * $TCount; | ||
| 332 | |||||
| 333 | sub name_to_code_point_special { | ||||
| 334 | my ($name, $loose) = @_; | ||||
| 335 | |||||
| 336 | # Returns undef if not one of the specially handled names; otherwise | ||||
| 337 | # returns the code point equivalent to the input name | ||||
| 338 | # $loose is non-zero to request loose matching; 'name' in that case | ||||
| 339 | # must be input as upper case with all blanks and dashes squeezed out. | ||||
| 340 | |||||
| 341 | if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//) | ||||
| 342 | || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//)) | ||||
| 343 | { | ||||
| 344 | return if $name !~ qr/^$syllable_re$/; | ||||
| 345 | my $L = $Jamo_L{$1}; | ||||
| 346 | my $V = $Jamo_V{$2}; | ||||
| 347 | my $T = (defined $3) ? $Jamo_T{$3} : 0; | ||||
| 348 | return ($L * $VCount + $V) * $TCount + $T + $SBase; | ||||
| 349 | } | ||||
| 350 | |||||
| 351 | # Name must end in 'code_point' for this to handle. | ||||
| 352 | return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x) | ||||
| 353 | || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x)); | ||||
| 354 | |||||
| 355 | my $base = $1; | ||||
| 356 | my $code_point = CORE::hex $2; | ||||
| 357 | my $names_ref; | ||||
| 358 | |||||
| 359 | if ($loose) { | ||||
| 360 | $names_ref = \%loose_names_ending_in_code_point; | ||||
| 361 | } | ||||
| 362 | else { | ||||
| 363 | return if $base !~ s/-$//; | ||||
| 364 | $names_ref = \%names_ending_in_code_point; | ||||
| 365 | } | ||||
| 366 | |||||
| 367 | # Name must be one of the ones which has the code point in it. | ||||
| 368 | return if ! $names_ref->{$base}; | ||||
| 369 | |||||
| 370 | # Look through the list of ranges that apply to this name to see if | ||||
| 371 | # the code point is in one of them. | ||||
| 372 | for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) { | ||||
| 373 | return if $names_ref->{$base}{'low'}->[$i] > $code_point; | ||||
| 374 | next if $names_ref->{$base}{'high'}->[$i] < $code_point; | ||||
| 375 | |||||
| 376 | # Here, the code point is in the range. | ||||
| 377 | return $code_point; | ||||
| 378 | } | ||||
| 379 | |||||
| 380 | # Here, looked like the name had a code point number in it, but | ||||
| 381 | # did not match one of the valid ones. | ||||
| 382 | return; | ||||
| 383 | } | ||||
| 384 | |||||
| 385 | sub code_point_to_name_special { | ||||
| 386 | my $code_point = shift; | ||||
| 387 | |||||
| 388 | # Returns the name of a code point if algorithmically determinable; | ||||
| 389 | # undef if not | ||||
| 390 | |||||
| 391 | # If in the Hangul range, calculate the name based on Unicode's | ||||
| 392 | # algorithm | ||||
| 393 | if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) { | ||||
| 394 | 2 | 141µs | 2 | 21µs | use integer; # spent 19µs (16+2) within charnames::BEGIN@394 which was called: # once (16µs+2µs) by charnames::BEGIN@5 at line 394 # spent 19µs making 1 call to charnames::BEGIN@394 # spent 2µs making 1 call to integer::import |
| 395 | my $SIndex = $code_point - $SBase; | ||||
| 396 | my $L = $LBase + $SIndex / $NCount; | ||||
| 397 | my $V = $VBase + ($SIndex % $NCount) / $TCount; | ||||
| 398 | my $T = $TBase + $SIndex % $TCount; | ||||
| 399 | $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}"; | ||||
| 400 | $name .= $Jamo{$T} if $T != $TBase; | ||||
| 401 | return $name; | ||||
| 402 | } | ||||
| 403 | |||||
| 404 | # Look through list of these code points for one in range. | ||||
| 405 | foreach my $hash (@code_points_ending_in_code_point) { | ||||
| 406 | return if $code_point < $hash->{'low'}; | ||||
| 407 | if ($code_point <= $hash->{'high'}) { | ||||
| 408 | return sprintf("%s-%04X", $hash->{'name'}, $code_point); | ||||
| 409 | } | ||||
| 410 | } | ||||
| 411 | return; # None found | ||||
| 412 | } | ||||
| 413 | } # End closure | ||||
| 414 | |||||
| 415 | 1 | 34µs | 1; |