Profile of unicore/Name.pm

Filename	/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/unicore/Name.pm
Statements	Executed 25 statements in 278µs

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	16µs	19µs	charnames::BEGIN@394
0	0	0	0s	0s	charnames::code_point_to_name_special
0	0	0	0s	0s	charnames::name_to_code_point_special

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
2					# This file is machine-generated by lib/unicore/mktables from the Unicode
3					# database, Version 6.2.0. Any changes made here will be lost!
4
5
6					# !!!!!!! INTERNAL PERL USE ONLY !!!!!!!
7					# This file is for internal use by core Perl only. The format and even the
8					# name or existence of this file are subject to change without notice. Don't
9					# use it directly.
10
11
12					package charnames;
13
14					# This module contains machine-generated tables and code for the
15					# algorithmically-determinable Unicode character names. The following
16					# routines can be used to translate between name and code point and vice versa
17
18					{ # Closure
19
20					# Matches a legal code point: 4-6 upper case hex digits. If there are 6, the first
21					# two must be 10; if there are 5, the first must not be a 0. Written this
22					# way to decrease backtracking. The first regex ($run_on_code_point_re) allows the code point to
23					# be at the end of a word, but to work properly, the word shouldn't end
24					# with a valid hex character. The second one ($code_point_re) won't match a code point at
25					# the end of a word, and doesn't have the run-on issue.
26	2	18µs	1	2µs	my $run_on_code_point_re = qr/(?^aax: (?: 10[0-9A-F]{4} \| [1-9A-F][0-9A-F]{4} \| [0-9A-F]{4} ) \b)/; # spent 2µs making 1 call to charnames::CORE:qr
27	1	4µs	1	1µs	my $code_point_re = qr/(?^aa:\b(?^aax: (?: 10[0-9A-F]{4} \| [1-9A-F][0-9A-F]{4} \| [0-9A-F]{4} ) \b))/; # spent 1µs making 1 call to charnames::CORE:qr
28
29					# In the following hash, the keys are the bases of names which include
30					# the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01. The value
31					# of each key is another hash which is used to get the low and high ends
32					# for each range of code points that applies to the name; 'low' and 'high' are parallel arrays, so entry i of each describes one range.
33	1	4µs			my %names_ending_in_code_point = (
34					'CJK COMPATIBILITY IDEOGRAPH' =>
35					{
36					'high' =>
37					[
38					64109,
39					64217,
40					195101,
41					],
42					'low' =>
43					[
44					63744,
45					64112,
46					194560,
47					],
48					},
49					'CJK UNIFIED IDEOGRAPH' =>
50					{
51					'high' =>
52					[
53					19893,
54					40908,
55					173782,
56					177972,
57					178205,
58					],
59					'low' =>
60					[
61					13312,
62					19968,
63					131072,
64					173824,
65					177984,
66					],
67					},
68
69					);
70
71					# The following hash is a copy of the previous one, except that it is for loose
72					# matching, so each name has blanks and dashes squeezed out.
73	1	3µs			my %loose_names_ending_in_code_point = (
74					'CJKCOMPATIBILITYIDEOGRAPH' =>
75					{
76					'high' =>
77					[
78					64109,
79					64217,
80					195101,
81					],
82					'low' =>
83					[
84					63744,
85					64112,
86					194560,
87					],
88					},
89					'CJKUNIFIEDIDEOGRAPH' =>
90					{
91					'high' =>
92					[
93					19893,
94					40908,
95					173782,
96					177972,
97					178205,
98					],
99					'low' =>
100					[
101					13312,
102					19968,
103					131072,
104					173824,
105					177984,
106					],
107					},
108
109					);
110
111					# And the following array gives the inverse mapping from code points to
112					# names. Lowest code points are first, which lets a lookup stop at the first range whose 'low' is above the code point.
113	1	6µs			my @code_points_ending_in_code_point = (
114
115					{
116					'high' => 19893,
117					'low' => 13312,
118					'name' => 'CJK UNIFIED IDEOGRAPH',
119					},
120					{
121					'high' => 40908,
122					'low' => 19968,
123					'name' => 'CJK UNIFIED IDEOGRAPH',
124					},
125					{
126					'high' => 64109,
127					'low' => 63744,
128					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
129					},
130					{
131					'high' => 64217,
132					'low' => 64112,
133					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
134					},
135					{
136					'high' => 173782,
137					'low' => 131072,
138					'name' => 'CJK UNIFIED IDEOGRAPH',
139					},
140					{
141					'high' => 177972,
142					'low' => 173824,
143					'name' => 'CJK UNIFIED IDEOGRAPH',
144					},
145					{
146					'high' => 178205,
147					'low' => 177984,
148					'name' => 'CJK UNIFIED IDEOGRAPH',
149					},
150					{
151					'high' => 195101,
152					'low' => 194560,
153					'name' => 'CJK COMPATIBILITY IDEOGRAPH',
154					},
155					,
156
157					);
158
159					# Convert from code point to Jamo short name for use in composing Hangul
160					# syllable names. Note that 4363 maps to the empty string, so it adds no letters to a composed name.
161	1	41µs			my %Jamo = (
162					4352 => 'G',
163					4353 => 'GG',
164					4354 => 'N',
165					4355 => 'D',
166					4356 => 'DD',
167					4357 => 'R',
168					4358 => 'M',
169					4359 => 'B',
170					4360 => 'BB',
171					4361 => 'S',
172					4362 => 'SS',
173					4363 => '',
174					4364 => 'J',
175					4365 => 'JJ',
176					4366 => 'C',
177					4367 => 'K',
178					4368 => 'T',
179					4369 => 'P',
180					4370 => 'H',
181					4449 => 'A',
182					4450 => 'AE',
183					4451 => 'YA',
184					4452 => 'YAE',
185					4453 => 'EO',
186					4454 => 'E',
187					4455 => 'YEO',
188					4456 => 'YE',
189					4457 => 'O',
190					4458 => 'WA',
191					4459 => 'WAE',
192					4460 => 'OE',
193					4461 => 'YO',
194					4462 => 'U',
195					4463 => 'WEO',
196					4464 => 'WE',
197					4465 => 'WI',
198					4466 => 'YU',
199					4467 => 'EU',
200					4468 => 'YI',
201					4469 => 'I',
202					4520 => 'G',
203					4521 => 'GG',
204					4522 => 'GS',
205					4523 => 'N',
206					4524 => 'NJ',
207					4525 => 'NH',
208					4526 => 'D',
209					4527 => 'L',
210					4528 => 'LG',
211					4529 => 'LM',
212					4530 => 'LB',
213					4531 => 'LS',
214					4532 => 'LT',
215					4533 => 'LP',
216					4534 => 'LH',
217					4535 => 'M',
218					4536 => 'B',
219					4537 => 'BS',
220					4538 => 'S',
221					4539 => 'SS',
222					4540 => 'NG',
223					4541 => 'J',
224					4542 => 'C',
225					4543 => 'K',
226					4544 => 'T',
227					4545 => 'P',
228					4546 => 'H',
229
230					);
231
232					# Leading consonant (can be null): short name => index used when computing a syllable's code point
233	1	9µs			my %Jamo_L = (
234					'' => 11,
235					'B' => 7,
236					'BB' => 8,
237					'C' => 14,
238					'D' => 3,
239					'DD' => 4,
240					'G' => 0,
241					'GG' => 1,
242					'H' => 18,
243					'J' => 12,
244					'JJ' => 13,
245					'K' => 15,
246					'M' => 6,
247					'N' => 2,
248					'P' => 17,
249					'R' => 5,
250					'S' => 9,
251					'SS' => 10,
252					'T' => 16,
253
254					);
255
256					# Vowel: short name => index used when computing a syllable's code point
257	1	7µs			my %Jamo_V = (
258					'A' => 0,
259					'AE' => 1,
260					'E' => 5,
261					'EO' => 4,
262					'EU' => 18,
263					'I' => 20,
264					'O' => 8,
265					'OE' => 11,
266					'U' => 13,
267					'WA' => 9,
268					'WAE' => 10,
269					'WE' => 15,
270					'WEO' => 14,
271					'WI' => 16,
272					'YA' => 2,
273					'YAE' => 3,
274					'YE' => 7,
275					'YEO' => 6,
276					'YI' => 19,
277					'YO' => 12,
278					'YU' => 17,
279
280					);
281
282					# Optional trailing consonant: short name => index; values start at 1 because 0 is used for "no trailing consonant"
283	1	6µs			my %Jamo_T = (
284					'B' => 17,
285					'BS' => 18,
286					'C' => 23,
287					'D' => 7,
288					'G' => 1,
289					'GG' => 2,
290					'GS' => 3,
291					'H' => 27,
292					'J' => 22,
293					'K' => 24,
294					'L' => 8,
295					'LB' => 11,
296					'LG' => 9,
297					'LH' => 15,
298					'LM' => 10,
299					'LP' => 14,
300					'LS' => 12,
301					'LT' => 13,
302					'M' => 16,
303					'N' => 4,
304					'NG' => 21,
305					'NH' => 6,
306					'NJ' => 5,
307					'P' => 26,
308					'S' => 19,
309					'SS' => 20,
310					'T' => 25,
311
312					);
313
314					# Computed re that splits up a Hangul syllable name into its leading consonant ($1), vowel ($2) and optional trailing consonant ($3)
315	1	5µs	1	1µs	my $syllable_re = qr/(\|B\|BB\|C\|D\|DD\|G\|GG\|H\|J\|JJ\|K\|M\|N\|P\|R\|S\|SS\|T)(A\|AE\|E\|EO\|EU\|I\|O\|OE\|U\|WA\|WAE\|WE\|WEO\|WI\|YA\|YAE\|YE\|YEO\|YI\|YO\|YU)(B\|BS\|C\|D\|G\|GG\|GS\|H\|J\|K\|L\|LB\|LG\|LH\|LM\|LP\|LS\|LT\|M\|N\|NG\|NH\|NJ\|P\|S\|SS\|T)?/; # spent 1µs making 1 call to charnames::CORE:qr
316
317	1	300ns			my $HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
318	1	100ns			my $loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
319
320					# These constant names and values were taken from the Unicode standard,
321					# version 5.1, section 3.12. They are used in conjunction with Hangul
322					# syllables.
323	1	100ns			my $SBase = 0xAC00;
324	1	0s			my $LBase = 0x1100;
325	1	0s			my $VBase = 0x1161;
326	1	0s			my $TBase = 0x11A7;
327	1	0s			my $SCount = 11172;
328	1	100ns			my $LCount = 19;
329	1	0s			my $VCount = 21;
330	1	0s			my $TCount = 28;
331	1	2µs			my $NCount = $VCount * $TCount;
332
333					sub name_to_code_point_special {
334					my ($name, $loose) = @_;
335
336					# Returns nothing (undef in scalar context) if $name is not one of the specially handled names; otherwise
337					# returns the code point equivalent to the input name.
338					# $loose is true if loose matching is to be used; $name in that case
339					# must be input as upper case with all blanks and dashes squeezed out. NOTE(review): the Hangul prefix substitutions just below are unanchored, so the prefix is removed wherever it first occurs in $name -- confirm that is acceptable.
340
341					if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//)
342					\|\| ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//))
343					{
344					return if $name !~ qr/^$syllable_re$/;
345					my $L = $Jamo_L{$1};
346					my $V = $Jamo_V{$2};
347					my $T = (defined $3) ? $Jamo_T{$3} : 0;
348					return ($L * $VCount + $V) * $TCount + $T + $SBase;
349					}
350
351					# Name must end in a code point for this to handle it; on a match, $1 is the base name and $2 the hex digits.
352					return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
353					\|\| (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
354
355					my $base = $1;
356					my $code_point = CORE::hex $2;
357					my $names_ref;
358
359					if ($loose) {
360					$names_ref = \%loose_names_ending_in_code_point;
361					}
362					else {
363					return if $base !~ s/-$//;
364					$names_ref = \%names_ending_in_code_point;
365					}
366
367					# Name must be one of the ones which has the code point in it.
368					return if ! $names_ref->{$base};
369
370					# Look through the list of ranges that apply to this name to see if
371					# the code point is in one of them. The 'low' values are in ascending order, so give up as soon as a range starts above the code point.
372					for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
373					return if $names_ref->{$base}{'low'}->[$i] > $code_point;
374					next if $names_ref->{$base}{'high'}->[$i] < $code_point;
375
376					# Here, the code point is in the range.
377					return $code_point;
378					}
379
380					# Here, it looked like the name had a code point number in it, but
381					# it did not match one of the valid ones.
382					return;
383					}
384
385					sub code_point_to_name_special {
386					my $code_point = shift;
387
388					# Returns the name of a code point if algorithmically determinable;
389					# nothing (undef in scalar context) if not.
390
391					# If in the Hangul range, calculate the name based on Unicode's
392					# algorithm. NOTE(review): $name below is assigned without 'my', so it appears to be a package variable rather than a lexical -- confirm this is intended.
393					if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
394	2	141µs	2	21µs	# spent 19µs (16+2) within charnames::BEGIN@394 which was called: # once (16µs+2µs) by charnames::BEGIN@5 at line 394 use integer; # spent 19µs making 1 call to charnames::BEGIN@394 # spent 2µs making 1 call to integer::import
395					my $SIndex = $code_point - $SBase;
396					my $L = $LBase + $SIndex / $NCount;
397					my $V = $VBase + ($SIndex % $NCount) / $TCount;
398					my $T = $TBase + $SIndex % $TCount;
399					$name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
400					$name .= $Jamo{$T} if $T != $TBase;
401					return $name;
402					}
403
404					# Look through the list of these code points for one in range. The list is ordered lowest first, so stop as soon as a range starts above $code_point.
405					foreach my $hash (@code_points_ending_in_code_point) {
406					return if $code_point < $hash->{'low'};
407					if ($code_point <= $hash->{'high'}) {
408					return sprintf("%s-%04X", $hash->{'name'}, $code_point);
409					}
410					}
411					return; # None found in any range
412					}
413					} # End closure
414
415	1	34µs			1;