Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/darwin-2level/HTML/Entities.pm |
Statements | Executed 941 statements in 2.40ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
253 | 1 | 1 | 159µs | 159µs | CORE:subst (opcode) | HTML::Entities::
1 | 1 | 1 | 15µs | 34µs | BEGIN@138 | HTML::Entities::
1 | 1 | 1 | 8µs | 66µs | BEGIN@139 | HTML::Entities::
1 | 1 | 1 | 6µs | 33µs | BEGIN@140 | HTML::Entities::
0 | 0 | 0 | 0s | 0s | Version | HTML::Entities::
0 | 0 | 0 | 0s | 0s | encode_entities | HTML::Entities::
0 | 0 | 0 | 0s | 0s | encode_entities_numeric | HTML::Entities::
0 | 0 | 0 | 0s | 0s | num_entity | HTML::Entities::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package HTML::Entities; | ||||
2 | |||||
3 | =encoding utf8 | ||||
4 | |||||
5 | =head1 NAME | ||||
6 | |||||
7 | HTML::Entities - Encode or decode strings with HTML entities | ||||
8 | |||||
9 | =head1 SYNOPSIS | ||||
10 | |||||
11 | use HTML::Entities; | ||||
12 | |||||
13 | $a = "Våre norske tegn bør æres"; | ||||
14 | decode_entities($a); | ||||
15 | encode_entities($a, "\200-\377"); | ||||
16 | |||||
17 | For example, this: | ||||
18 | |||||
19 | $input = "vis-à -vis Beyoncé's naïve\npapier-mâché résumé"; | ||||
20 | print encode_entities($input), "\n" | ||||
21 | |||||
22 | Prints this out: | ||||
23 | |||||
24 | vis-à-vis Beyoncé's naïve | ||||
25 | papier-mâché résumé | ||||
26 | |||||
27 | =head1 DESCRIPTION | ||||
28 | |||||
29 | This module deals with encoding and decoding of strings with HTML | ||||
30 | character entities. The module provides the following functions: | ||||
31 | |||||
32 | =over 4 | ||||
33 | |||||
34 | =item decode_entities( $string, ... ) | ||||
35 | |||||
36 | This routine replaces HTML entities found in the $string with the | ||||
37 | corresponding Unicode character. Unrecognized entities are left alone. | ||||
38 | |||||
39 | If multiple strings are provided as argument they are each decoded | ||||
40 | separately and the same number of strings are returned. | ||||
41 | |||||
42 | If called in void context the arguments are decoded in-place. | ||||
43 | |||||
44 | This routine is exported by default. | ||||
45 | |||||
46 | =item _decode_entities( $string, \%entity2char ) | ||||
47 | |||||
48 | =item _decode_entities( $string, \%entity2char, $expand_prefix ) | ||||
49 | |||||
50 | This will in-place replace HTML entities in $string. The %entity2char | ||||
51 | hash must be provided. Named entities not found in the %entity2char | ||||
52 | hash are left alone. Numeric entities are expanded unless their value | ||||
53 | overflow. | ||||
54 | |||||
55 | The keys in %entity2char are the entity names to be expanded and their | ||||
56 | values are what they should expand into. The values do not have to be | ||||
57 | single character strings. If a key has ";" as suffix, | ||||
58 | then occurrences in $string are only expanded if properly terminated | ||||
59 | with ";". Entities without ";" will be expanded regardless of how | ||||
60 | they are terminated for compatibility with how common browsers treat | ||||
61 | entities in the Latin-1 range. | ||||
62 | |||||
63 | If $expand_prefix is TRUE then entities without trailing ";" in | ||||
64 | %entity2char will even be expanded as a prefix of a longer | ||||
65 | unrecognized name. The longest matching name in %entity2char will be | ||||
66 | used. This is mainly present for compatibility with an MSIE | ||||
67 | misfeature. | ||||
68 | |||||
69 | $string = "foo bar"; | ||||
70 | _decode_entities($string, { nb => "@", nbsp => "\xA0" }, 1); | ||||
71 | print $string; # will print "foo bar" | ||||
72 | |||||
73 | This routine is exported by default. | ||||
74 | |||||
75 | =item encode_entities( $string ) | ||||
76 | |||||
77 | =item encode_entities( $string, $unsafe_chars ) | ||||
78 | |||||
79 | This routine replaces unsafe characters in $string with their entity | ||||
80 | representation. A second argument can be given to specify which characters to | ||||
81 | consider unsafe. The unsafe characters is specified using the regular | ||||
82 | expression character class syntax (what you find within brackets in regular | ||||
83 | expressions). | ||||
84 | |||||
85 | The default set of characters to encode are control chars, high-bit chars, and | ||||
86 | the C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> characters. But this, | ||||
87 | for example, would encode I<just> the C<< < >>, C<< & >>, C<< > >>, and C<< " | ||||
88 | >> characters: | ||||
89 | |||||
90 | $encoded = encode_entities($input, '<>&"'); | ||||
91 | |||||
92 | and this would only encode non-plain ascii: | ||||
93 | |||||
94 | $encoded = encode_entities($input, '^\n\x20-\x25\x27-\x7e'); | ||||
95 | |||||
96 | This routine is exported by default. | ||||
97 | |||||
98 | =item encode_entities_numeric( $string ) | ||||
99 | |||||
100 | =item encode_entities_numeric( $string, $unsafe_chars ) | ||||
101 | |||||
102 | This routine works just like encode_entities, except that the replacement | ||||
103 | entities are always C<&#xI<hexnum>;> and never C<&I<entname>;>. For | ||||
104 | example, C<encode_entities("r\xF4le")> returns "rôle", but | ||||
105 | C<encode_entities_numeric("r\xF4le")> returns "rôle". | ||||
106 | |||||
107 | This routine is I<not> exported by default. But you can always | ||||
108 | export it with C<use HTML::Entities qw(encode_entities_numeric);> | ||||
109 | or even C<use HTML::Entities qw(:DEFAULT encode_entities_numeric);> | ||||
110 | |||||
111 | =back | ||||
112 | |||||
113 | All these routines modify the string passed as the first argument, if | ||||
114 | called in a void context. In scalar and array contexts, the encoded or | ||||
115 | decoded string is returned (without changing the input string). | ||||
116 | |||||
117 | If you prefer not to import these routines into your namespace, you can | ||||
118 | call them as: | ||||
119 | |||||
120 | use HTML::Entities (); | ||||
121 | $decoded = HTML::Entities::decode($a); | ||||
122 | $encoded = HTML::Entities::encode($a); | ||||
123 | $encoded = HTML::Entities::encode_numeric($a); | ||||
124 | |||||
125 | The module can also export the %char2entity and the %entity2char | ||||
126 | hashes, which contain the mapping from all characters to the | ||||
127 | corresponding entities (and vice versa, respectively). | ||||
128 | |||||
129 | =head1 COPYRIGHT | ||||
130 | |||||
131 | Copyright 1995-2006 Gisle Aas. All rights reserved. | ||||
132 | |||||
133 | This library is free software; you can redistribute it and/or | ||||
134 | modify it under the same terms as Perl itself. | ||||
135 | |||||
136 | =cut | ||||
137 | |||||
138 | 2 | 26µs | 2 | 52µs | # spent 34µs (15+19) within HTML::Entities::BEGIN@138 which was called:
# once (15µs+19µs) by Perl::Tidy::HtmlWriter::BEGIN@2 at line 138 # spent 34µs making 1 call to HTML::Entities::BEGIN@138
# spent 19µs making 1 call to strict::import |
139 | 2 | 24µs | 2 | 123µs | # spent 66µs (8+58) within HTML::Entities::BEGIN@139 which was called:
# once (8µs+58µs) by Perl::Tidy::HtmlWriter::BEGIN@2 at line 139 # spent 66µs making 1 call to HTML::Entities::BEGIN@139
# spent 58µs making 1 call to vars::import |
140 | 2 | 1.03ms | 2 | 60µs | # spent 33µs (6+27) within HTML::Entities::BEGIN@140 which was called:
# once (6µs+27µs) by Perl::Tidy::HtmlWriter::BEGIN@2 at line 140 # spent 33µs making 1 call to HTML::Entities::BEGIN@140
# spent 27µs making 1 call to vars::import |
141 | |||||
142 | 1 | 11µs | require 5.004; | ||
143 | 1 | 700ns | require Exporter; | ||
144 | 1 | 5µs | @ISA = qw(Exporter); | ||
145 | |||||
146 | 1 | 900ns | @EXPORT = qw(encode_entities decode_entities _decode_entities); | ||
147 | 1 | 600ns | @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric); | ||
148 | |||||
149 | 1 | 200ns | $VERSION = "3.69"; | ||
150 | sub Version { $VERSION; } | ||||
151 | |||||
152 | 1 | 62µs | require HTML::Parser; # for fast XS implemented decode_entities | ||
153 | |||||
154 | |||||
155 | 1 | 105µs | %entity2char = ( | ||
156 | # Some normal chars that have special meaning in SGML context | ||||
157 | amp => '&', # ampersand | ||||
158 | 'gt' => '>', # greater than | ||||
159 | 'lt' => '<', # less than | ||||
160 | quot => '"', # double quote | ||||
161 | apos => "'", # single quote | ||||
162 | |||||
163 | # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML | ||||
164 | AElig => chr(198), # capital AE diphthong (ligature) | ||||
165 | Aacute => chr(193), # capital A, acute accent | ||||
166 | Acirc => chr(194), # capital A, circumflex accent | ||||
167 | Agrave => chr(192), # capital A, grave accent | ||||
168 | Aring => chr(197), # capital A, ring | ||||
169 | Atilde => chr(195), # capital A, tilde | ||||
170 | Auml => chr(196), # capital A, dieresis or umlaut mark | ||||
171 | Ccedil => chr(199), # capital C, cedilla | ||||
172 | ETH => chr(208), # capital Eth, Icelandic | ||||
173 | Eacute => chr(201), # capital E, acute accent | ||||
174 | Ecirc => chr(202), # capital E, circumflex accent | ||||
175 | Egrave => chr(200), # capital E, grave accent | ||||
176 | Euml => chr(203), # capital E, dieresis or umlaut mark | ||||
177 | Iacute => chr(205), # capital I, acute accent | ||||
178 | Icirc => chr(206), # capital I, circumflex accent | ||||
179 | Igrave => chr(204), # capital I, grave accent | ||||
180 | Iuml => chr(207), # capital I, dieresis or umlaut mark | ||||
181 | Ntilde => chr(209), # capital N, tilde | ||||
182 | Oacute => chr(211), # capital O, acute accent | ||||
183 | Ocirc => chr(212), # capital O, circumflex accent | ||||
184 | Ograve => chr(210), # capital O, grave accent | ||||
185 | Oslash => chr(216), # capital O, slash | ||||
186 | Otilde => chr(213), # capital O, tilde | ||||
187 | Ouml => chr(214), # capital O, dieresis or umlaut mark | ||||
188 | THORN => chr(222), # capital THORN, Icelandic | ||||
189 | Uacute => chr(218), # capital U, acute accent | ||||
190 | Ucirc => chr(219), # capital U, circumflex accent | ||||
191 | Ugrave => chr(217), # capital U, grave accent | ||||
192 | Uuml => chr(220), # capital U, dieresis or umlaut mark | ||||
193 | Yacute => chr(221), # capital Y, acute accent | ||||
194 | aacute => chr(225), # small a, acute accent | ||||
195 | acirc => chr(226), # small a, circumflex accent | ||||
196 | aelig => chr(230), # small ae diphthong (ligature) | ||||
197 | agrave => chr(224), # small a, grave accent | ||||
198 | aring => chr(229), # small a, ring | ||||
199 | atilde => chr(227), # small a, tilde | ||||
200 | auml => chr(228), # small a, dieresis or umlaut mark | ||||
201 | ccedil => chr(231), # small c, cedilla | ||||
202 | eacute => chr(233), # small e, acute accent | ||||
203 | ecirc => chr(234), # small e, circumflex accent | ||||
204 | egrave => chr(232), # small e, grave accent | ||||
205 | eth => chr(240), # small eth, Icelandic | ||||
206 | euml => chr(235), # small e, dieresis or umlaut mark | ||||
207 | iacute => chr(237), # small i, acute accent | ||||
208 | icirc => chr(238), # small i, circumflex accent | ||||
209 | igrave => chr(236), # small i, grave accent | ||||
210 | iuml => chr(239), # small i, dieresis or umlaut mark | ||||
211 | ntilde => chr(241), # small n, tilde | ||||
212 | oacute => chr(243), # small o, acute accent | ||||
213 | ocirc => chr(244), # small o, circumflex accent | ||||
214 | ograve => chr(242), # small o, grave accent | ||||
215 | oslash => chr(248), # small o, slash | ||||
216 | otilde => chr(245), # small o, tilde | ||||
217 | ouml => chr(246), # small o, dieresis or umlaut mark | ||||
218 | szlig => chr(223), # small sharp s, German (sz ligature) | ||||
219 | thorn => chr(254), # small thorn, Icelandic | ||||
220 | uacute => chr(250), # small u, acute accent | ||||
221 | ucirc => chr(251), # small u, circumflex accent | ||||
222 | ugrave => chr(249), # small u, grave accent | ||||
223 | uuml => chr(252), # small u, dieresis or umlaut mark | ||||
224 | yacute => chr(253), # small y, acute accent | ||||
225 | yuml => chr(255), # small y, dieresis or umlaut mark | ||||
226 | |||||
227 | # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) | ||||
228 | copy => chr(169), # copyright sign | ||||
229 | reg => chr(174), # registered sign | ||||
230 | nbsp => chr(160), # non breaking space | ||||
231 | |||||
232 | # Additional ISO-8859/1 entities listed in rfc1866 (section 14) | ||||
233 | iexcl => chr(161), | ||||
234 | cent => chr(162), | ||||
235 | pound => chr(163), | ||||
236 | curren => chr(164), | ||||
237 | yen => chr(165), | ||||
238 | brvbar => chr(166), | ||||
239 | sect => chr(167), | ||||
240 | uml => chr(168), | ||||
241 | ordf => chr(170), | ||||
242 | laquo => chr(171), | ||||
243 | 'not' => chr(172), # not is a keyword in perl | ||||
244 | shy => chr(173), | ||||
245 | macr => chr(175), | ||||
246 | deg => chr(176), | ||||
247 | plusmn => chr(177), | ||||
248 | sup1 => chr(185), | ||||
249 | sup2 => chr(178), | ||||
250 | sup3 => chr(179), | ||||
251 | acute => chr(180), | ||||
252 | micro => chr(181), | ||||
253 | para => chr(182), | ||||
254 | middot => chr(183), | ||||
255 | cedil => chr(184), | ||||
256 | ordm => chr(186), | ||||
257 | raquo => chr(187), | ||||
258 | frac14 => chr(188), | ||||
259 | frac12 => chr(189), | ||||
260 | frac34 => chr(190), | ||||
261 | iquest => chr(191), | ||||
262 | 'times' => chr(215), # times is a keyword in perl | ||||
263 | divide => chr(247), | ||||
264 | |||||
265 | ( $] > 5.007 ? ( | ||||
266 | 'OElig;' => chr(338), | ||||
267 | 'oelig;' => chr(339), | ||||
268 | 'Scaron;' => chr(352), | ||||
269 | 'scaron;' => chr(353), | ||||
270 | 'Yuml;' => chr(376), | ||||
271 | 'fnof;' => chr(402), | ||||
272 | 'circ;' => chr(710), | ||||
273 | 'tilde;' => chr(732), | ||||
274 | 'Alpha;' => chr(913), | ||||
275 | 'Beta;' => chr(914), | ||||
276 | 'Gamma;' => chr(915), | ||||
277 | 'Delta;' => chr(916), | ||||
278 | 'Epsilon;' => chr(917), | ||||
279 | 'Zeta;' => chr(918), | ||||
280 | 'Eta;' => chr(919), | ||||
281 | 'Theta;' => chr(920), | ||||
282 | 'Iota;' => chr(921), | ||||
283 | 'Kappa;' => chr(922), | ||||
284 | 'Lambda;' => chr(923), | ||||
285 | 'Mu;' => chr(924), | ||||
286 | 'Nu;' => chr(925), | ||||
287 | 'Xi;' => chr(926), | ||||
288 | 'Omicron;' => chr(927), | ||||
289 | 'Pi;' => chr(928), | ||||
290 | 'Rho;' => chr(929), | ||||
291 | 'Sigma;' => chr(931), | ||||
292 | 'Tau;' => chr(932), | ||||
293 | 'Upsilon;' => chr(933), | ||||
294 | 'Phi;' => chr(934), | ||||
295 | 'Chi;' => chr(935), | ||||
296 | 'Psi;' => chr(936), | ||||
297 | 'Omega;' => chr(937), | ||||
298 | 'alpha;' => chr(945), | ||||
299 | 'beta;' => chr(946), | ||||
300 | 'gamma;' => chr(947), | ||||
301 | 'delta;' => chr(948), | ||||
302 | 'epsilon;' => chr(949), | ||||
303 | 'zeta;' => chr(950), | ||||
304 | 'eta;' => chr(951), | ||||
305 | 'theta;' => chr(952), | ||||
306 | 'iota;' => chr(953), | ||||
307 | 'kappa;' => chr(954), | ||||
308 | 'lambda;' => chr(955), | ||||
309 | 'mu;' => chr(956), | ||||
310 | 'nu;' => chr(957), | ||||
311 | 'xi;' => chr(958), | ||||
312 | 'omicron;' => chr(959), | ||||
313 | 'pi;' => chr(960), | ||||
314 | 'rho;' => chr(961), | ||||
315 | 'sigmaf;' => chr(962), | ||||
316 | 'sigma;' => chr(963), | ||||
317 | 'tau;' => chr(964), | ||||
318 | 'upsilon;' => chr(965), | ||||
319 | 'phi;' => chr(966), | ||||
320 | 'chi;' => chr(967), | ||||
321 | 'psi;' => chr(968), | ||||
322 | 'omega;' => chr(969), | ||||
323 | 'thetasym;' => chr(977), | ||||
324 | 'upsih;' => chr(978), | ||||
325 | 'piv;' => chr(982), | ||||
326 | 'ensp;' => chr(8194), | ||||
327 | 'emsp;' => chr(8195), | ||||
328 | 'thinsp;' => chr(8201), | ||||
329 | 'zwnj;' => chr(8204), | ||||
330 | 'zwj;' => chr(8205), | ||||
331 | 'lrm;' => chr(8206), | ||||
332 | 'rlm;' => chr(8207), | ||||
333 | 'ndash;' => chr(8211), | ||||
334 | 'mdash;' => chr(8212), | ||||
335 | 'lsquo;' => chr(8216), | ||||
336 | 'rsquo;' => chr(8217), | ||||
337 | 'sbquo;' => chr(8218), | ||||
338 | 'ldquo;' => chr(8220), | ||||
339 | 'rdquo;' => chr(8221), | ||||
340 | 'bdquo;' => chr(8222), | ||||
341 | 'dagger;' => chr(8224), | ||||
342 | 'Dagger;' => chr(8225), | ||||
343 | 'bull;' => chr(8226), | ||||
344 | 'hellip;' => chr(8230), | ||||
345 | 'permil;' => chr(8240), | ||||
346 | 'prime;' => chr(8242), | ||||
347 | 'Prime;' => chr(8243), | ||||
348 | 'lsaquo;' => chr(8249), | ||||
349 | 'rsaquo;' => chr(8250), | ||||
350 | 'oline;' => chr(8254), | ||||
351 | 'frasl;' => chr(8260), | ||||
352 | 'euro;' => chr(8364), | ||||
353 | 'image;' => chr(8465), | ||||
354 | 'weierp;' => chr(8472), | ||||
355 | 'real;' => chr(8476), | ||||
356 | 'trade;' => chr(8482), | ||||
357 | 'alefsym;' => chr(8501), | ||||
358 | 'larr;' => chr(8592), | ||||
359 | 'uarr;' => chr(8593), | ||||
360 | 'rarr;' => chr(8594), | ||||
361 | 'darr;' => chr(8595), | ||||
362 | 'harr;' => chr(8596), | ||||
363 | 'crarr;' => chr(8629), | ||||
364 | 'lArr;' => chr(8656), | ||||
365 | 'uArr;' => chr(8657), | ||||
366 | 'rArr;' => chr(8658), | ||||
367 | 'dArr;' => chr(8659), | ||||
368 | 'hArr;' => chr(8660), | ||||
369 | 'forall;' => chr(8704), | ||||
370 | 'part;' => chr(8706), | ||||
371 | 'exist;' => chr(8707), | ||||
372 | 'empty;' => chr(8709), | ||||
373 | 'nabla;' => chr(8711), | ||||
374 | 'isin;' => chr(8712), | ||||
375 | 'notin;' => chr(8713), | ||||
376 | 'ni;' => chr(8715), | ||||
377 | 'prod;' => chr(8719), | ||||
378 | 'sum;' => chr(8721), | ||||
379 | 'minus;' => chr(8722), | ||||
380 | 'lowast;' => chr(8727), | ||||
381 | 'radic;' => chr(8730), | ||||
382 | 'prop;' => chr(8733), | ||||
383 | 'infin;' => chr(8734), | ||||
384 | 'ang;' => chr(8736), | ||||
385 | 'and;' => chr(8743), | ||||
386 | 'or;' => chr(8744), | ||||
387 | 'cap;' => chr(8745), | ||||
388 | 'cup;' => chr(8746), | ||||
389 | 'int;' => chr(8747), | ||||
390 | 'there4;' => chr(8756), | ||||
391 | 'sim;' => chr(8764), | ||||
392 | 'cong;' => chr(8773), | ||||
393 | 'asymp;' => chr(8776), | ||||
394 | 'ne;' => chr(8800), | ||||
395 | 'equiv;' => chr(8801), | ||||
396 | 'le;' => chr(8804), | ||||
397 | 'ge;' => chr(8805), | ||||
398 | 'sub;' => chr(8834), | ||||
399 | 'sup;' => chr(8835), | ||||
400 | 'nsub;' => chr(8836), | ||||
401 | 'sube;' => chr(8838), | ||||
402 | 'supe;' => chr(8839), | ||||
403 | 'oplus;' => chr(8853), | ||||
404 | 'otimes;' => chr(8855), | ||||
405 | 'perp;' => chr(8869), | ||||
406 | 'sdot;' => chr(8901), | ||||
407 | 'lceil;' => chr(8968), | ||||
408 | 'rceil;' => chr(8969), | ||||
409 | 'lfloor;' => chr(8970), | ||||
410 | 'rfloor;' => chr(8971), | ||||
411 | 'lang;' => chr(9001), | ||||
412 | 'rang;' => chr(9002), | ||||
413 | 'loz;' => chr(9674), | ||||
414 | 'spades;' => chr(9824), | ||||
415 | 'clubs;' => chr(9827), | ||||
416 | 'hearts;' => chr(9829), | ||||
417 | 'diams;' => chr(9830), | ||||
418 | ) : ()) | ||||
419 | ); | ||||
420 | |||||
421 | |||||
422 | # Make the opposite mapping | ||||
423 | 1 | 175µs | while (my($entity, $char) = each(%entity2char)) { | ||
424 | 253 | 517µs | 253 | 159µs | $entity =~ s/;\z//; # spent 159µs making 253 calls to HTML::Entities::CORE:subst, avg 630ns/call |
425 | 253 | 264µs | $char2entity{$char} = "&$entity;"; | ||
426 | } | ||||
427 | 1 | 600ns | delete $char2entity{"'"}; # only one-way decoding | ||
428 | |||||
429 | # Fill in missing entities | ||||
430 | 1 | 2µs | for (0 .. 255) { | ||
431 | 256 | 28µs | next if exists $char2entity{chr($_)}; | ||
432 | 156 | 85µs | $char2entity{chr($_)} = "&#$_;"; | ||
433 | } | ||||
434 | |||||
435 | 1 | 100ns | my %subst; # compiled encoding regexps | ||
436 | |||||
437 | sub encode_entities | ||||
438 | { | ||||
439 | return undef unless defined $_[0]; | ||||
440 | my $ref; | ||||
441 | if (defined wantarray) { | ||||
442 | my $x = $_[0]; | ||||
443 | $ref = \$x; # copy | ||||
444 | } else { | ||||
445 | $ref = \$_[0]; # modify in-place | ||||
446 | } | ||||
447 | if (defined $_[1] and length $_[1]) { | ||||
448 | unless (exists $subst{$_[1]}) { | ||||
449 | # Because we can't compile regex we fake it with a cached sub | ||||
450 | my $chars = $_[1]; | ||||
451 | $chars =~ s,(?<!\\)([]/]),\\$1,g; | ||||
452 | $chars =~ s,(?<!\\)\\\z,\\\\,; | ||||
453 | my $code = "sub {\$_[0] =~ s/([$chars])/\$char2entity{\$1} || num_entity(\$1)/ge; }"; | ||||
454 | $subst{$_[1]} = eval $code; | ||||
455 | die( $@ . " while trying to turn range: \"$_[1]\"\n " | ||||
456 | . "into code: $code\n " | ||||
457 | ) if $@; | ||||
458 | } | ||||
459 | &{$subst{$_[1]}}($$ref); | ||||
460 | } else { | ||||
461 | # Encode control chars, high bit chars and '<', '&', '>', ''' and '"' | ||||
462 | $$ref =~ s/([^\n\r\t !\#\$%\(-;=?-~])/$char2entity{$1} || num_entity($1)/ge; | ||||
463 | } | ||||
464 | $$ref; | ||||
465 | } | ||||
466 | |||||
467 | sub encode_entities_numeric { | ||||
468 | local %char2entity; | ||||
469 | return &encode_entities; # a goto &encode_entities wouldn't work | ||||
470 | } | ||||
471 | |||||
472 | |||||
473 | sub num_entity { | ||||
474 | sprintf "&#x%X;", ord($_[0]); | ||||
475 | } | ||||
476 | |||||
477 | # Set up aliases | ||||
478 | 1 | 800ns | *encode = \&encode_entities; | ||
479 | 1 | 200ns | *encode_numeric = \&encode_entities_numeric; | ||
480 | 1 | 200ns | *encode_numerically = \&encode_entities_numeric; | ||
481 | 1 | 200ns | *decode = \&decode_entities; | ||
482 | |||||
483 | 1 | 60µs | 1; | ||
# spent 159µs within HTML::Entities::CORE:subst which was called 253 times, avg 630ns/call:
# 253 times (159µs+0s) by Perl::Tidy::HtmlWriter::BEGIN@2 at line 424, avg 630ns/call |