1: <?php
2: /**
3: * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4: * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5: *
6: * Licensed under The MIT License
7: * For full copyright and license information, please see the LICENSE.txt
8: * Redistributions of files must retain the above copyright notice.
9: *
10: * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11: * @link https://cakephp.org CakePHP(tm) Project
12: * @since 1.2.0
13: * @license https://opensource.org/licenses/mit-license.php MIT License
14: */
15: namespace Cake\Utility;
16:
17: use InvalidArgumentException;
18:
19: /**
20: * Text handling methods.
21: */
22: class Text
23: {
24:
/**
 * Default transliterator.
 *
 * No initial value is assigned, so this stays `null` until a transliterator
 * instance is stored here (see `setTransliterator()`).
 *
 * @var \Transliterator|null Transliterator instance.
 */
protected static $_defaultTransliterator;
31:
/**
 * Default transliterator id string.
 *
 * NOTE(review): this reads like a compound ICU transliterator id (convert to
 * Latin, fold to ASCII, then remove the listed code point range) - confirm
 * against the ICU documentation before relying on that reading.
 *
 * @var string Transliterator identifier string.
 */
protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
38:
/**
 * HTML tags whose text content is not counted when truncating text.
 *
 * @var string[]
 */
protected static $_defaultHtmlNoCount = [
    'style',
    'script'
];
48:
/**
 * Build a random (version 4) UUID string.
 *
 * Warning: This method should not be used as a random seed for any cryptographic operations.
 * Instead you should use the openssl or mcrypt extensions.
 *
 * Nor should it be used for identifiers with security implications, such as
 * 'unguessable' URL identifiers. Use `Security::randomBytes()` for those.
 *
 * @see https://www.ietf.org/rfc/rfc4122.txt
 * @return string RFC 4122 UUID
 * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
 */
public static function uuid()
{
    // Use random_int() when the runtime provides it, mt_rand() otherwise.
    $generator = function_exists('random_int') ? 'random_int' : 'mt_rand';

    // "time_low": 32 bits, drawn as two 16-bit words.
    $timeLowFirst = $generator(0, 65535);
    $timeLowSecond = $generator(0, 65535);

    // "time_mid": 16 bits.
    $timeMid = $generator(0, 65535);

    // "time_hi_and_version": 12 random bits below the 0100 (version 4) nibble.
    $timeHiAndVersion = $generator(0, 4095) | 0x4000;

    // "clk_seq_hi_res" and "clk_seq_low": 14 random bits; the two most
    // significant bits are forced to one and zero (variant DCE1.1).
    $clockSequence = $generator(0, 0x3fff) | 0x8000;

    // "node": 48 bits, drawn as three 16-bit words.
    $nodeFirst = $generator(0, 65535);
    $nodeSecond = $generator(0, 65535);
    $nodeThird = $generator(0, 65535);

    return sprintf(
        '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
        $timeLowFirst,
        $timeLowSecond,
        $timeMid,
        $timeHiAndVersion,
        $clockSequence,
        $nodeFirst,
        $nodeSecond,
        $nodeThird
    );
}
85:
/**
 * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
 * $leftBound and $rightBound.
 *
 * The separator and both bounds are compared against single characters of
 * $data, so each of them is expected to be exactly one character long.
 * Every returned token is trimmed.
 *
 * @param string $data The data to tokenize.
 * @param string $separator The token to split the data on.
 * @param string $leftBound The left boundary to ignore separators in.
 * @param string $rightBound The right boundary to ignore separators in.
 * @return array Array of tokens in $data, or an empty array when $data is empty.
 */
public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
{
    // Not empty(): that would also throw away the perfectly valid input "0".
    if ($data === null || $data === false || $data === '' || $data === []) {
        return [];
    }

    $depth = 0;
    $offset = 0;
    $buffer = '';
    $results = [];
    $length = mb_strlen($data);
    $open = false;

    while ($offset <= $length) {
        // Find the nearest upcoming separator or boundary character.
        $tmpOffset = -1;
        $offsets = [
            mb_strpos($data, $separator, $offset),
            mb_strpos($data, $leftBound, $offset),
            mb_strpos($data, $rightBound, $offset)
        ];
        for ($i = 0; $i < 3; $i++) {
            if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
                $tmpOffset = $offsets[$i];
            }
        }
        if ($tmpOffset !== -1) {
            $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
            $char = mb_substr($data, $tmpOffset, 1);
            if (!$depth && $char === $separator) {
                // A separator outside of any boundary closes the current token.
                $results[] = $buffer;
                $buffer = '';
            } else {
                $buffer .= $char;
            }
            if ($leftBound !== $rightBound) {
                if ($char === $leftBound) {
                    $depth++;
                }
                if ($char === $rightBound) {
                    $depth--;
                }
            } else {
                // Identical bounds (e.g. quotes) cannot nest: toggle open/closed.
                if ($char === $leftBound) {
                    if (!$open) {
                        $depth++;
                        $open = true;
                    } else {
                        $depth--;
                        $open = false;
                    }
                }
            }
            $tmpOffset += 1;
            $offset = $tmpOffset;
        } else {
            // Nothing special left: the remainder is the last token.
            $results[] = $buffer . mb_substr($data, $offset);
            $offset = $length + 1;
        }
    }
    if (empty($results) && !empty($buffer)) {
        $results[] = $buffer;
    }

    if (!empty($results)) {
        return array_map('trim', $results);
    }

    return [];
}
165:
/**
 * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 * corresponds to a variable placeholder name in $str.
 * Example:
 * ```
 * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
 * ```
 * Returns: Bob is 65 years old.
 *
 * When $str contains `?` and the first key of $data is numeric, each `?` is
 * replaced positionally with the next value of $data instead.
 *
 * Available $options are:
 *
 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 * - after: The character or string after the name of the variable placeholder (Defaults to null)
 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 *   (Overwrites before, after, breaks escape / clean)
 * - clean: A boolean or array with instructions for Text::cleanInsert
 *
 * @param string $str A string containing variable placeholders
 * @param array $data A key => val array where each key stands for a placeholder variable name
 *     to be replaced with val
 * @param array $options An array of options, see description above
 * @return string
 */
public static function insert($str, $data, array $options = [])
{
    $defaults = [
        'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
    ];
    $options += $defaults;
    $format = $options['format'];
    $data = (array)$data;
    if (empty($data)) {
        return $options['clean'] ? static::cleanInsert($str, $options) : $str;
    }

    if (!isset($format)) {
        // The delimiters are cast to string because `after` defaults to null and
        // passing null to preg_quote() is deprecated as of PHP 8.1. Percent signs
        // are doubled so they survive the second sprintf() applied per key below.
        $format = sprintf(
            '/(?<!%s)%s%%s%s/',
            str_replace('%', '%%', preg_quote((string)$options['escape'], '/')),
            str_replace('%', '%%', preg_quote((string)$options['before'], '/')),
            str_replace('%', '%%', preg_quote((string)$options['after'], '/'))
        );
    }

    if (strpos($str, '?') !== false && is_numeric(key($data))) {
        $offset = 0;
        while (($pos = strpos($str, '?', $offset)) !== false) {
            $val = array_shift($data);
            if ($val === null) {
                // Data exhausted: the placeholder is replaced with nothing.
                $val = '';
            }
            // Continue after the inserted value so a `?` inside it is left alone.
            $offset = $pos + strlen($val);
            $str = substr_replace($str, $val, $pos, 1);
        }

        return $options['clean'] ? static::cleanInsert($str, $options) : $str;
    }

    // Two passes: placeholders are first swapped for a hash of their key, then
    // the hashes for the values, so inserted values are not scanned for placeholders.
    $dataKeys = array_keys($data);
    $hashKeys = array_map('crc32', $dataKeys);
    $tempData = array_combine($dataKeys, $hashKeys);
    // Reverse key order so that a longer key is handled before a key that is its prefix.
    krsort($tempData);

    foreach ($tempData as $key => $hashVal) {
        $key = sprintf($format, preg_quote($key, '/'));
        $str = preg_replace($key, $hashVal, $str);
    }
    $dataReplacements = array_combine($hashKeys, array_values($data));
    foreach ($dataReplacements as $tmpHash => $tmpValue) {
        $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
        $str = str_replace($tmpHash, $tmpValue, $str);
    }

    if (!isset($options['format']) && isset($options['before'])) {
        // Unescape escaped placeholder prefixes.
        $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
    }

    return $options['clean'] ? static::cleanInsert($str, $options) : $str;
}
243:
/**
 * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
 * $options. The default method used is text but html is also available. The goal of this function
 * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
 * by Text::insert().
 *
 * Recognised $options keys: `clean` (true, a method name, or an array with
 * `method` plus per-method settings), `before` and `after` (the placeholder
 * delimiters; a missing or null delimiter is treated as an empty string).
 *
 * @param string $str String to clean.
 * @param array $options Options list.
 * @return string
 * @see \Cake\Utility\Text::insert()
 */
public static function cleanInsert($str, array $options)
{
    $clean = isset($options['clean']) ? $options['clean'] : false;
    if (!$clean) {
        return $str;
    }
    if ($clean === true) {
        $clean = ['method' => 'text'];
    }
    if (!is_array($clean)) {
        $clean = ['method' => $options['clean']];
    }

    // Text::insert() passes `after` as null by default and direct callers may omit
    // the delimiters; normalise to strings because preg_quote(null) is deprecated
    // as of PHP 8.1.
    $before = preg_quote(isset($options['before']) ? (string)$options['before'] : '', '/');
    $after = preg_quote(isset($options['after']) ? (string)$options['after'] : '', '/');

    switch ($clean['method']) {
        case 'html':
            $clean += [
                'word' => '[\w,.]+',
                'andText' => true,
                'replacement' => '',
            ];
            // Matches a whole double-quoted attribute whose value is only placeholders.
            $kleenex = sprintf(
                '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
                $before,
                $clean['word'],
                $after
            );
            $str = preg_replace($kleenex, $clean['replacement'], $str);
            if ($clean['andText']) {
                $options['clean'] = ['method' => 'text'];
                $str = static::cleanInsert($str, $options);
            }
            break;
        case 'text':
            $clean += [
                'word' => '[\w,.]+',
                'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
                'replacement' => '',
            ];

            // Matches a placeholder together with the gap after it, or the gap before it.
            $kleenex = sprintf(
                '/(%s%s%s%s|%s%s%s%s)/',
                $before,
                $clean['word'],
                $after,
                $clean['gap'],
                $clean['gap'],
                $before,
                $clean['word'],
                $after
            );
            $str = preg_replace($kleenex, $clean['replacement'], $str);
            break;
    }

    return $str;
}
310:
/**
 * Wraps text to a given width, optionally breaking only between words and
 * optionally indenting lines.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
public static function wrap($text, $options = [])
{
    if (is_numeric($options)) {
        // A bare number is shorthand for the width.
        $options = ['width' => $options];
    }
    $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

    // Without word wrapping the text is chopped into fixed-size chunks of width - 1.
    $wrapped = $options['wordWrap']
        ? self::wordWrap($text, $options['width'], "\n")
        : trim(chunk_split($text, $options['width'] - 1, "\n"));

    if (empty($options['indent'])) {
        return $wrapped;
    }

    // Prefix every line from `indentAt` onwards with the indent string.
    $lines = explode("\n", $wrapped);
    $lineCount = count($lines);
    for ($row = $options['indentAt']; $row < $lineCount; $row++) {
        $lines[$row] = $options['indent'] . $lines[$row];
    }

    return implode("\n", $lines);
}
346:
/**
 * Wraps a complete block of text to a specific width, can optionally wrap
 * at word breaks.
 *
 * Unlike wrap(), the indent is counted as part of the width: indented lines
 * are wrapped to `width` minus the length of `indent`.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
public static function wrapBlock($text, $options = [])
{
    if (is_numeric($options)) {
        $options = ['width' => $options];
    }
    $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

    // Every line is indented: wrap once with the width reduced by the indent.
    // (The previous guard combined !empty() with === 0 and could never be true.)
    if (empty($options['indentAt'])) {
        $indentLength = !empty($options['indent']) ? mb_strlen($options['indent']) : 0;
        $options['width'] -= $indentLength;
        $options['indentAt'] = 0;

        return self::wrap($text, $options);
    }

    $wrapped = self::wrap($text, $options);

    if (!empty($options['indent'])) {
        $indentationLength = mb_strlen($options['indent']);
        $chunks = explode("\n", $wrapped);
        $count = count($chunks);
        if ($count < 2) {
            return $wrapped;
        }
        // Collect the indented lines (minus their indent) into one string and
        // wrap them again with the narrower width, keeping the leading lines.
        $toRewrap = '';
        for ($i = $options['indentAt']; $i < $count; $i++) {
            $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
            unset($chunks[$i]);
        }
        $options['width'] -= $indentationLength;
        $options['indentAt'] = 0;
        $rewrapped = self::wrap($toRewrap, $options);
        $newChunks = explode("\n", $rewrapped);

        $chunks = array_merge($chunks, $newChunks);
        $wrapped = implode("\n", $chunks);
    }

    return $wrapped;
}
401:
/**
 * Unicode and newline aware version of wordwrap.
 *
 * The text is split on $break, each piece is wrapped on its own, and the
 * pieces are joined with $break again.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    $wrappedParagraphs = [];
    foreach (explode($break, $text) as $paragraph) {
        $wrappedParagraphs[] = static::_wordWrap($paragraph, $width, $break, $cut);
    }

    return implode($break, $wrappedParagraphs);
}
420:
/**
 * Unicode aware version of wordwrap as helper method.
 *
 * Works on a single paragraph. Each produced line is trimmed. Without $cut,
 * a word longer than $width is kept whole on its own line.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 *   A width below 1 is treated as 1 in this mode.
 * @return string Formatted text.
 */
protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    if ($cut) {
        // A width below 1 would consume nothing per pass and never terminate.
        if ($width < 1) {
            $width = 1;
        }
        $parts = [];
        while (mb_strlen($text) > 0) {
            $part = mb_substr($text, 0, $width);
            $parts[] = trim($part);
            $text = trim(mb_substr($text, mb_strlen($part)));
        }

        return implode($break, $parts);
    }

    $parts = [];
    while (mb_strlen($text) > 0) {
        if ($width >= mb_strlen($text)) {
            $parts[] = trim($text);
            break;
        }

        $part = mb_substr($text, 0, $width);
        $nextChar = mb_substr($text, $width, 1);
        if ($nextChar !== ' ') {
            // The cut would land inside a word: back up to the last space within
            // the width, or run forward to the next space if there is none.
            $breakAt = mb_strrpos($part, ' ');
            if ($breakAt === false) {
                $breakAt = mb_strpos($text, ' ', $width);
            }
            if ($breakAt === false) {
                $parts[] = trim($text);
                break;
            }
            $part = mb_substr($text, 0, $breakAt);
        }

        // Advance by what was actually taken from $text. Measuring the trimmed
        // part instead re-emits characters when $text starts with whitespace.
        $consumed = mb_strlen($part);
        $parts[] = trim($part);
        $text = trim(mb_substr($text, $consumed));
    }

    return implode($break, $parts);
}
471:
/**
 * Highlights a given phrase in a text. You can specify any expression in highlighter that
 * may include the \1 expression to include the $phrase found.
 *
 * ### Options:
 *
 * - `format` The piece of HTML with that the phrase will be highlighted. When $phrase
 *   is an array this may also be an array holding one format per phrase key.
 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
 * - `limit` A limit, optional, defaults to -1 (none)
 *
 * @param string $text Text to search the phrase in.
 * @param string|array $phrase The phrase or phrases that will be searched.
 * @param array $options An array of HTML attributes and options.
 * @return string The highlighted text
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
 */
public static function highlight($text, $phrase, array $options = [])
{
    if (empty($phrase)) {
        return $text;
    }

    $options += [
        'format' => '<span class="highlight">\1</span>',
        'html' => false,
        'regex' => '|%s|iu',
        'limit' => -1,
    ];

    // Options are read explicitly rather than through extract(), which let
    // stray option keys such as `text` or `phrase` overwrite the arguments.
    $format = $options['format'];
    $html = $options['html'];
    $limit = $options['limit'];
    $regex = $options['regex'];

    // Turns one phrase into a full pattern capturing the phrase as group 1.
    $toPattern = function ($needle) use ($html, $regex) {
        $needle = '(' . preg_quote($needle, '|') . ')';
        if ($html) {
            // Reject matches that sit inside a tag, i.e. are followed by `>` before any `<`.
            $needle = '(?![^<]+>)' . $needle . '(?![^<]+>)';
        }

        return sprintf($regex, $needle);
    };

    if (is_array($phrase)) {
        $replace = [];
        $with = [];

        foreach ($phrase as $key => $segment) {
            $with[] = is_array($format) ? $format[$key] : $format;
            $replace[] = $toPattern($segment);
        }

        return preg_replace($replace, $with, $text, $limit);
    }

    return preg_replace($toPattern($phrase), $format, $text, $limit);
}
535:
/**
 * Removes all anchor tags (<a href=....) from the given text, keeping their content.
 *
 * *Warning* This method is not a robust solution for preventing XSS
 * or malicious HTML.
 *
 * @param string $text Text
 * @return string The text without links
 * @deprecated 3.2.12 This method will be removed in 4.0.0
 */
public static function stripLinks($text)
{
    deprecationWarning('This method will be removed in 4.0.0.');

    $anchorTag = '#</?a([/\s][^>]*)?(>|$)#i';
    $removed = 0;
    // Keep going until a pass removes nothing, so that tags which only
    // appear after an earlier removal are stripped as well.
    do {
        $text = preg_replace($anchorTag, '', $text, -1, $removed);
    } while ($removed);

    return $text;
}
555:
/**
 * Truncates text starting from the end.
 *
 * Cuts a string to the length of $length and replaces the first characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as beginning and prepended to the trimmed string
 * - `exact` If false, $text will not be cut mid-word
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of options.
 * @return string Trimmed string.
 */
public static function tail($text, $length = 100, array $options = [])
{
    $options += ['ellipsis' => '...', 'exact' => true];

    // Options are read explicitly rather than through extract(), which let
    // stray option keys such as `text` or `length` overwrite the arguments.
    $ellipsis = $options['ellipsis'];
    $exact = $options['exact'];

    $textLength = mb_strlen($text);
    if ($textLength <= $length) {
        return $text;
    }

    // Keep the last ($length - ellipsis length) characters.
    $truncate = mb_substr($text, $textLength - $length + mb_strlen($ellipsis));
    if (!$exact) {
        // Drop the leading partial word: keep only what follows the first space.
        $spacepos = mb_strpos($truncate, ' ');
        $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
    }

    return $ellipsis . $truncate;
}
597:
/**
 * Truncates text.
 *
 * Cuts a string to the length of $length and replaces the last characters
 * with the ellipsis if the text is longer than length.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as ending and appended to the trimmed string.
 *   Defaults to `...`, or to the single character U+2026 when `html` is
 *   enabled and the internal encoding is UTF-8.
 * - `exact` If false, $text will not be cut mid-word
 * - `html` If true, HTML tags would be handled correctly
 * - `trimWidth` If true, $text will be truncated with the width
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of HTML attributes and options.
 * @return string Trimmed string.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
 */
public static function truncate($text, $length = 100, array $options = [])
{
    $default = [
        'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
    ];
    if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
        $default['ellipsis'] = "\xe2\x80\xa6";
    }
    $options += $default;

    $prefix = '';
    $suffix = $options['ellipsis'];

    if ($options['html']) {
        $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);

        $truncateLength = 0;
        $totalLength = 0;
        $openTags = [];
        $truncate = '';

        // Each match is an optional tag (1), its name (2) and the text following it (3).
        preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
        foreach ($tags as $tag) {
            $contentLength = 0;
            if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
                $contentLength = self::_strlen($tag[3], $options);
            }

            if ($truncate === '') {
                // Track open tags so they can be closed after the cut. Void elements
                // are skipped; the pattern is anchored so that names merely containing
                // one of them (`abbr`, `colgroup`, ...) are still tracked.
                $voidTags = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';
                if (!preg_match($voidTags, $tag[2])) {
                    if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
                        array_unshift($openTags, $tag[2]);
                    } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
                        $pos = array_search($closeTag[1], $openTags);
                        if ($pos !== false) {
                            array_splice($openTags, $pos, 1);
                        }
                    }
                }

                $prefix .= $tag[1];

                if ($totalLength + $contentLength + $ellipsisLength > $length) {
                    // This text segment is the one that has to be cut.
                    $truncate = $tag[3];
                    $truncateLength = $length - $totalLength;
                } else {
                    $prefix .= $tag[3];
                }
            }

            $totalLength += $contentLength;
            if ($totalLength > $length) {
                break;
            }
        }

        if ($totalLength <= $length) {
            return $text;
        }

        $text = $truncate;
        $length = $truncateLength;

        foreach ($openTags as $tag) {
            $suffix .= '</' . $tag . '>';
        }
    } else {
        if (self::_strlen($text, $options) <= $length) {
            return $text;
        }
        $ellipsisLength = self::_strlen($options['ellipsis'], $options);
    }

    $result = self::_substr($text, 0, $length - $ellipsisLength, $options);

    if (!$options['exact']) {
        if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
            $result = self::_removeLastWord($result);
        }

        // If result is empty, then we don't need to count ellipsis in the cut.
        if (!strlen($result)) {
            $result = self::_substr($text, 0, $length, $options);
        }
    }

    return $prefix . $result . $suffix;
}
705:
/**
 * Truncate text by display width instead of character count.
 *
 * Delegates to truncate() with the `trimWidth` option forced to true.
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of HTML attributes and options.
 * @return string Trimmed string.
 * @see \Cake\Utility\Text::truncate()
 */
public static function truncateByWidth($text, $length = 100, array $options = [])
{
    // The left operand wins on key clashes, so a caller cannot switch trimWidth off.
    $widthOptions = ['trimWidth' => true] + $options;

    return static::truncate($text, $length, $widthOptions);
}
719:
/**
 * Measure a string.
 *
 * ### Options:
 *
 * - `html` If true, HTML entities will be handled as decoded characters.
 * - `trimWidth` If true, the width will return.
 *
 * @param string $text The string being checked for length
 * @param array $options An array of options.
 * @return int
 */
protected static function _strlen($text, array $options)
{
    $measure = empty($options['trimWidth']) ? 'mb_strlen' : 'mb_strwidth';

    if (!empty($options['html'])) {
        // Swap each entity for as many spaces as its decoded form measures,
        // so that the final measurement counts the entity as decoded text.
        $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
        $text = preg_replace_callback(
            $entityPattern,
            function ($entity) use ($measure) {
                $decoded = html_entity_decode($entity[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');

                return str_repeat(' ', $measure($decoded, 'UTF-8'));
            },
            $text
        );
    }

    return $measure($text);
}
757:
/**
 * Return part of a string.
 *
 * Start and length follow the usual substring conventions: a negative start
 * counts from the end of the text, a negative length leaves that many units
 * off the end, and a null length means "up to the end".
 *
 * ### Options:
 *
 * - `html` If true, HTML entities will be handled as decoded characters.
 * - `trimWidth` If true, will be truncated with specified width.
 *
 * @param string $text The input string.
 * @param int $start The position to begin extracting.
 * @param int|null $length The desired length.
 * @param array $options An array of options.
 * @return string
 */
protected static function _substr($text, $start, $length, array $options)
{
    // Cut by character count by default, or by display width when `trimWidth` is set.
    if (empty($options['trimWidth'])) {
        $substr = 'mb_substr';
    } else {
        $substr = 'mb_strimwidth';
    }

    // Start positions are checked against the length measured with `trimWidth` forced off.
    $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
    if ($start < 0) {
        // Negative start counts back from the end, clamped to the beginning.
        $start += $maxPosition;
        if ($start < 0) {
            $start = 0;
        }
    }
    if ($start >= $maxPosition) {
        return '';
    }

    if ($length === null) {
        $length = self::_strlen($text, $options);
    }

    if ($length < 0) {
        // Negative length: take everything from $start first, then make the
        // length relative to the end of that remainder.
        $text = self::_substr($text, $start, null, $options);
        $start = 0;
        $length += self::_strlen($text, $options);
    }

    if ($length <= 0) {
        return '';
    }

    if (empty($options['html'])) {
        return (string)$substr($text, $start, $length);
    }

    // HTML mode: walk the text piece by piece so entities stay intact.
    $totalOffset = 0;
    $totalLength = 0;
    $result = '';

    // The capture group makes preg_split() return the entities as parts of their own.
    $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
    $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    foreach ($parts as $part) {
        $offset = 0;

        if ($totalOffset < $start) {
            // Still before the requested start: skip whole parts, or note how
            // far into this part the start lies.
            $len = self::_strlen($part, ['trimWidth' => false] + $options);
            if ($totalOffset + $len <= $start) {
                $totalOffset += $len;
                continue;
            }

            $offset = $start - $totalOffset;
            $totalOffset = $start;
        }

        $len = self::_strlen($part, $options);
        if ($offset !== 0 || $totalLength + $len > $length) {
            // This part has to be cut, either at its front or at its back.
            if (strpos($part, '&') === 0 && preg_match($pattern, $part)
                && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
            ) {
                // A real entity cannot be cut in the middle, so it is left out entirely.
                continue;
            }

            $part = $substr($part, $offset, $length - $totalLength);
            $len = self::_strlen($part, $options);
        }

        $result .= $part;
        $totalLength += $len;
        if ($totalLength >= $length) {
            break;
        }
    }

    return $result;
}
851:
/**
 * Removes the last word from the input text.
 *
 * The text after the last space is only removed when it contains no
 * full-width characters. Text without any space yields an empty string.
 *
 * @param string $text The input text
 * @return string
 */
protected static function _removeLastWord($text)
{
    $spacepos = mb_strrpos($text, ' ');

    if ($spacepos !== false) {
        // The candidate word is everything from the last space onwards. (This used
        // to call mb_strrpos() with the position as needle, which never produced
        // the word and made the width check below pass unconditionally.)
        $lastWord = mb_substr($text, $spacepos);

        // Some languages are written without word separation.
        // We recognize a string as a word if it doesn't contain any full-width characters.
        if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
            $text = mb_substr($text, 0, $spacepos);
        }

        return $text;
    }

    return '';
}
876:
/**
 * Cuts an excerpt out of the text around the first (case-insensitive)
 * occurrence of the phrase, keeping up to $radius characters on each side.
 *
 * The ellipsis is added on every side where text was cut off. An empty text
 * or phrase falls back to truncating the text to twice the radius; a phrase
 * that is not found yields the first $radius characters followed by the ellipsis.
 *
 * @param string $text String to search the phrase in
 * @param string $phrase Phrase that will be searched for
 * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 * @param string $ellipsis Ending that will be appended
 * @return string Modified string
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
 */
public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
{
    if (empty($text) || empty($phrase)) {
        return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
    }

    $matchAt = mb_stripos($text, $phrase);
    if ($matchAt === false) {
        return mb_substr($text, 0, $radius) . $ellipsis;
    }

    // Left edge of the window; no leading ellipsis when it reaches the start.
    $leading = $ellipsis;
    $from = $matchAt - $radius;
    if ($from <= 0) {
        $from = 0;
        $leading = '';
    }

    // Right edge of the window; no trailing ellipsis when it reaches the end.
    $trailing = $ellipsis;
    $textLength = mb_strlen($text);
    $to = $matchAt + mb_strlen($phrase) + $radius;
    if ($to >= $textLength) {
        $to = $textLength;
        $trailing = '';
    }

    return $leading . mb_substr($text, $from, $to - $from) . $trailing;
}
921:
/**
 * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 *
 * @param array $list The list to be joined.
 * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
 * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 * @return string|null The glued together string. A single-item list returns that item
 *   unchanged, and an empty list returns null.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
 */
public static function toList(array $list, $and = null, $separator = ', ')
{
    if ($and === null) {
        $and = __d('cake', 'and');
    }
    if (count($list) > 1) {
        // Offset 0 rather than null: passing null to array_slice() is deprecated as of PHP 8.1.
        return implode($separator, array_slice($list, 0, -1)) . ' ' . $and . ' ' . array_pop($list);
    }

    return array_pop($list);
}
942:
/**
 * Tells whether the string contains at least one byte whose value is above 128.
 *
 * @param string $string value to test
 * @return bool
 */
public static function isMultibyte($string)
{
    // Scan the bytes (back to front) and stop at the first one above 128.
    $index = strlen($string);
    while ($index-- > 0) {
        if (ord($string[$index]) > 128) {
            return true;
        }
    }

    return false;
}
962:
/**
 * Converts a multibyte character string
 * to the decimal value of the character
 *
 * The input is read as UTF-8. Sequences of one to four bytes are decoded, so
 * characters outside the Basic Multilingual Plane are returned as a single
 * code point. Input is not validated; an incomplete trailing sequence is dropped.
 *
 * @param string $string String to convert.
 * @return array List of code points, one integer per decoded character.
 */
public static function utf8($string)
{
    $map = [];

    // Bytes of the multibyte sequence currently being collected.
    $values = [];
    // Number of bytes the current sequence needs.
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // The lead byte tells how long the sequence is.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) === $find) {
            if ($find === 4) {
                $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($find === 3) {
                $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $values = [];
            $find = 1;
        }
    }

    return $map;
}
1003:
/**
 * Converts the decimal value of a multibyte character string
 * to a string
 *
 * Each code point is encoded as UTF-8 using one to four bytes, so values
 * above 65535 are supported as well.
 *
 * @param array $array List of code points.
 * @return string
 */
public static function ascii(array $array)
{
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        } elseif ($utf8 < 65536) {
            $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
            $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        } else {
            // Four-byte form; the three-byte branch would overflow its lead byte here.
            $ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
            $ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
            $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        }
    }

    return $ascii;
}
1030:
/**
 * Converts filesize from human readable string to bytes
 *
 * Units are case-insensitive and use a factor of 1024 (`K`/`KB` up to
 * `P`/`PB`). Plain digit strings, integers and a `B` suffix are taken as bytes.
 *
 * @param string|int $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
 * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
 * @return mixed Number of bytes on success (an integer for plain byte counts, a float when a
 *   unit multiplier was applied), `$default` on failure if not false
 * @throws \InvalidArgumentException On invalid Unit type.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
 */
public static function parseFileSize($size, $default = false)
{
    // ctype_digit() interprets integers from -128 to 255 as ASCII codes, so a
    // plain integer such as 100 must be turned into its digit string first.
    if (is_int($size)) {
        $size = (string)$size;
    }
    if (ctype_digit($size)) {
        return (int)$size;
    }
    $size = strtoupper($size);

    // Try the two-letter units first, then the one-letter ones.
    $l = -2;
    $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
    if ($i === false) {
        $l = -1;
        $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
    }
    if ($i !== false) {
        $size = (float)substr($size, 0, $l);

        return $size * pow(1024, $i + 1);
    }

    if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
        $size = substr($size, 0, -1);

        return (int)$size;
    }

    if ($default !== false) {
        return $default;
    }
    throw new InvalidArgumentException('No unit type.');
}
1070:
/**
 * Returns the transliterator currently stored as the default.
 *
 * @return \Transliterator|null The stored `Transliterator` instance, or `null`
 *   when none has been set yet.
 * @since 3.7.0
 */
public static function getTransliterator()
{
    $current = static::$_defaultTransliterator;

    return $current;
}
1082:
/**
 * Stores the given transliterator as the default one.
 *
 * Only the instance is stored; the default transliterator identifier string
 * is left as it is.
 *
 * @param \Transliterator $transliterator The `Transliterator` instance to store.
 * @return void
 * @since 3.7.0
 */
public static function setTransliterator(\Transliterator $transliterator)
{
    $instance = $transliterator;
    static::$_defaultTransliterator = $instance;
}
1094:
/**
 * Returns the identifier string of the default transliterator.
 *
 * @return string The stored transliterator identifier.
 */
public static function getTransliteratorId()
{
    $identifier = static::$_defaultTransliteratorId;

    return $identifier;
}
1104:
/**
 * Set default transliterator identifier string.
 *
 * A transliterator is created from the identifier and stored as the default
 * instance together with the identifier itself. When no transliterator can
 * be created, an exception is thrown and nothing is changed.
 *
 * @param string $transliteratorId Transliterator identifier.
 * @return void
 * @throws \InvalidArgumentException When no transliterator can be created for the identifier.
 */
public static function setTransliteratorId($transliteratorId)
{
    $transliterator = transliterator_create($transliteratorId);

    // transliterator_create() does not return an instance for an invalid id;
    // report that here instead of tripping the type hint of setTransliterator().
    if (!$transliterator instanceof \Transliterator) {
        throw new InvalidArgumentException(
            sprintf('Unable to create transliterator for id: %s', $transliteratorId)
        );
    }

    static::setTransliterator($transliterator);
    static::$_defaultTransliteratorId = $transliteratorId;
}
1116:
/**
 * Runs a string through a transliterator.
 *
 * @param string $string The text to transliterate.
 * @param \Transliterator|string|null $transliterator A `Transliterator`
 *   instance or a transliterator identifier string. When empty, the default
 *   instance is used if one is set, otherwise the default identifier string.
 * @return string
 * @see https://secure.php.net/manual/en/transliterator.transliterate.php
 */
public static function transliterate($string, $transliterator = null)
{
    if ($transliterator) {
        return transliterator_transliterate($transliterator, $string);
    }

    // Prefer the stored instance and fall back to the identifier string.
    $fallback = static::$_defaultTransliterator;
    if (!$fallback) {
        $fallback = static::$_defaultTransliteratorId;
    }

    return transliterator_transliterate($fallback, $string);
}
1136:
/**
 * Returns a string with all spaces converted to dashes (by default),
 * characters transliterated to ASCII characters, and non word characters removed.
 *
 * ### Options:
 *
 * - `replacement`: Replacement string. Default '-'.
 * - `transliteratorId`: A valid transliterator id string.
 *   If `null` (default) the transliterator (identifier) set via
 *   `setTransliteratorId()` or `setTransliterator()` will be used.
 *   If `false` no transliteration will be done, only non words will be removed.
 * - `preserve`: Specific non-word character to preserve. Default `null`.
 *   For e.g. this option can be set to '.' to generate clean file names.
 *
 * @param string $string the string you want to slug
 * @param string|array $options If string it will be used as replacement character,
 *   otherwise an array of options.
 * @return string
 * @see setTransliterator()
 * @see setTransliteratorId()
 */
public static function slug($string, $options = [])
{
    if (is_string($options)) {
        $options = ['replacement' => $options];
    }
    $options += [
        'replacement' => '-',
        'transliteratorId' => null,
        'preserve' => null
    ];

    if ($options['transliteratorId'] !== false) {
        $string = static::transliterate($string, $options['transliteratorId']);
    }

    // Negated class: everything that is not whitespace, a letter or a decimal
    // digit (or an explicitly preserved character) is blanked out.
    $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
    if ($options['preserve']) {
        $regex .= preg_quote($options['preserve'], '/');
    }
    $quotedReplacement = preg_quote($options['replacement'], '/');
    $map = [
        '/[' . $regex . ']/mu' => ' ',
        '/[\s]+/mu' => $options['replacement'],
        // The `u` modifier makes the class match whole characters. Without it a
        // multibyte replacement is treated as a set of bytes and single bytes of
        // neighbouring UTF-8 characters get stripped from the ends.
        sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement) => '',
    ];

    return preg_replace(array_keys($map), $map, $string);
}
1187: }
1188: