File Utility/Text.php

   1: <?php
   2: /**
   3:  * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
   4:  * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
   5:  *
   6:  * Licensed under The MIT License
   7:  * For full copyright and license information, please see the LICENSE.txt
   8:  * Redistributions of files must retain the above copyright notice.
   9:  *
  10:  * @copyright     Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11:  * @link          https://cakephp.org CakePHP(tm) Project
  12:  * @since         1.2.0
  13:  * @license       https://opensource.org/licenses/mit-license.php MIT License
  14:  */
  15: namespace Cake\Utility;
  16: 
  17: use InvalidArgumentException;
  18: 
  19: /**
  20:  * Text handling methods.
  21:  */
  22: class Text
  23: {
  24: 
  25:     /**
  26:      * Default transliterator.
  27:      *
  28:      * @var \Transliterator Transliterator instance.
  29:      */
  30:     protected static $_defaultTransliterator;
  31: 
  32:     /**
  33:      * Default transliterator id string.
  34:      *
  35:      * @var string $_defaultTransliteratorId Transliterator identifier string.
  36:      */
  37:     protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
  38: 
  39:     /**
     * Default HTML tags whose content is not counted when truncating text.
  41:      *
  42:      * @var array
  43:      */
  44:     protected static $_defaultHtmlNoCount = [
  45:         'style',
  46:         'script'
  47:     ];
  48: 
  49:     /**
  50:      * Generate a random UUID version 4
  51:      *
  52:      * Warning: This method should not be used as a random seed for any cryptographic operations.
  53:      * Instead you should use the openssl or mcrypt extensions.
  54:      *
  55:      * It should also not be used to create identifiers that have security implications, such as
  56:      * 'unguessable' URL identifiers. Instead you should use `Security::randomBytes()` for that.
  57:      *
  58:      * @see https://www.ietf.org/rfc/rfc4122.txt
  59:      * @return string RFC 4122 UUID
  60:      * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
  61:      */
  62:     public static function uuid()
  63:     {
  64:         $random = function_exists('random_int') ? 'random_int' : 'mt_rand';
  65: 
  66:         return sprintf(
  67:             '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
  68:             // 32 bits for "time_low"
  69:             $random(0, 65535),
  70:             $random(0, 65535),
  71:             // 16 bits for "time_mid"
  72:             $random(0, 65535),
  73:             // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
  74:             $random(0, 4095) | 0x4000,
  75:             // 16 bits, 8 bits for "clk_seq_hi_res",
  76:             // 8 bits for "clk_seq_low",
  77:             // two most significant bits holds zero and one for variant DCE1.1
  78:             $random(0, 0x3fff) | 0x8000,
  79:             // 48 bits for "node"
  80:             $random(0, 65535),
  81:             $random(0, 65535),
  82:             $random(0, 65535)
  83:         );
  84:     }
  85: 
  86:     /**
  87:      * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
  88:      * $leftBound and $rightBound.
  89:      *
  90:      * @param string $data The data to tokenize.
  91:      * @param string $separator The token to split the data on.
  92:      * @param string $leftBound The left boundary to ignore separators in.
  93:      * @param string $rightBound The right boundary to ignore separators in.
  94:      * @return array|string Array of tokens in $data or original input if empty.
  95:      */
  96:     public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  97:     {
  98:         if (empty($data)) {
  99:             return [];
 100:         }
 101: 
 102:         $depth = 0;
 103:         $offset = 0;
 104:         $buffer = '';
 105:         $results = [];
 106:         $length = mb_strlen($data);
 107:         $open = false;
 108: 
 109:         while ($offset <= $length) {
 110:             $tmpOffset = -1;
 111:             $offsets = [
 112:                 mb_strpos($data, $separator, $offset),
 113:                 mb_strpos($data, $leftBound, $offset),
 114:                 mb_strpos($data, $rightBound, $offset)
 115:             ];
 116:             for ($i = 0; $i < 3; $i++) {
 117:                 if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
 118:                     $tmpOffset = $offsets[$i];
 119:                 }
 120:             }
 121:             if ($tmpOffset !== -1) {
 122:                 $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
 123:                 $char = mb_substr($data, $tmpOffset, 1);
 124:                 if (!$depth && $char === $separator) {
 125:                     $results[] = $buffer;
 126:                     $buffer = '';
 127:                 } else {
 128:                     $buffer .= $char;
 129:                 }
 130:                 if ($leftBound !== $rightBound) {
 131:                     if ($char === $leftBound) {
 132:                         $depth++;
 133:                     }
 134:                     if ($char === $rightBound) {
 135:                         $depth--;
 136:                     }
 137:                 } else {
 138:                     if ($char === $leftBound) {
 139:                         if (!$open) {
 140:                             $depth++;
 141:                             $open = true;
 142:                         } else {
 143:                             $depth--;
 144:                             $open = false;
 145:                         }
 146:                     }
 147:                 }
 148:                 $tmpOffset += 1;
 149:                 $offset = $tmpOffset;
 150:             } else {
 151:                 $results[] = $buffer . mb_substr($data, $offset);
 152:                 $offset = $length + 1;
 153:             }
 154:         }
 155:         if (empty($results) && !empty($buffer)) {
 156:             $results[] = $buffer;
 157:         }
 158: 
 159:         if (!empty($results)) {
 160:             return array_map('trim', $results);
 161:         }
 162: 
 163:         return [];
 164:     }
 165: 
 166:     /**
 167:      * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 168:      * corresponds to a variable placeholder name in $str.
 169:      * Example:
 170:      * ```
 171:      * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
 172:      * ```
 173:      * Returns: Bob is 65 years old.
 174:      *
 175:      * Available $options are:
 176:      *
 177:      * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 178:      * - after: The character or string after the name of the variable placeholder (Defaults to null)
 179:      * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 180:      * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 181:      *   (Overwrites before, after, breaks escape / clean)
 182:      * - clean: A boolean or array with instructions for Text::cleanInsert
 183:      *
 184:      * @param string $str A string containing variable placeholders
 185:      * @param array $data A key => val array where each key stands for a placeholder variable name
 186:      *     to be replaced with val
 187:      * @param array $options An array of options, see description above
 188:      * @return string
 189:      */
 190:     public static function insert($str, $data, array $options = [])
 191:     {
 192:         $defaults = [
 193:             'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
 194:         ];
 195:         $options += $defaults;
 196:         $format = $options['format'];
 197:         $data = (array)$data;
 198:         if (empty($data)) {
 199:             return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 200:         }
 201: 
 202:         if (!isset($format)) {
 203:             $format = sprintf(
 204:                 '/(?<!%s)%s%%s%s/',
 205:                 preg_quote($options['escape'], '/'),
 206:                 str_replace('%', '%%', preg_quote($options['before'], '/')),
 207:                 str_replace('%', '%%', preg_quote($options['after'], '/'))
 208:             );
 209:         }
 210: 
 211:         if (strpos($str, '?') !== false && is_numeric(key($data))) {
 212:             $offset = 0;
 213:             while (($pos = strpos($str, '?', $offset)) !== false) {
 214:                 $val = array_shift($data);
 215:                 $offset = $pos + strlen($val);
 216:                 $str = substr_replace($str, $val, $pos, 1);
 217:             }
 218: 
 219:             return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 220:         }
 221: 
 222:         $dataKeys = array_keys($data);
 223:         $hashKeys = array_map('crc32', $dataKeys);
 224:         $tempData = array_combine($dataKeys, $hashKeys);
 225:         krsort($tempData);
 226: 
 227:         foreach ($tempData as $key => $hashVal) {
 228:             $key = sprintf($format, preg_quote($key, '/'));
 229:             $str = preg_replace($key, $hashVal, $str);
 230:         }
 231:         $dataReplacements = array_combine($hashKeys, array_values($data));
 232:         foreach ($dataReplacements as $tmpHash => $tmpValue) {
 233:             $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
 234:             $str = str_replace($tmpHash, $tmpValue, $str);
 235:         }
 236: 
 237:         if (!isset($options['format']) && isset($options['before'])) {
 238:             $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
 239:         }
 240: 
 241:         return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 242:     }
 243: 
 244:     /**
 245:      * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
 246:      * $options. The default method used is text but html is also available. The goal of this function
 247:      * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
 248:      * by Text::insert().
 249:      *
 250:      * @param string $str String to clean.
 251:      * @param array $options Options list.
 252:      * @return string
 253:      * @see \Cake\Utility\Text::insert()
 254:      */
 255:     public static function cleanInsert($str, array $options)
 256:     {
 257:         $clean = $options['clean'];
 258:         if (!$clean) {
 259:             return $str;
 260:         }
 261:         if ($clean === true) {
 262:             $clean = ['method' => 'text'];
 263:         }
 264:         if (!is_array($clean)) {
 265:             $clean = ['method' => $options['clean']];
 266:         }
 267:         switch ($clean['method']) {
 268:             case 'html':
 269:                 $clean += [
 270:                     'word' => '[\w,.]+',
 271:                     'andText' => true,
 272:                     'replacement' => '',
 273:                 ];
 274:                 $kleenex = sprintf(
 275:                     '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
 276:                     preg_quote($options['before'], '/'),
 277:                     $clean['word'],
 278:                     preg_quote($options['after'], '/')
 279:                 );
 280:                 $str = preg_replace($kleenex, $clean['replacement'], $str);
 281:                 if ($clean['andText']) {
 282:                     $options['clean'] = ['method' => 'text'];
 283:                     $str = static::cleanInsert($str, $options);
 284:                 }
 285:                 break;
 286:             case 'text':
 287:                 $clean += [
 288:                     'word' => '[\w,.]+',
 289:                     'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
 290:                     'replacement' => '',
 291:                 ];
 292: 
 293:                 $kleenex = sprintf(
 294:                     '/(%s%s%s%s|%s%s%s%s)/',
 295:                     preg_quote($options['before'], '/'),
 296:                     $clean['word'],
 297:                     preg_quote($options['after'], '/'),
 298:                     $clean['gap'],
 299:                     $clean['gap'],
 300:                     preg_quote($options['before'], '/'),
 301:                     $clean['word'],
 302:                     preg_quote($options['after'], '/')
 303:                 );
 304:                 $str = preg_replace($kleenex, $clean['replacement'], $str);
 305:                 break;
 306:         }
 307: 
 308:         return $str;
 309:     }
 310: 
 311:     /**
 312:      * Wraps text to a specific width, can optionally wrap at word breaks.
 313:      *
 314:      * ### Options
 315:      *
 316:      * - `width` The width to wrap to. Defaults to 72.
 317:      * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 318:      * - `indent` String to indent with. Defaults to null.
 319:      * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 320:      *
 321:      * @param string $text The text to format.
 322:      * @param array|int $options Array of options to use, or an integer to wrap the text to.
 323:      * @return string Formatted text.
 324:      */
 325:     public static function wrap($text, $options = [])
 326:     {
 327:         if (is_numeric($options)) {
 328:             $options = ['width' => $options];
 329:         }
 330:         $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
 331:         if ($options['wordWrap']) {
 332:             $wrapped = self::wordWrap($text, $options['width'], "\n");
 333:         } else {
 334:             $wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
 335:         }
 336:         if (!empty($options['indent'])) {
 337:             $chunks = explode("\n", $wrapped);
 338:             for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
 339:                 $chunks[$i] = $options['indent'] . $chunks[$i];
 340:             }
 341:             $wrapped = implode("\n", $chunks);
 342:         }
 343: 
 344:         return $wrapped;
 345:     }
 346: 
 347:     /**
 348:      * Wraps a complete block of text to a specific width, can optionally wrap
 349:      * at word breaks.
 350:      *
 351:      * ### Options
 352:      *
 353:      * - `width` The width to wrap to. Defaults to 72.
 354:      * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 355:      * - `indent` String to indent with. Defaults to null.
 356:      * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 357:      *
 358:      * @param string $text The text to format.
 359:      * @param array|int $options Array of options to use, or an integer to wrap the text to.
 360:      * @return string Formatted text.
 361:      */
 362:     public static function wrapBlock($text, $options = [])
 363:     {
 364:         if (is_numeric($options)) {
 365:             $options = ['width' => $options];
 366:         }
 367:         $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
 368: 
 369:         if (!empty($options['indentAt']) && $options['indentAt'] === 0) {
 370:             $indentLength = !empty($options['indent']) ? strlen($options['indent']) : 0;
 371:             $options['width'] -= $indentLength;
 372: 
 373:             return self::wrap($text, $options);
 374:         }
 375: 
 376:         $wrapped = self::wrap($text, $options);
 377: 
 378:         if (!empty($options['indent'])) {
 379:             $indentationLength = mb_strlen($options['indent']);
 380:             $chunks = explode("\n", $wrapped);
 381:             $count = count($chunks);
 382:             if ($count < 2) {
 383:                 return $wrapped;
 384:             }
 385:             $toRewrap = '';
 386:             for ($i = $options['indentAt']; $i < $count; $i++) {
 387:                 $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
 388:                 unset($chunks[$i]);
 389:             }
 390:             $options['width'] -= $indentationLength;
 391:             $options['indentAt'] = 0;
 392:             $rewrapped = self::wrap($toRewrap, $options);
 393:             $newChunks = explode("\n", $rewrapped);
 394: 
 395:             $chunks = array_merge($chunks, $newChunks);
 396:             $wrapped = implode("\n", $chunks);
 397:         }
 398: 
 399:         return $wrapped;
 400:     }
 401: 
 402:     /**
 403:      * Unicode and newline aware version of wordwrap.
 404:      *
 405:      * @param string $text The text to format.
 406:      * @param int $width The width to wrap to. Defaults to 72.
 407:      * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 408:      * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 409:      * @return string Formatted text.
 410:      */
 411:     public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
 412:     {
 413:         $paragraphs = explode($break, $text);
 414:         foreach ($paragraphs as &$paragraph) {
 415:             $paragraph = static::_wordWrap($paragraph, $width, $break, $cut);
 416:         }
 417: 
 418:         return implode($break, $paragraphs);
 419:     }
 420: 
 421:     /**
 422:      * Unicode aware version of wordwrap as helper method.
 423:      *
 424:      * @param string $text The text to format.
 425:      * @param int $width The width to wrap to. Defaults to 72.
 426:      * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 427:      * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 428:      * @return string Formatted text.
 429:      */
 430:     protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
 431:     {
 432:         if ($cut) {
 433:             $parts = [];
 434:             while (mb_strlen($text) > 0) {
 435:                 $part = mb_substr($text, 0, $width);
 436:                 $parts[] = trim($part);
 437:                 $text = trim(mb_substr($text, mb_strlen($part)));
 438:             }
 439: 
 440:             return implode($break, $parts);
 441:         }
 442: 
 443:         $parts = [];
 444:         while (mb_strlen($text) > 0) {
 445:             if ($width >= mb_strlen($text)) {
 446:                 $parts[] = trim($text);
 447:                 break;
 448:             }
 449: 
 450:             $part = mb_substr($text, 0, $width);
 451:             $nextChar = mb_substr($text, $width, 1);
 452:             if ($nextChar !== ' ') {
 453:                 $breakAt = mb_strrpos($part, ' ');
 454:                 if ($breakAt === false) {
 455:                     $breakAt = mb_strpos($text, ' ', $width);
 456:                 }
 457:                 if ($breakAt === false) {
 458:                     $parts[] = trim($text);
 459:                     break;
 460:                 }
 461:                 $part = mb_substr($text, 0, $breakAt);
 462:             }
 463: 
 464:             $part = trim($part);
 465:             $parts[] = $part;
 466:             $text = trim(mb_substr($text, mb_strlen($part)));
 467:         }
 468: 
 469:         return implode($break, $parts);
 470:     }
 471: 
 472:     /**
 473:      * Highlights a given phrase in a text. You can specify any expression in highlighter that
 474:      * may include the \1 expression to include the $phrase found.
 475:      *
 476:      * ### Options:
 477:      *
 478:      * - `format` The piece of HTML with that the phrase will be highlighted
 479:      * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 480:      * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
 481:      * - `limit` A limit, optional, defaults to -1 (none)
 482:      *
 483:      * @param string $text Text to search the phrase in.
 484:      * @param string|array $phrase The phrase or phrases that will be searched.
 485:      * @param array $options An array of HTML attributes and options.
 486:      * @return string The highlighted text
 487:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
 488:      */
 489:     public static function highlight($text, $phrase, array $options = [])
 490:     {
 491:         if (empty($phrase)) {
 492:             return $text;
 493:         }
 494: 
 495:         $defaults = [
 496:             'format' => '<span class="highlight">\1</span>',
 497:             'html' => false,
 498:             'regex' => '|%s|iu',
 499:             'limit' => -1,
 500:         ];
 501:         $options += $defaults;
 502: 
 503:         $html = $format = $limit = null;
 504:         /**
 505:          * @var bool $html
 506:          * @var string|array $format
 507:          * @var int $limit
 508:          */
 509:         extract($options);
 510: 
 511:         if (is_array($phrase)) {
 512:             $replace = [];
 513:             $with = [];
 514: 
 515:             foreach ($phrase as $key => $segment) {
 516:                 $segment = '(' . preg_quote($segment, '|') . ')';
 517:                 if ($html) {
 518:                     $segment = "(?![^<]+>)$segment(?![^<]+>)";
 519:                 }
 520: 
 521:                 $with[] = is_array($format) ? $format[$key] : $format;
 522:                 $replace[] = sprintf($options['regex'], $segment);
 523:             }
 524: 
 525:             return preg_replace($replace, $with, $text, $limit);
 526:         }
 527: 
 528:         $phrase = '(' . preg_quote($phrase, '|') . ')';
 529:         if ($html) {
 530:             $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
 531:         }
 532: 
 533:         return preg_replace(sprintf($options['regex'], $phrase), $format, $text, $limit);
 534:     }
 535: 
 536:     /**
 537:      * Strips given text of all links (<a href=....).
 538:      *
 539:      * *Warning* This method is not an robust solution in preventing XSS
 540:      * or malicious HTML.
 541:      *
 542:      * @param string $text Text
 543:      * @return string The text without links
 544:      * @deprecated 3.2.12 This method will be removed in 4.0.0
 545:      */
 546:     public static function stripLinks($text)
 547:     {
 548:         deprecationWarning('This method will be removed in 4.0.0.');
 549:         do {
 550:             $text = preg_replace('#</?a([/\s][^>]*)?(>|$)#i', '', $text, -1, $count);
 551:         } while ($count);
 552: 
 553:         return $text;
 554:     }
 555: 
 556:     /**
 557:      * Truncates text starting from the end.
 558:      *
 559:      * Cuts a string to the length of $length and replaces the first characters
 560:      * with the ellipsis if the text is longer than length.
 561:      *
 562:      * ### Options:
 563:      *
 564:      * - `ellipsis` Will be used as beginning and prepended to the trimmed string
 565:      * - `exact` If false, $text will not be cut mid-word
 566:      *
 567:      * @param string $text String to truncate.
 568:      * @param int $length Length of returned string, including ellipsis.
 569:      * @param array $options An array of options.
 570:      * @return string Trimmed string.
 571:      */
 572:     public static function tail($text, $length = 100, array $options = [])
 573:     {
 574:         $default = [
 575:             'ellipsis' => '...', 'exact' => true
 576:         ];
 577:         $options += $default;
 578:         $exact = $ellipsis = null;
 579:         /**
 580:          * @var string $ellipsis
 581:          * @var bool $exact
 582:          */
 583:         extract($options);
 584: 
 585:         if (mb_strlen($text) <= $length) {
 586:             return $text;
 587:         }
 588: 
 589:         $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
 590:         if (!$exact) {
 591:             $spacepos = mb_strpos($truncate, ' ');
 592:             $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
 593:         }
 594: 
 595:         return $ellipsis . $truncate;
 596:     }
 597: 
 598:     /**
 599:      * Truncates text.
 600:      *
 601:      * Cuts a string to the length of $length and replaces the last characters
 602:      * with the ellipsis if the text is longer than length.
 603:      *
 604:      * ### Options:
 605:      *
 606:      * - `ellipsis` Will be used as ending and appended to the trimmed string
 607:      * - `exact` If false, $text will not be cut mid-word
 608:      * - `html` If true, HTML tags would be handled correctly
 609:      * - `trimWidth` If true, $text will be truncated with the width
 610:      *
 611:      * @param string $text String to truncate.
 612:      * @param int $length Length of returned string, including ellipsis.
 613:      * @param array $options An array of HTML attributes and options.
 614:      * @return string Trimmed string.
 615:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
 616:      */
 617:     public static function truncate($text, $length = 100, array $options = [])
 618:     {
 619:         $default = [
 620:             'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
 621:         ];
 622:         if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
 623:             $default['ellipsis'] = "\xe2\x80\xa6";
 624:         }
 625:         $options += $default;
 626: 
 627:         $prefix = '';
 628:         $suffix = $options['ellipsis'];
 629: 
 630:         if ($options['html']) {
 631:             $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);
 632: 
 633:             $truncateLength = 0;
 634:             $totalLength = 0;
 635:             $openTags = [];
 636:             $truncate = '';
 637: 
 638:             preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
 639:             foreach ($tags as $tag) {
 640:                 $contentLength = 0;
 641:                 if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
 642:                     $contentLength = self::_strlen($tag[3], $options);
 643:                 }
 644: 
 645:                 if ($truncate === '') {
 646:                     if (!preg_match('/img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param/i', $tag[2])) {
 647:                         if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
 648:                             array_unshift($openTags, $tag[2]);
 649:                         } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
 650:                             $pos = array_search($closeTag[1], $openTags);
 651:                             if ($pos !== false) {
 652:                                 array_splice($openTags, $pos, 1);
 653:                             }
 654:                         }
 655:                     }
 656: 
 657:                     $prefix .= $tag[1];
 658: 
 659:                     if ($totalLength + $contentLength + $ellipsisLength > $length) {
 660:                         $truncate = $tag[3];
 661:                         $truncateLength = $length - $totalLength;
 662:                     } else {
 663:                         $prefix .= $tag[3];
 664:                     }
 665:                 }
 666: 
 667:                 $totalLength += $contentLength;
 668:                 if ($totalLength > $length) {
 669:                     break;
 670:                 }
 671:             }
 672: 
 673:             if ($totalLength <= $length) {
 674:                 return $text;
 675:             }
 676: 
 677:             $text = $truncate;
 678:             $length = $truncateLength;
 679: 
 680:             foreach ($openTags as $tag) {
 681:                 $suffix .= '</' . $tag . '>';
 682:             }
 683:         } else {
 684:             if (self::_strlen($text, $options) <= $length) {
 685:                 return $text;
 686:             }
 687:             $ellipsisLength = self::_strlen($options['ellipsis'], $options);
 688:         }
 689: 
 690:         $result = self::_substr($text, 0, $length - $ellipsisLength, $options);
 691: 
 692:         if (!$options['exact']) {
 693:             if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
 694:                 $result = self::_removeLastWord($result);
 695:             }
 696: 
 697:             // If result is empty, then we don't need to count ellipsis in the cut.
 698:             if (!strlen($result)) {
 699:                 $result = self::_substr($text, 0, $length, $options);
 700:             }
 701:         }
 702: 
 703:         return $prefix . $result . $suffix;
 704:     }
 705: 
 706:     /**
 707:      * Truncate text with specified width.
 708:      *
 709:      * @param string $text String to truncate.
 710:      * @param int $length Length of returned string, including ellipsis.
 711:      * @param array $options An array of HTML attributes and options.
 712:      * @return string Trimmed string.
 713:      * @see \Cake\Utility\Text::truncate()
 714:      */
 715:     public static function truncateByWidth($text, $length = 100, array $options = [])
 716:     {
 717:         return static::truncate($text, $length, ['trimWidth' => true] + $options);
 718:     }
 719: 
 720:     /**
 721:      * Get string length.
 722:      *
 723:      * ### Options:
 724:      *
 725:      * - `html` If true, HTML entities will be handled as decoded characters.
 726:      * - `trimWidth` If true, the width will return.
 727:      *
 728:      * @param string $text The string being checked for length
 729:      * @param array $options An array of options.
 730:      * @return int
 731:      */
 732:     protected static function _strlen($text, array $options)
 733:     {
 734:         if (empty($options['trimWidth'])) {
 735:             $strlen = 'mb_strlen';
 736:         } else {
 737:             $strlen = 'mb_strwidth';
 738:         }
 739: 
 740:         if (empty($options['html'])) {
 741:             return $strlen($text);
 742:         }
 743: 
 744:         $pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
 745:         $replace = preg_replace_callback(
 746:             $pattern,
 747:             function ($match) use ($strlen) {
 748:                 $utf8 = html_entity_decode($match[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');
 749: 
 750:                 return str_repeat(' ', $strlen($utf8, 'UTF-8'));
 751:             },
 752:             $text
 753:         );
 754: 
 755:         return $strlen($replace);
 756:     }
 757: 
 758:     /**
 759:      * Return part of a string.
 760:      *
 761:      * ### Options:
 762:      *
 763:      * - `html` If true, HTML entities will be handled as decoded characters.
 764:      * - `trimWidth` If true, will be truncated with specified width.
 765:      *
 766:      * @param string $text The input string.
 767:      * @param int $start The position to begin extracting.
 768:      * @param int $length The desired length.
 769:      * @param array $options An array of options.
 770:      * @return string
 771:      */
 772:     protected static function _substr($text, $start, $length, array $options)
 773:     {
 774:         if (empty($options['trimWidth'])) {
 775:             $substr = 'mb_substr';
 776:         } else {
 777:             $substr = 'mb_strimwidth';
 778:         }
 779: 
 780:         $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
 781:         if ($start < 0) {
 782:             $start += $maxPosition;
 783:             if ($start < 0) {
 784:                 $start = 0;
 785:             }
 786:         }
 787:         if ($start >= $maxPosition) {
 788:             return '';
 789:         }
 790: 
 791:         if ($length === null) {
 792:             $length = self::_strlen($text, $options);
 793:         }
 794: 
 795:         if ($length < 0) {
 796:             $text = self::_substr($text, $start, null, $options);
 797:             $start = 0;
 798:             $length += self::_strlen($text, $options);
 799:         }
 800: 
 801:         if ($length <= 0) {
 802:             return '';
 803:         }
 804: 
 805:         if (empty($options['html'])) {
 806:             return (string)$substr($text, $start, $length);
 807:         }
 808: 
 809:         $totalOffset = 0;
 810:         $totalLength = 0;
 811:         $result = '';
 812: 
 813:         $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
 814:         $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
 815:         foreach ($parts as $part) {
 816:             $offset = 0;
 817: 
 818:             if ($totalOffset < $start) {
 819:                 $len = self::_strlen($part, ['trimWidth' => false] + $options);
 820:                 if ($totalOffset + $len <= $start) {
 821:                     $totalOffset += $len;
 822:                     continue;
 823:                 }
 824: 
 825:                 $offset = $start - $totalOffset;
 826:                 $totalOffset = $start;
 827:             }
 828: 
 829:             $len = self::_strlen($part, $options);
 830:             if ($offset !== 0 || $totalLength + $len > $length) {
 831:                 if (strpos($part, '&') === 0 && preg_match($pattern, $part)
 832:                     && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
 833:                 ) {
 834:                     // Entities cannot be passed substr.
 835:                     continue;
 836:                 }
 837: 
 838:                 $part = $substr($part, $offset, $length - $totalLength);
 839:                 $len = self::_strlen($part, $options);
 840:             }
 841: 
 842:             $result .= $part;
 843:             $totalLength += $len;
 844:             if ($totalLength >= $length) {
 845:                 break;
 846:             }
 847:         }
 848: 
 849:         return $result;
 850:     }
 851: 
 852:     /**
 853:      * Removes the last word from the input text.
 854:      *
 855:      * @param string $text The input text
 856:      * @return string
 857:      */
 858:     protected static function _removeLastWord($text)
 859:     {
 860:         $spacepos = mb_strrpos($text, ' ');
 861: 
 862:         if ($spacepos !== false) {
 863:             $lastWord = mb_strrpos($text, $spacepos);
 864: 
 865:             // Some languages are written without word separation.
 866:             // We recognize a string as a word if it doesn't contain any full-width characters.
 867:             if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
 868:                 $text = mb_substr($text, 0, $spacepos);
 869:             }
 870: 
 871:             return $text;
 872:         }
 873: 
 874:         return '';
 875:     }
 876: 
 877:     /**
 878:      * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
 879:      * determined by radius.
 880:      *
 881:      * @param string $text String to search the phrase in
 882:      * @param string $phrase Phrase that will be searched for
 883:      * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 884:      * @param string $ellipsis Ending that will be appended
 885:      * @return string Modified string
 886:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
 887:      */
 888:     public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
 889:     {
 890:         if (empty($text) || empty($phrase)) {
 891:             return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
 892:         }
 893: 
 894:         $append = $prepend = $ellipsis;
 895: 
 896:         $phraseLen = mb_strlen($phrase);
 897:         $textLen = mb_strlen($text);
 898: 
 899:         $pos = mb_stripos($text, $phrase);
 900:         if ($pos === false) {
 901:             return mb_substr($text, 0, $radius) . $ellipsis;
 902:         }
 903: 
 904:         $startPos = $pos - $radius;
 905:         if ($startPos <= 0) {
 906:             $startPos = 0;
 907:             $prepend = '';
 908:         }
 909: 
 910:         $endPos = $pos + $phraseLen + $radius;
 911:         if ($endPos >= $textLen) {
 912:             $endPos = $textLen;
 913:             $append = '';
 914:         }
 915: 
 916:         $excerpt = mb_substr($text, $startPos, $endPos - $startPos);
 917:         $excerpt = $prepend . $excerpt . $append;
 918: 
 919:         return $excerpt;
 920:     }
 921: 
 922:     /**
 923:      * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 924:      *
 925:      * @param array $list The list to be joined.
 926:      * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
 927:      * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 928:      * @return string The glued together string.
 929:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
 930:      */
 931:     public static function toList(array $list, $and = null, $separator = ', ')
 932:     {
 933:         if ($and === null) {
 934:             $and = __d('cake', 'and');
 935:         }
 936:         if (count($list) > 1) {
 937:             return implode($separator, array_slice($list, null, -1)) . ' ' . $and . ' ' . array_pop($list);
 938:         }
 939: 
 940:         return array_pop($list);
 941:     }
 942: 
 943:     /**
 944:      * Check if the string contain multibyte characters
 945:      *
 946:      * @param string $string value to test
 947:      * @return bool
 948:      */
 949:     public static function isMultibyte($string)
 950:     {
 951:         $length = strlen($string);
 952: 
 953:         for ($i = 0; $i < $length; $i++) {
 954:             $value = ord($string[$i]);
 955:             if ($value > 128) {
 956:                 return true;
 957:             }
 958:         }
 959: 
 960:         return false;
 961:     }
 962: 
 963:     /**
 964:      * Converts a multibyte character string
 965:      * to the decimal value of the character
 966:      *
 967:      * @param string $string String to convert.
 968:      * @return array
 969:      */
 970:     public static function utf8($string)
 971:     {
 972:         $map = [];
 973: 
 974:         $values = [];
 975:         $find = 1;
 976:         $length = strlen($string);
 977: 
 978:         for ($i = 0; $i < $length; $i++) {
 979:             $value = ord($string[$i]);
 980: 
 981:             if ($value < 128) {
 982:                 $map[] = $value;
 983:             } else {
 984:                 if (empty($values)) {
 985:                     $find = ($value < 224) ? 2 : 3;
 986:                 }
 987:                 $values[] = $value;
 988: 
 989:                 if (count($values) === $find) {
 990:                     if ($find == 3) {
 991:                         $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
 992:                     } else {
 993:                         $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
 994:                     }
 995:                     $values = [];
 996:                     $find = 1;
 997:                 }
 998:             }
 999:         }
1000: 
1001:         return $map;
1002:     }
1003: 
1004:     /**
1005:      * Converts the decimal value of a multibyte character string
1006:      * to a string
1007:      *
1008:      * @param array $array Array
1009:      * @return string
1010:      */
1011:     public static function ascii(array $array)
1012:     {
1013:         $ascii = '';
1014: 
1015:         foreach ($array as $utf8) {
1016:             if ($utf8 < 128) {
1017:                 $ascii .= chr($utf8);
1018:             } elseif ($utf8 < 2048) {
1019:                 $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
1020:                 $ascii .= chr(128 + ($utf8 % 64));
1021:             } else {
1022:                 $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
1023:                 $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
1024:                 $ascii .= chr(128 + ($utf8 % 64));
1025:             }
1026:         }
1027: 
1028:         return $ascii;
1029:     }
1030: 
1031:     /**
1032:      * Converts filesize from human readable string to bytes
1033:      *
1034:      * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
1035:      * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
1036:      * @return mixed Number of bytes as integer on success, `$default` on failure if not false
1037:      * @throws \InvalidArgumentException On invalid Unit type.
1038:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
1039:      */
1040:     public static function parseFileSize($size, $default = false)
1041:     {
1042:         if (ctype_digit($size)) {
1043:             return (int)$size;
1044:         }
1045:         $size = strtoupper($size);
1046: 
1047:         $l = -2;
1048:         $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB']);
1049:         if ($i === false) {
1050:             $l = -1;
1051:             $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P']);
1052:         }
1053:         if ($i !== false) {
1054:             $size = (float)substr($size, 0, $l);
1055: 
1056:             return $size * pow(1024, $i + 1);
1057:         }
1058: 
1059:         if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
1060:             $size = substr($size, 0, -1);
1061: 
1062:             return (int)$size;
1063:         }
1064: 
1065:         if ($default !== false) {
1066:             return $default;
1067:         }
1068:         throw new InvalidArgumentException('No unit type.');
1069:     }
1070: 
1071:     /**
1072:      * Get the default transliterator.
1073:      *
1074:      * @return \Transliterator|null Either a Transliterator instance, or `null`
1075:      *   in case no transliterator has been set yet.
1076:      * @since 3.7.0
1077:      */
1078:     public static function getTransliterator()
1079:     {
1080:         return static::$_defaultTransliterator;
1081:     }
1082: 
1083:     /**
1084:      * Set the default transliterator.
1085:      *
1086:      * @param \Transliterator $transliterator A `Transliterator` instance.
1087:      * @return void
1088:      * @since 3.7.0
1089:      */
1090:     public static function setTransliterator(\Transliterator $transliterator)
1091:     {
1092:         static::$_defaultTransliterator = $transliterator;
1093:     }
1094: 
1095:     /**
1096:      * Get default transliterator identifier string.
1097:      *
1098:      * @return string Transliterator identifier.
1099:      */
1100:     public static function getTransliteratorId()
1101:     {
1102:         return static::$_defaultTransliteratorId;
1103:     }
1104: 
1105:     /**
1106:      * Set default transliterator identifier string.
1107:      *
1108:      * @param string $transliteratorId Transliterator identifier.
1109:      * @return void
1110:      */
1111:     public static function setTransliteratorId($transliteratorId)
1112:     {
1113:         static::setTransliterator(transliterator_create($transliteratorId));
1114:         static::$_defaultTransliteratorId = $transliteratorId;
1115:     }
1116: 
1117:     /**
1118:      * Transliterate string.
1119:      *
1120:      * @param string $string String to transliterate.
1121:      * @param \Transliterator|string|null $transliterator Either a Transliterator
1122:      *   instance, or a transliterator identifier string. If `null`, the default
1123:      *   transliterator (identifier) set via `setTransliteratorId()` or
1124:      *   `setTransliterator()` will be used.
1125:      * @return string
1126:      * @see https://secure.php.net/manual/en/transliterator.transliterate.php
1127:      */
1128:     public static function transliterate($string, $transliterator = null)
1129:     {
1130:         if (!$transliterator) {
1131:             $transliterator = static::$_defaultTransliterator ?: static::$_defaultTransliteratorId;
1132:         }
1133: 
1134:         return transliterator_transliterate($transliterator, $string);
1135:     }
1136: 
1137:     /**
1138:      * Returns a string with all spaces converted to dashes (by default),
1139:      * characters transliterated to ASCII characters, and non word characters removed.
1140:      *
1141:      * ### Options:
1142:      *
1143:      * - `replacement`: Replacement string. Default '-'.
1144:      * - `transliteratorId`: A valid transliterator id string.
1145:      *   If `null` (default) the transliterator (identifier) set via
1146:      *   `setTransliteratorId()` or `setTransliterator()` will be used.
1147:      *   If `false` no transliteration will be done, only non words will be removed.
1148:      * - `preserve`: Specific non-word character to preserve. Default `null`.
1149:      *   For e.g. this option can be set to '.' to generate clean file names.
1150:      *
1151:      * @param string $string the string you want to slug
1152:      * @param array $options If string it will be use as replacement character
1153:      *   or an array of options.
1154:      * @return string
1155:      * @see setTransliterator()
1156:      * @see setTransliteratorId()
1157:      */
1158:     public static function slug($string, $options = [])
1159:     {
1160:         if (is_string($options)) {
1161:             $options = ['replacement' => $options];
1162:         }
1163:         $options += [
1164:             'replacement' => '-',
1165:             'transliteratorId' => null,
1166:             'preserve' => null
1167:         ];
1168: 
1169:         if ($options['transliteratorId'] !== false) {
1170:             $string = static::transliterate($string, $options['transliteratorId']);
1171:         }
1172: 
1173:         $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
1174:         if ($options['preserve']) {
1175:             $regex .= preg_quote($options['preserve'], '/');
1176:         }
1177:         $quotedReplacement = preg_quote($options['replacement'], '/');
1178:         $map = [
1179:             '/[' . $regex . ']/mu' => ' ',
1180:             '/[\s]+/mu' => $options['replacement'],
1181:             sprintf('/^[%s]+|[%s]+$/', $quotedReplacement, $quotedReplacement) => '',
1182:         ];
1183:         $string = preg_replace(array_keys($map), $map, $string);
1184: 
1185:         return $string;
1186:     }
1187: }
1188:
C CakePHP 3.7 Red Velvet API

Version:

Namespaces

Classes

Traits