2 namespace TYPO3\CMS\Core\Charset;
127 'us-ascii' =>
'ascii',
128 'cp819' =>
'iso-8859-1',
129 'ibm819' =>
'iso-8859-1',
130 'iso-ir-100' =>
'iso-8859-1',
131 'iso-ir-101' =>
'iso-8859-2',
132 'iso-ir-109' =>
'iso-8859-3',
133 'iso-ir-110' =>
'iso-8859-4',
134 'iso-ir-144' =>
'iso-8859-5',
135 'iso-ir-127' =>
'iso-8859-6',
136 'iso-ir-126' =>
'iso-8859-7',
137 'iso-ir-138' =>
'iso-8859-8',
138 'iso-ir-148' =>
'iso-8859-9',
139 'iso-ir-157' =>
'iso-8859-10',
140 'iso-ir-179' =>
'iso-8859-13',
141 'iso-ir-199' =>
'iso-8859-14',
142 'iso-ir-203' =>
'iso-8859-15',
143 'csisolatin1' =>
'iso-8859-1',
144 'csisolatin2' =>
'iso-8859-2',
145 'csisolatin3' =>
'iso-8859-3',
146 'csisolatin5' =>
'iso-8859-9',
147 'csisolatin8' =>
'iso-8859-14',
148 'csisolatin9' =>
'iso-8859-15',
149 'csisolatingreek' =>
'iso-8859-7',
150 'iso-celtic' =>
'iso-8859-14',
151 'latin1' =>
'iso-8859-1',
152 'latin2' =>
'iso-8859-2',
153 'latin3' =>
'iso-8859-3',
154 'latin5' =>
'iso-8859-9',
155 'latin6' =>
'iso-8859-10',
156 'latin8' =>
'iso-8859-14',
157 'latin9' =>
'iso-8859-15',
158 'l1' =>
'iso-8859-1',
159 'l2' =>
'iso-8859-2',
160 'l3' =>
'iso-8859-3',
161 'l5' =>
'iso-8859-9',
162 'l6' =>
'iso-8859-10',
163 'l8' =>
'iso-8859-14',
164 'l9' =>
'iso-8859-15',
165 'cyrillic' =>
'iso-8859-5',
166 'arabic' =>
'iso-8859-6',
167 'tis-620' =>
'iso-8859-11',
168 'win874' =>
'windows-874',
169 'win1250' =>
'windows-1250',
170 'win1251' =>
'windows-1251',
171 'win1252' =>
'windows-1252',
172 'win1253' =>
'windows-1253',
173 'win1254' =>
'windows-1254',
174 'win1255' =>
'windows-1255',
175 'win1256' =>
'windows-1256',
176 'win1257' =>
'windows-1257',
177 'win1258' =>
'windows-1258',
178 'cp1250' =>
'windows-1250',
179 'cp1251' =>
'windows-1251',
180 'cp1252' =>
'windows-1252',
181 'ms-ee' =>
'windows-1250',
182 'ms-ansi' =>
'windows-1252',
183 'ms-greek' =>
'windows-1253',
184 'ms-turk' =>
'windows-1254',
185 'winbaltrim' =>
'windows-1257',
186 'koi-8ru' =>
'koi-8r',
190 'macintosh' =>
'macroman',
191 'euc-cn' =>
'gb2312',
192 'x-euc-cn' =>
'gb2312',
198 'sjis' =>
'shift_jis',
199 'shift-jis' =>
'shift_jis',
200 'cp932' =>
'shift_jis',
217 'af' =>
'west_european',
220 'bs' =>
'east_european',
221 'cs' =>
'east_european',
222 'da' =>
'west_european',
223 'de' =>
'west_european',
224 'es' =>
'west_european',
227 'eu' =>
'west_european',
229 'fi' =>
'west_european',
230 'fo' =>
'west_european',
231 'fr' =>
'west_european',
232 'ga' =>
'west_european',
233 'gl' =>
'west_european',
237 'hr' =>
'east_european',
238 'hu' =>
'east_european',
240 'is' =>
'west_european',
241 'it' =>
'west_european',
244 'kl' =>
'west_european',
247 'lt' =>
'lithuanian',
248 'lv' =>
'west_european',
249 'nl' =>
'west_european',
250 'no' =>
'west_european',
251 'nb' =>
'west_european',
252 'nn' =>
'west_european',
253 'pl' =>
'east_european',
254 'pt' =>
'west_european',
255 'ro' =>
'east_european',
257 'sk' =>
'east_european',
258 'sl' =>
'east_european',
260 'sv' =>
'west_european',
264 'vi' =>
'vietnamese',
269 'afk' =>
'west_european',
272 'cat' =>
'west_european',
273 'chs' =>
'simpl_chinese',
274 'cht' =>
'trad_chinese',
275 'csy' =>
'east_european',
276 'dan' =>
'west_european',
277 'deu' =>
'west_european',
278 'dea' =>
'west_european',
279 'des' =>
'west_european',
280 'ena' =>
'west_european',
281 'enc' =>
'west_european',
282 'eng' =>
'west_european',
283 'enz' =>
'west_european',
284 'enu' =>
'west_european',
285 'euq' =>
'west_european',
286 'fos' =>
'west_european',
288 'fin' =>
'west_european',
289 'fra' =>
'west_european',
290 'frb' =>
'west_european',
291 'frc' =>
'west_european',
292 'frs' =>
'west_european',
294 'glg' =>
'west_european',
298 'hun' =>
'east_european',
299 'isl' =>
'west_european',
300 'ita' =>
'west_european',
301 'its' =>
'west_european',
305 'lth' =>
'lithuanian',
306 'lvi' =>
'west_european',
307 'msl' =>
'west_european',
308 'nlb' =>
'west_european',
309 'nld' =>
'west_european',
310 'nor' =>
'west_european',
311 'non' =>
'west_european',
312 'plk' =>
'east_european',
313 'ptg' =>
'west_european',
314 'ptb' =>
'west_european',
315 'rom' =>
'east_european',
317 'slv' =>
'east_european',
318 'sky' =>
'east_european',
319 'srl' =>
'east_european',
321 'esp' =>
'west_european',
322 'esm' =>
'west_european',
323 'esn' =>
'west_european',
324 'sve' =>
'west_european',
331 'afrikaans' =>
'west_european',
332 'albanian' =>
'albanian',
333 'arabic' =>
'arabic',
334 'basque' =>
'west_european',
335 'bosnian' =>
'east_european',
336 'bulgarian' =>
'east_european',
337 'catalan' =>
'west_european',
338 'croatian' =>
'east_european',
339 'czech' =>
'east_european',
340 'danish' =>
'west_european',
341 'dutch' =>
'west_european',
342 'english' =>
'west_european',
343 'esperanto' =>
'unicode',
344 'estonian' =>
'estonian',
345 'faroese' =>
'west_european',
347 'finnish' =>
'west_european',
348 'french' =>
'west_european',
349 'galician' =>
'west_european',
350 'georgian' =>
'unicode',
351 'german' =>
'west_european',
353 'greenlandic' =>
'west_european',
354 'hebrew' =>
'hebrew',
355 'hindi' =>
'unicode',
356 'hungarian' =>
'east_european',
357 'icelandic' =>
'west_european',
358 'italian' =>
'west_european',
359 'khmer' =>
'unicode',
360 'latvian' =>
'west_european',
361 'lettish' =>
'west_european',
362 'lithuanian' =>
'lithuanian',
363 'malay' =>
'west_european',
364 'norwegian' =>
'west_european',
365 'persian' =>
'arabic',
366 'polish' =>
'east_european',
367 'portuguese' =>
'west_european',
368 'russian' =>
'cyrillic',
369 'romanian' =>
'east_european',
370 'serbian' =>
'cyrillic',
371 'slovak' =>
'east_european',
372 'slovenian' =>
'east_european',
373 'spanish' =>
'west_european',
374 'svedish' =>
'west_european',
376 'turkish' =>
'turkish',
377 'ukrainian' =>
'cyrillic'
386 'west_european' =>
'iso-8859-1',
387 'estonian' =>
'iso-8859-1',
388 'east_european' =>
'iso-8859-2',
389 'baltic' =>
'iso-8859-4',
390 'cyrillic' =>
'iso-8859-5',
391 'arabic' =>
'iso-8859-6',
392 'greek' =>
'iso-8859-7',
393 'hebrew' =>
'iso-8859-8',
394 'turkish' =>
'iso-8859-9',
395 'thai' =>
'iso-8859-11',
396 'lithuanian' =>
'iso-8859-13',
397 'chinese' =>
'gb2312',
398 'japanese' =>
'euc-jp',
399 'korean' =>
'euc-kr',
400 'simpl_chinese' =>
'gb2312',
401 'trad_chinese' =>
'big5',
403 'unicode' =>
'utf-8',
404 'albanian' =>
'utf-8'
413 'east_european' =>
'windows-1250',
414 'cyrillic' =>
'windows-1251',
415 'west_european' =>
'windows-1252',
416 'greek' =>
'windows-1253',
417 'turkish' =>
'windows-1254',
418 'hebrew' =>
'windows-1255',
419 'arabic' =>
'windows-1256',
420 'baltic' =>
'windows-1257',
421 'estonian' =>
'windows-1257',
422 'lithuanian' =>
'windows-1257',
423 'vietnamese' =>
'windows-1258',
426 'chinese' =>
'gb2312',
427 'japanese' =>
'shift_jis',
428 'simpl_chinese' =>
'gb2312',
429 'trad_chinese' =>
'big5',
430 'albanian' =>
'windows-1250',
440 'japanese.euc' =>
'euc-jp',
441 'ja_jp.ujis' =>
'euc-jp',
442 'korean.euc' =>
'euc-kr',
443 'sr@Latn' =>
'iso-8859-2',
457 'ar' =>
'iso-8859-6',
458 'ba' =>
'iso-8859-2',
459 'bg' =>
'windows-1251',
461 'ca' =>
'iso-8859-15',
463 'cs' =>
'windows-1250',
464 'cz' =>
'windows-1250',
468 'el' =>
'iso-8859-7',
471 'et' =>
'iso-8859-4',
481 'gr' =>
'iso-8859-7',
485 'hr' =>
'windows-1250',
486 'hu' =>
'iso-8859-2',
496 'lt' =>
'windows-1257',
502 'pl' =>
'iso-8859-2',
506 'ro' =>
'iso-8859-2',
507 'ru' =>
'windows-1251',
509 'si' =>
'windows-1250',
510 'sk' =>
'windows-1250',
511 'sl' =>
'windows-1250',
515 'th' =>
'iso-8859-11',
516 'tr' =>
'iso-8859-9',
517 'ua' =>
'windows-1251',
518 'uk' =>
'windows-1251',
540 $charset = trim(strtolower($charset));
541 if (isset($this->synonyms[$charset])) {
542 $charset = $this->synonyms[$charset];
560 $locale = strtolower($locale);
562 if (isset($this->locale_to_charset[$locale])) {
563 return $this->locale_to_charset[$locale];
566 list($locale, $modifier) = explode(
'@', $locale);
568 list($locale, $charset) = explode(
'.', $locale);
573 if ($modifier ===
'euro') {
574 return 'iso-8859-15';
577 list($language, ) = explode(
'_', $locale);
578 if (isset($this->lang_to_script[$language])) {
579 $script = $this->lang_to_script[$language];
581 if (TYPO3_OS ===
'WIN') {
582 $cs = $this->script_to_charset_windows[$script] ?:
'windows-1252';
584 $cs = $this->script_to_charset_unix[$script] ?:
'utf-8';
604 public function conv($inputString, $fromCharset, $toCharset, $useEntityForNoChar =
false)
606 if ($fromCharset === $toCharset) {
610 if ($toCharset ===
'utf-8' || !$useEntityForNoChar) {
611 switch (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_convMethod']) {
613 $convertedString = mb_convert_encoding($inputString, $toCharset, $fromCharset);
614 if (
false !== $convertedString) {
615 return $convertedString;
620 $convertedString = iconv($fromCharset, $toCharset .
'//TRANSLIT', $inputString);
621 if (
false !== $convertedString) {
622 return $convertedString;
626 $convertedString = recode_string($fromCharset .
'..' . $toCharset, $inputString);
627 if (
false !== $convertedString) {
628 return $convertedString;
633 if ($fromCharset !==
'utf-8') {
634 $inputString = $this->
utf8_encode($inputString, $fromCharset);
636 if ($toCharset !==
'utf-8') {
637 $inputString = $this->
utf8_decode($inputString, $toCharset, $useEntityForNoChar);
653 public function convArray(&$array, $fromCharset, $toCharset, $useEntityForNoChar =
false)
655 foreach ($array as $key => $value) {
656 if (is_array($array[$key])) {
657 $this->
convArray($array[$key], $fromCharset, $toCharset, $useEntityForNoChar);
658 }
elseif (is_string($array[$key])) {
659 $array[$key] = $this->
conv($array[$key], $fromCharset, $toCharset, $useEntityForNoChar);
673 if ($charset ===
'utf-8') {
682 for ($a = 0; $a < $strLen; $a++) {
683 $chr =
substr($str, $a, 1);
686 if (isset($this->twoByteSets[$charset])) {
687 $ord2 = ord($str[$a + 1]);
689 $ord = $ord << 8 | $ord2;
691 if (isset($this->parsedCharsets[$charset][
'local'][$ord])) {
692 $outStr .= $this->parsedCharsets[$charset][
'local'][$ord];
694 $outStr .= chr($this->noCharByteVal);
701 if (isset($this->eucBasedSets[$charset])) {
703 if ($charset !==
'shift_jis' || ($ord < 160 || $ord > 223)) {
705 $ord2 = ord(
substr($str, $a, 1));
706 $ord = $ord * 256 + $ord2;
709 if (isset($this->parsedCharsets[$charset][
'local'][$ord])) {
711 $outStr .= $this->parsedCharsets[$charset][
'local'][$ord];
713 $outStr .= chr($this->noCharByteVal);
731 public function utf8_decode($str, $charset, $useEntityForNoChar =
false)
733 if ($charset ===
'utf-8') {
742 for ($a = 0, $i = 0; $a < $strLen; $a++, $i++) {
743 $chr =
substr($str, $a, 1);
752 for ($b = 0; $b < 8; $b++) {
759 $buf .=
substr($str, $a, 1);
765 if (isset($this->parsedCharsets[$charset][
'utf8'][$buf])) {
767 $mByte = $this->parsedCharsets[$charset][
'utf8'][$buf];
770 $outStr .= chr(($mByte >> 8 & 255)) . chr(($mByte & 255));
772 $outStr .= chr($mByte);
774 }
elseif ($useEntityForNoChar) {
778 $outStr .= chr($this->noCharByteVal);
781 $outStr .= chr($this->noCharByteVal);
802 for ($a = 0; $a < $strLen; $a++) {
803 $chr =
substr($str, $a, 1);
812 for ($b = 0; $b < 8; $b++) {
819 $buf .=
substr($str, $a, 1);
826 $outStr .= chr($this->noCharByteVal);
844 if ($alsoStdHtmlEnt) {
845 $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT,
'UTF-8'));
847 $token = md5(microtime());
848 $parts = explode($token, preg_replace(
'/(&([#[:alnum:]]*);)/', $token .
'${2}' . $token, $str));
849 foreach ($parts as $k => $v) {
856 if (
substr($v, $position, 1) ===
'#') {
858 if (
substr($v, $position, 1) ===
'x') {
859 $v = hexdec(
substr($v, ++$position));
861 $v =
substr($v, $position);
864 }
elseif ($alsoStdHtmlEnt && isset($trans_tbl[
'&' . $v .
';'])) {
866 $v = $trans_tbl[
'&' . $v .
';'];
870 $parts[$k] =
'&' . $v .
';';
873 return implode(
'', $parts);
894 for ($a = 0; $a < $strLen; $a++) {
895 $chr =
substr($str, $a, 1);
904 for ($b = 0; $b < 8; $b++) {
911 $buf .=
substr($str, $a, 1);
921 $outArr[] = $retChar ? chr($ord) : $ord;
950 if ($unicodeInteger < 128) {
951 $str .= chr($unicodeInteger);
952 }
elseif ($unicodeInteger < 2048) {
953 $str .= chr(192 | $unicodeInteger >> 6);
954 $str .= chr(128 | $unicodeInteger & 63);
955 }
elseif ($unicodeInteger < 65536) {
956 $str .= chr(224 | $unicodeInteger >> 12);
957 $str .= chr(128 | $unicodeInteger >> 6 & 63);
958 $str .= chr(128 | $unicodeInteger & 63);
959 }
elseif ($unicodeInteger < 2097152) {
960 $str .= chr(240 | $unicodeInteger >> 18);
961 $str .= chr(128 | $unicodeInteger >> 12 & 63);
962 $str .= chr(128 | $unicodeInteger >> 6 & 63);
963 $str .= chr(128 | $unicodeInteger & 63);
964 }
elseif ($unicodeInteger < 67108864) {
965 $str .= chr(248 | $unicodeInteger >> 24);
966 $str .= chr(128 | $unicodeInteger >> 18 & 63);
967 $str .= chr(128 | $unicodeInteger >> 12 & 63);
968 $str .= chr(128 | $unicodeInteger >> 6 & 63);
969 $str .= chr(128 | $unicodeInteger & 63);
970 }
elseif ($unicodeInteger < 2147483648) {
971 $str .= chr(252 | $unicodeInteger >> 30);
972 $str .= chr(128 | $unicodeInteger >> 24 & 63);
973 $str .= chr(128 | $unicodeInteger >> 18 & 63);
974 $str .= chr(128 | $unicodeInteger >> 12 & 63);
975 $str .= chr(128 | $unicodeInteger >> 6 & 63);
976 $str .= chr(128 | $unicodeInteger & 63);
979 $str .= chr($this->noCharByteVal);
998 if (($ord & 192) === 192) {
1001 for ($b = 0; $b < 8; $b++) {
1006 $binBuf .=
substr(
'00000000' . decbin(ord(
substr($str, ($b + 1), 1))), -6);
1011 $binBuf =
substr((
'00000000' . decbin(ord($str[0]))), -(6 - $b)) . $binBuf;
1012 $int = bindec($binBuf);
1016 return $hex ?
'x' . dechex($int) : $int;
1037 if (!is_array($this->parsedCharsets[$charset])) {
1045 if ($cacheFile && @is_file($cacheFile)) {
1051 $this->parsedCharsets[$charset] = array(
'local' => array(),
'utf8' => array());
1054 foreach ($lines as $value) {
1056 if (trim($value) && $value[0] !==
'#') {
1059 if (!$detectedType) {
1060 $detectedType = preg_match(
'/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value) ?
'whitespaced' :
'ms-token';
1062 if ($detectedType ===
'ms-token') {
1063 list($hexbyte, $utf8) = preg_split(
'/[=:]/', $value, 3);
1064 }
elseif ($detectedType ===
'whitespaced') {
1066 preg_match(
'/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value, $regA);
1067 $hexbyte = $regA[1];
1068 $utf8 =
'U+' . $regA[2];
1070 $decval = hexdec(trim($hexbyte));
1071 if ($decval > 127) {
1072 $utf8decval = hexdec(
substr(trim($utf8), 2));
1073 $this->parsedCharsets[$charset][
'local'][$decval] = $this->
UnumberToChar($utf8decval);
1074 $this->parsedCharsets[$charset][
'utf8'][$this->parsedCharsets[$charset][
'local'][$decval]] = $decval;
1108 if (is_array($this->caseFolding[
'utf-8'])) {
1112 if ($cacheFileCase && @is_file($cacheFileCase)) {
1118 if (is_array($this->toASCII[
'utf-8'])) {
1122 if ($cacheFileASCII && @is_file($cacheFileASCII)) {
1133 $fh = fopen($unicodeDataFile,
'rb');
1139 $this->caseFolding[
'utf-8'] = array();
1140 $utf8CaseFolding = &$this->caseFolding[
'utf-8'];
1142 $utf8CaseFolding[
'toUpper'] = array();
1143 $utf8CaseFolding[
'toLower'] = array();
1144 $utf8CaseFolding[
'toTitle'] = array();
1146 $decomposition = array();
1153 while (!feof($fh)) {
1154 $line = fgets($fh, 4096);
1156 list($char, $name, $cat, , , $decomp, , , $num, , , , $upper, $lower, $title, ) = explode(
';', rtrim($line));
1157 $ord = hexdec($char);
1164 $utf8CaseFolding[
'toUpper'][$utf8_char] = $this->
UnumberToChar(hexdec($upper));
1167 $utf8CaseFolding[
'toLower'][$utf8_char] = $this->
UnumberToChar(hexdec($lower));
1170 if ($title && $title !== $upper) {
1171 $utf8CaseFolding[
'toTitle'][$utf8_char] = $this->
UnumberToChar(hexdec($title));
1176 $mark[
'U+' . $char] = 1;
1180 if ($ord > 128 && $num !==
'') {
1181 $number[
'U+' . $char] = $num;
1186 if (preg_match(
'/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) {
1187 $c = ord($match[2]);
1188 if ($match[1] ===
'SMALL') {
1191 $decomposition[
'U+' . $char] = array(dechex($c));
1195 if (preg_match(
'/(<.*>)? *(.+)/', $decomp, $match)) {
1196 switch ($match[1]) {
1199 $match[2] =
'0028 ' . $match[2] .
' 0029';
1203 $match[2] =
'005B ' . $match[2] .
' 005D';
1207 if (preg_match(
'/^0020 /', $match[2])) {
1218 $decomposition[
'U+' . $char] = explode(
' ', $match[2]);
1225 $fh = fopen($specialCasingFile,
'rb');
1227 while (!feof($fh)) {
1228 $line = fgets($fh, 4096);
1229 if ($line[0] !==
'#' && trim($line) !==
'') {
1231 if ($cond ===
'' || $cond[0] ===
'#') {
1233 if ($char !== $lower) {
1234 $arr = explode(
' ', $lower);
1235 for ($i = 0; isset($arr[$i]); $i++) {
1238 $utf8CaseFolding[
'toLower'][$utf8_char] = implode(
'', $arr);
1240 if ($char !== $title && $title !== $upper) {
1241 $arr = explode(
' ', $title);
1242 for ($i = 0; isset($arr[$i]); $i++) {
1245 $utf8CaseFolding[
'toTitle'][$utf8_char] = implode(
'', $arr);
1247 if ($char !== $upper) {
1248 $arr = explode(
' ', $upper);
1249 for ($i = 0; isset($arr[$i]); $i++) {
1252 $utf8CaseFolding[
'toUpper'][$utf8_char] = implode(
'', $arr);
1263 $fh = fopen($customTranslitFile,
'rb');
1265 while (!feof($fh)) {
1266 $line = fgets($fh, 4096);
1267 if ($line[0] !==
'#' && trim($line) !==
'') {
1270 $omit[
'U+' . $char] = 1;
1272 $decomposition[
'U+' . $char] = explode(
' ', $translit);
1279 foreach ($decomposition as $from => $to) {
1280 $code_decomp = array();
1281 while ($code_value = array_shift($to)) {
1283 if (isset($decomposition[
'U+' . $code_value])) {
1284 foreach (array_reverse($decomposition[
'U+' . $code_value]) as $cv) {
1285 array_unshift($to, $cv);
1287 }
elseif (!isset($mark[(
'U+' . $code_value)])) {
1289 array_push($code_decomp, $code_value);
1292 if (!empty($code_decomp) || isset($omit[$from])) {
1293 $decomposition[$from] = $code_decomp;
1295 unset($decomposition[$from]);
1299 $this->toASCII[
'utf-8'] = array();
1300 $ascii = &$this->toASCII[
'utf-8'];
1301 foreach ($decomposition as $from => $to) {
1302 $code_decomp = array();
1303 while ($code_value = array_shift($to)) {
1304 $ord = hexdec($code_value);
1309 array_push($code_decomp, chr($ord));
1312 $ascii[$this->
UnumberToChar(hexdec($from))] = join(
'', $code_decomp);
1315 foreach ($number as $from => $to) {
1317 if (!isset($ascii[$utf8_char])) {
1318 $ascii[$utf8_char] = $to;
1321 if ($cacheFileCase) {
1324 if ($cacheFileASCII) {
1341 if (is_array($this->caseFolding[$charset])) {
1346 if ($cacheFile && @is_file($cacheFile)) {
1358 $nochar = chr($this->noCharByteVal);
1359 foreach ($this->parsedCharsets[$charset][
'local'] as $ci => $utf8) {
1362 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toUpper'][$utf8], $charset);
1363 if ($cc !==
'' && $cc !== $nochar) {
1364 $this->caseFolding[$charset][
'toUpper'][$c] = $cc;
1366 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toLower'][$utf8], $charset);
1367 if ($cc !==
'' && $cc !== $nochar) {
1368 $this->caseFolding[$charset][
'toLower'][$c] = $cc;
1370 $cc = $this->
utf8_decode($this->caseFolding[
'utf-8'][
'toTitle'][$utf8], $charset);
1371 if ($cc !==
'' && $cc !== $nochar) {
1372 $this->caseFolding[$charset][
'toTitle'][$c] = $cc;
1378 for ($i = $start; $i <= $end; $i++) {
1379 $this->caseFolding[$charset][
'toUpper'][chr($i)] = chr($i - 32);
1383 for ($i = $start; $i <= $end; $i++) {
1384 $this->caseFolding[$charset][
'toLower'][chr($i)] = chr($i + 32);
1403 if (is_array($this->toASCII[$charset])) {
1408 if ($cacheFile && @is_file($cacheFile)) {
1420 foreach ($this->parsedCharsets[$charset][
'local'] as $ci => $utf8) {
1423 if (isset($this->toASCII[
'utf-8'][$utf8])) {
1424 $this->toASCII[$charset][$c] = $this->toASCII[
'utf-8'][$utf8];
1449 public function substr($charset, $string, $start, $len = null)
1451 if ($len === 0 || $string ===
'') {
1454 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1456 if ($len === null) {
1458 $enc = mb_internal_encoding();
1459 mb_internal_encoding($charset);
1460 $str = mb_substr($string, $start);
1462 mb_internal_encoding($enc);
1465 return mb_substr($string, $start, $len, $charset);
1467 }
elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'iconv') {
1469 if ($len === null) {
1471 $enc = iconv_get_encoding(
'internal_encoding');
1472 iconv_set_encoding(
'internal_encoding', $charset);
1473 $str = iconv_substr($string, $start);
1475 iconv_set_encoding(
'internal_encoding', $enc);
1478 return iconv_substr($string, $start, $len, $charset);
1480 }
elseif ($charset ===
'utf-8') {
1482 }
elseif ($this->eucBasedSets[$charset]) {
1483 return $this->
euc_substr($string, $start, $charset, $len);
1484 }
elseif ($this->twoByteSets[$charset]) {
1485 return substr($string, $start * 2, $len * 2);
1486 }
elseif ($this->fourByteSets[$charset]) {
1487 return substr($string, $start * 4, $len * 4);
1490 return $len === null ?
substr($string, $start) :
substr($string, $start, $len);
1504 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1505 return mb_strlen($string, $charset);
1506 }
elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'iconv') {
1507 return iconv_strlen($string, $charset);
1508 }
elseif ($charset ===
'utf-8') {
1510 }
elseif ($this->eucBasedSets[$charset]) {
1512 }
elseif ($this->twoByteSets[$charset]) {
1513 return strlen($string) / 2;
1514 }
elseif ($this->fourByteSets[$charset]) {
1515 return strlen($string) / 4;
1533 if ((
int)$len === 0 || mb_strlen($string, $charset) <= abs($len)) {
1537 $string = mb_substr($string, 0, $len, $charset) . $crop;
1539 $string = $crop . mb_substr($string, $len, mb_strlen($string, $charset), $charset);
1555 public function crop($charset, $string, $len, $crop =
'')
1557 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1558 return $this->
cropMbstring($charset, $string, $len, $crop);
1560 if ((
int)$len === 0) {
1563 if ($charset ===
'utf-8') {
1565 }
elseif ($this->eucBasedSets[$charset]) {
1571 $i =
strlen($string) + $len;
1582 if (isset($string[$i])) {
1583 return substr($string, 0, $i) . $crop;
1586 if (isset($string[$i - 1])) {
1587 return $crop .
substr($string, $i);
1608 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1609 return mb_strcut($string, 0, $len, $charset);
1610 }
elseif ($charset ===
'utf-8') {
1612 }
elseif ($this->eucBasedSets[$charset]) {
1614 }
elseif ($this->twoByteSets[$charset]) {
1618 }
elseif ($this->fourByteSets[$charset]) {
1624 return substr($string, 0, $len);
1643 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1644 if ($case ===
'toLower') {
1645 $string = mb_strtolower($string, $charset);
1647 $string = mb_strtoupper($string, $charset);
1649 }
elseif ($charset ===
'utf-8') {
1651 }
elseif (isset($this->eucBasedSets[$charset])) {
1671 $firstChar = $this->
substr($charset, $string, 0, 1);
1672 $firstChar = $this->
conv_case($charset, $firstChar, $case);
1673 $remainder = $this->
substr($charset, $string, 1);
1674 return $firstChar . $remainder;
1686 if ($charset ===
'utf-8') {
1688 }
elseif (isset($this->eucBasedSets[$charset])) {
1706 $allLanguageCodes = array();
1707 $selectedLanguage =
'default';
1709 foreach ($this->charSetArray as $typo3Lang => $charSet) {
1710 $allLanguageCodes[$typo3Lang] = $typo3Lang;
1715 foreach ($this->locales->getIsoMapping() as $typo3Lang => $isoLang) {
1716 $isoLang = join(
'-', explode(
'_', $isoLang));
1717 $allLanguageCodes[$typo3Lang] = $isoLang;
1720 $allLanguageCodes = array_flip($allLanguageCodes);
1723 $sortedPreferredLanguages = array();
1724 foreach ($preferredLanguages as $preferredLanguage) {
1726 if (strpos($preferredLanguage,
';q=') !==
false) {
1727 list($preferredLanguage, $quality) = explode(
';q=', $preferredLanguage);
1729 $sortedPreferredLanguages[$preferredLanguage] = $quality;
1732 arsort($sortedPreferredLanguages, SORT_NUMERIC);
1733 foreach ($sortedPreferredLanguages as $preferredLanguage => $quality) {
1734 if (isset($allLanguageCodes[$preferredLanguage])) {
1735 $selectedLanguage = $allLanguageCodes[$preferredLanguage];
1739 list($preferredLanguage, ) = explode(
'-', $preferredLanguage);
1740 if (isset($allLanguageCodes[$preferredLanguage])) {
1741 $selectedLanguage = $allLanguageCodes[$preferredLanguage];
1745 if (!$selectedLanguage || $selectedLanguage ===
'en') {
1746 $selectedLanguage =
'default';
1748 return $selectedLanguage;
1773 $map = &$this->caseFolding[$charset][$opt];
1780 $map = &$this->toASCII[$charset];
1786 for ($i = 0; isset($str[$i]); $i++) {
1788 if (isset($map[$c])) {
1814 if ((
string)$len ===
'0') {
1818 if ($byte_start ===
false) {
1824 $str =
substr($str, $byte_start);
1828 if ($byte_end ===
false) {
1829 return $len < 0 ?
'' : $str;
1832 return substr($str, 0, $byte_end);
1850 for ($i = 0; isset($str[$i]); $i++) {
1855 }
elseif (($c & 192) === 192) {
1875 if (ord($str[$i]) & 128) {
1876 for (; $i > 0 && !(ord($str[$i]) & 64); $i--) {
1882 for ($bc = 0, $mbs = ord($str[$i]); $mbs & 128; $mbs = $mbs << 1) {
1886 if ($bc + $i > $len) {
1887 return substr($str, 0, $i);
1890 return substr($str, 0, $len);
1904 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1905 return mb_strpos($haystack, $needle, $offset,
'utf-8');
1906 }
elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'iconv') {
1907 return iconv_strpos($haystack, $needle, $offset,
'utf-8');
1910 if ($byte_offset ===
false) {
1914 $byte_pos = strpos($haystack, $needle, $byte_offset);
1915 if ($byte_pos ===
false) {
1932 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'mbstring') {
1933 return mb_strrpos($haystack, $needle,
'utf-8');
1934 }
elseif (
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
't3lib_cs_utils'] ===
'iconv') {
1935 return iconv_strrpos($haystack, $needle,
'utf-8');
1937 $byte_pos = strrpos($haystack, $needle);
1938 if ($byte_pos ===
false) {
1966 for (; isset($str[$i]) && $n < $p; $i += $d) {
1967 $c = (int)ord($str[$i]);
1971 }
elseif (($c & 192) === 192) {
1976 if (!isset($str[$i])) {
1982 while (ord($str[$i]) & 128 && !(ord($str[$i]) & 64)) {
2004 for ($i = $pos; $i > 0; $i--) {
2005 $c = (int)ord($str[$i]);
2009 }
elseif (($c & 192) === 192) {
2014 if (!isset($str[$i])) {
2038 $map = &$this->caseFolding[
'utf-8'][$opt];
2041 $map = &$this->toASCII[
'utf-8'];
2046 for ($i = 0; isset($str[$i]); $i++) {
2051 }
elseif (($c & 192) === 192) {
2053 for ($bc = 0; $c & 128; $c = $c << 1) {
2057 $mbc =
substr($str, $i, $bc);
2060 if (isset($map[$mbc])) {
2091 $shiftJis = $charset ===
'shift_jis';
2092 for ($i = 0; isset($str[$i]) && $i < $len; $i++) {
2095 if ($c >= 128 && $c < 160 || $c >= 224) {
2104 if (!isset($str[$i])) {
2110 return substr($str, 0, $len - 1);
2112 return substr($str, 0, $len);
2128 if ($byte_start ===
false) {
2132 $str =
substr($str, $byte_start);
2136 if ($byte_end ===
false) {
2139 return substr($str, 0, $byte_end);
2156 $sjis = $charset ===
'shift_jis';
2158 for ($i = 0; isset($str[$i]); $i++) {
2161 if ($c >= 128 && $c < 160 || $c >= 224) {
2184 $sjis = $charset ===
'shift_jis';
2196 for (; isset($str[$i]) && $n < $p; $i += $d) {
2199 if ($c >= 128 && $c < 160 || $c >= 224) {
2209 if (!isset($str[$i])) {
2237 $map = &$this->caseFolding[$charset][$opt];
2244 $map = &$this->toASCII[$charset];
2249 $sjis = $charset ===
'shift_jis';
2251 for ($i = 0; isset($str[$i]); $i++) {
2256 if ($c >= 128 && $c < 160 || $c >= 224) {
2257 $mbc =
substr($str, $i, 2);
2263 $mbc =
substr($str, $i, 2);
2267 if (isset($map[$mbc])) {