2 namespace TYPO3\CMS\IndexedSearch;
67 $this->langObject = TYPO3_MODE ===
'FE' ?
$GLOBALS[
'TSFE'] :
$GLOBALS[
'LANG'];
79 $indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
81 $exe = TYPO3_OS ==
'WIN' ?
'.exe' :
'';
87 if (in_array($extension, $ignoreExtensions)) {
88 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ignoreExtensions'), $extension), 1);
95 if ($indexerConfig[
'pdftools']) {
96 $pdfPath = rtrim($indexerConfig[
'pdftools'],
'/') .
'/';
97 if (@is_file(($pdfPath .
'pdftotext' . $exe)) && @is_file(($pdfPath .
'pdfinfo' . $exe))) {
98 $this->app[
'pdfinfo'] = $pdfPath .
'pdfinfo' . $exe;
99 $this->app[
'pdftotext'] = $pdfPath .
'pdftotext' . $exe;
104 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsNotFound'), $pdfPath), 3);
107 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsDisabled'), 1);
112 if ($indexerConfig[
'catdoc']) {
113 $catdocPath = rtrim($indexerConfig[
'catdoc'],
'/') .
'/';
114 if (@is_file(($catdocPath .
'catdoc' . $exe))) {
115 $this->app[
'catdoc'] = $catdocPath .
'catdoc' . $exe;
118 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:catdocNotFound'), $catdocPath), 3);
121 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:catdocDisabled'), 1);
128 if ($indexerConfig[
'ppthtml']) {
129 $ppthtmlPath = rtrim($indexerConfig[
'ppthtml'],
'/') .
'/';
130 if (@is_file(($ppthtmlPath .
'ppthtml' . $exe))) {
131 $this->app[
'ppthtml'] = $ppthtmlPath .
'ppthtml' . $exe;
134 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ppthtmlNotFound'), $ppthtmlPath), 3);
137 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:ppthtmlDisabled'), 1);
143 if ($indexerConfig[
'xlhtml']) {
144 $xlhtmlPath = rtrim($indexerConfig[
'xlhtml'],
'/') .
'/';
145 if (@is_file(($xlhtmlPath .
'xlhtml' . $exe))) {
146 $this->app[
'xlhtml'] = $xlhtmlPath .
'xlhtml' . $exe;
149 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:xlhtmlNotFound'), $xlhtmlPath), 3);
152 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:xlhtmlDisabled'), 1);
162 if ($indexerConfig[
'unzip']) {
163 $unzipPath = rtrim($indexerConfig[
'unzip'],
'/') .
'/';
164 if (@is_file($unzipPath .
'unzip' . $exe)) {
165 $this->app[
'unzip'] = $unzipPath .
'unzip' . $exe;
168 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipNotFound'), $unzipPath), 3);
171 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipDisabled'), 1);
181 if ($indexerConfig[
'unzip']) {
182 $unzipPath = rtrim($indexerConfig[
'unzip'],
'/') .
'/';
183 if (@is_file(($unzipPath .
'unzip' . $exe))) {
184 $this->app[
'unzip'] = $unzipPath .
'unzip' . $exe;
187 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipNotFound'), $unzipPath), 3);
190 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unzipDisabled'), 1);
195 if ($indexerConfig[
'unrtf']) {
196 $unrtfPath = rtrim($indexerConfig[
'unrtf'],
'/') .
'/';
197 if (@is_file(($unrtfPath .
'unrtf' . $exe))) {
198 $this->app[
'unrtf'] = $unrtfPath .
'unrtf' . $exe;
201 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unrtfNotFound'), $unrtfPath), 3);
204 $this->pObj->log_setTSlogMessage($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:unrtfDisabled'), 1);
218 $mainExtension =
'html';
225 $mainExtension =
'jpeg';
231 $this->supportedExtensions[$extension] =
true;
232 $this->ext2itemtype_map[$extension] = $mainExtension ?: $extension;
247 switch ($extension) {
291 $indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
294 if (in_array($extension, $ignoreExtensions)) {
298 switch ($extension) {
301 if ($indexerConfig[
'pdftools']) {
302 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PDF'), $extension);
307 if ($indexerConfig[
'catdoc']) {
308 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.DOC'), $extension);
315 if ($indexerConfig[
'ppthtml']) {
316 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PP'), $extension);
322 if ($indexerConfig[
'xlhtml']) {
323 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XLS'), $extension);
329 if ($indexerConfig[
'unzip']) {
330 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.DOC'), $extension);
336 if ($indexerConfig[
'unzip']) {
337 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.PP'), $extension);
342 if ($indexerConfig[
'unzip']) {
343 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XLS'), $extension);
348 if ($indexerConfig[
'unzip']) {
349 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXC'), $extension);
354 if ($indexerConfig[
'unzip']) {
355 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXI'), $extension);
360 if ($indexerConfig[
'unzip']) {
361 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.SXW'), $extension);
366 if ($indexerConfig[
'unzip']) {
367 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODS'), $extension);
372 if ($indexerConfig[
'unzip']) {
373 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODP'), $extension);
378 if ($indexerConfig[
'unzip']) {
379 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.ODT'), $extension);
384 if ($indexerConfig[
'unrtf']) {
385 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.RTF'), $extension);
392 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.Images'), $extension);
397 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.HTML'), $extension);
401 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.TXT'), $extension);
405 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.CSV'), $extension);
409 return sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:extension.XML'), $extension);
426 switch ((
string)$extension) {
441 protected function sL($reference, $useHtmlSpecialChar =
false)
443 return $this->langObject->sL($reference, $useHtmlSpecialChar);
463 if (!$this->supportedExtensions[$ext]) {
469 if ($this->app[
'pdfinfo']) {
472 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
476 if ((
int)$pdfInfo[
'pages']) {
477 list($low, $high) = explode(
'-', $cPKey);
481 @unlink($tempFileName);
483 $cmd = $this->app[
'pdftotext'] .
' -f ' . $low .
' -l ' . $high .
' -enc UTF-8 -q ' . escapeshellarg($absFile) .
' ' . $tempFileName;
485 if (@is_file($tempFileName)) {
487 unlink($tempFileName);
490 $this->pObj->log_setTSlogMessage(sprintf($this->
sL(
'LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsFailed'), $absFile), 2);
492 if ((
string)$content !==
'') {
493 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
496 if (!empty($pdfInfo[
'title'])) {
497 $contentArr[
'title'] = $pdfInfo[
'title'];
503 if ($this->app[
'catdoc']) {
505 $cmd = $this->app[
'catdoc'] .
' -d utf-8 ' . escapeshellarg($absFile);
507 $content = implode(LF, $res);
509 $contentArr = $this->pObj->splitRegularContent($this->
removeEndJunk($content));
515 if ($this->app[
'ppthtml']) {
517 $cmd = $this->app[
'ppthtml'] .
' ' . escapeshellarg($absFile);
519 $content = implode(LF, $res);
521 $content = $this->pObj->convertHTMLToUtf8($content);
522 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
523 $contentArr[
'title'] = basename($absFile);
528 if ($this->app[
'xlhtml']) {
530 $cmd = $this->app[
'xlhtml'] .
' -nc -te ' . escapeshellarg($absFile);
532 $content = implode(LF, $res);
534 $content = $this->pObj->convertHTMLToUtf8($content);
535 $contentArr = $this->pObj->splitHTMLContent($this->
removeEndJunk($content));
536 $contentArr[
'title'] = basename($absFile);
547 if ($this->app[
'unzip']) {
553 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' word/document.xml';
559 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' ppt/slides/slide1.xml';
564 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' xl/worksheets/sheet1.xml';
568 $content_xml = implode(LF, $res);
570 $utf8_content = trim(strip_tags(str_replace(
'<',
' <', $content_xml)));
571 $contentArr = $this->pObj->splitRegularContent($utf8_content);
573 $contentArr[
'title'] = basename($absFile);
575 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' docProps/core.xml';
577 $meta_xml = implode(LF, $res);
580 if (is_array($metaContent)) {
581 $contentArr[
'title'] .=
' ' . $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:title'][0][
'values'][0];
582 $contentArr[
'description'] = $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:subject'][0][
'values'][0];
583 $contentArr[
'description'] .=
' ' . $metaContent[
'cp:coreProperties'][0][
'ch'][
'dc:description'][0][
'values'][0];
584 $contentArr[
'keywords'] = $metaContent[
'cp:coreProperties'][0][
'ch'][
'cp:keywords'][0][
'values'][0];
595 if ($this->app[
'unzip']) {
598 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' content.xml';
600 $content_xml = implode(LF, $res);
603 $cmd = $this->app[
'unzip'] .
' -p ' . escapeshellarg($absFile) .
' meta.xml';
605 $meta_xml = implode(LF, $res);
607 $utf8_content = trim(strip_tags(str_replace(
'<',
' <', $content_xml)));
608 $contentArr = $this->pObj->splitRegularContent($utf8_content);
609 $contentArr[
'title'] = basename($absFile);
613 $metaContent = $metaContent[
'office:document-meta'][0][
'ch'][
'office:meta'][0][
'ch'];
614 if (is_array($metaContent)) {
615 $contentArr[
'title'] = $metaContent[
'dc:title'][0][
'values'][0] ? $metaContent[
'dc:title'][0][
'values'][0] : $contentArr[
'title'];
616 $contentArr[
'description'] = $metaContent[
'dc:subject'][0][
'values'][0] .
' ' . $metaContent[
'dc:description'][0][
'values'][0];
618 if (is_array($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'])) {
619 foreach ($metaContent[
'meta:keywords'][0][
'ch'][
'meta:keyword'] as $kwDat) {
620 $contentArr[
'keywords'] .= $kwDat[
'values'][0] .
' ';
628 if ($this->app[
'unrtf']) {
630 $cmd = $this->app[
'unrtf'] .
' ' . escapeshellarg($absFile);
632 $fileContent = implode(LF, $res);
634 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
635 $contentArr = $this->pObj->splitHTMLContent($fileContent);
645 $contentCharset =
'utf-8';
646 $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset);
647 $contentArr = $this->pObj->splitRegularContent($content);
648 $contentArr[
'title'] = basename($absFile);
655 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
656 $contentArr = $this->pObj->splitHTMLContent($fileContent);
663 preg_match(
'/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
664 $charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) :
'utf-8';
666 $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace(
'<',
' <', $fileContent)), $charset);
667 $contentArr = $this->pObj->splitRegularContent($fileContent);
668 $contentArr[
'title'] = basename($absFile);
677 if (function_exists(
'exif_read_data')) {
678 $exif = @exif_read_data($absFile,
'IFD0');
683 $comment = trim($exif[
'COMMENT'][0] .
' ' . $exif[
'ImageDescription']);
687 $contentArr = $this->pObj->splitRegularContent($comment);
688 $contentArr[
'title'] = basename($absFile);
696 if (is_array($contentArr) && !$contentArr[
'title']) {
698 $contentArr[
'title'] = str_replace(
'_',
' ', basename($absFile));
716 static $lastLocale = null;
717 if (!
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'UTF8filesystem']) {
722 if ($lastLocale == null) {
723 throw new \RuntimeException(
'Cannot reset locale to NULL.', 1357064326);
725 setlocale(LC_CTYPE, $lastLocale);
728 if ($lastLocale !== null) {
729 throw new \RuntimeException(
'Cannot set new locale as locale has already been changed before.', 1357064437);
731 $lastLocale = setlocale(LC_CTYPE, 0);
732 setlocale(LC_CTYPE,
$GLOBALS[
'TYPO3_CONF_VARS'][
'SYS'][
'systemLocale']);
753 $cmd = $this->app[
'pdfinfo'] .
' ' . escapeshellarg($absFile);
757 if ((
int)$pdfInfo[
'pages']) {
760 if ($this->pdf_mode > 0) {
761 $iter = ceil($pdfInfo[
'pages'] / $this->pdf_mode);
766 for ($a = 0; $a < $iter; $a++) {
767 $low = floor($a * ($pdfInfo[
'pages'] / $iter)) + 1;
768 $high = floor(($a + 1) * ($pdfInfo[
'pages'] / $iter));
769 $cParts[] = $low .
'-' . $high;
790 if (is_array($pdfInfoArray)) {
791 foreach ($pdfInfoArray as $line) {
792 $parts = explode(
':', $line, 2);
793 if (count($parts) > 1 && trim($parts[0])) {
794 $res[strtolower(trim($parts[0]))] = trim($parts[1]);
809 return trim(preg_replace(
'/[' . LF . chr(12) .
']*$/',
'', $string));
825 if ($extension ===
'htm') {
827 }
elseif ($extension ===
'jpeg') {
830 return 'EXT:indexed_search/Resources/Public/Icons/FileTypes/' . $extension .
'.gif';