2 namespace TYPO3\CMS\IndexedSearch;
// Human-readable explanations keyed by an integer status code. Going by the texts,
// negative codes describe cases where nothing was re-indexed and positive codes describe
// cases where the document is (re-)indexed.
// NOTE(review): the texts for codes 2 and 3 say "was exceed" (should read "was exceeded").
// They are runtime strings, so they are left untouched here; fix them in a code change.
31 -1 =>
'mtime matched the document, so no changes detected and no content updated',
32 -2 =>
'The minimum age was not exceeded',
33 1 =>
'The configured max-age was exceeded for the document and thus it\'s indexed.',
34 2 =>
'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
35 3 =>
'The minimum age was exceed, but mtime was not set, so the page was indexed.',
36 4 =>
'Page has never been indexed (is not represented in the index_phash table).'
242 if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler') && $pObj->applicationData[
'tx_crawler'][
'running'] && in_array(
'tx_indexedsearch_reindex', $pObj->applicationData[
'tx_crawler'][
'parameters'][
'procInstructions'])) {
244 $pObj->applicationData[
'tx_crawler'][
'log'][] =
'Forced Re-indexing enabled';
246 $this->crawlerActive =
true;
248 $this->forceIndexing =
true;
251 if ($pObj->config[
'config'][
'index_enable']) {
253 if (!
$indexerConfig[
'disableFrontendIndexing'] || $this->crawlerActive) {
254 if (!$pObj->page[
'no_search']) {
255 if (!$pObj->no_cache) {
256 if ((
int)$pObj->sys_language_uid === (
int)$pObj->sys_language_content) {
258 $this->conf = array();
260 $this->conf[
'id'] = $pObj->id;
262 $this->conf[
'type'] = $pObj->type;
264 $this->conf[
'sys_language_uid'] = $pObj->sys_language_uid;
266 $this->conf[
'MP'] = $pObj->MP;
268 $this->conf[
'gr_list'] = $pObj->gr_list;
270 $this->conf[
'cHash'] = $pObj->cHash;
272 $this->conf[
'cHash_array'] = $pObj->cHash_array;
274 $this->conf[
'crdate'] = $pObj->page[
'crdate'];
276 $this->conf[
'page_cache_reg1'] = $pObj->page_cache_reg1;
279 $this->conf[
'rootline_uids'] = array();
280 foreach ($pObj->config[
'rootLine'] as $rlkey => $rldat) {
281 $this->conf[
'rootline_uids'][$rlkey] = $rldat[
'uid'];
284 $this->conf[
'content'] = $pObj->content;
286 $this->conf[
'indexedDocTitle'] = $pObj->convOutputCharset($pObj->indexedDocTitle);
288 $this->conf[
'metaCharset'] = $pObj->metaCharset;
290 $this->conf[
'mtime'] = isset($pObj->register[
'SYS_LASTCHANGED']) ? $pObj->register[
'SYS_LASTCHANGED'] : $pObj->page[
'SYS_LASTCHANGED'];
293 $this->conf[
'index_externals'] = $pObj->config[
'config'][
'index_externals'];
295 $this->conf[
'index_descrLgd'] = $pObj->config[
'config'][
'index_descrLgd'];
297 $this->conf[
'index_metatags'] = isset($pObj->config[
'config'][
'index_metatags']) ? $pObj->config[
'config'][
'index_metatags'] :
true;
299 $this->conf[
'recordUid'] = 0;
300 $this->conf[
'freeIndexUid'] = 0;
301 $this->conf[
'freeIndexSetId'] = 0;
306 $this->
log_setTSlogMessage(
'Index page? No, ->sys_language_uid was different from sys_language_content which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
309 $this->
log_setTSlogMessage(
'Index page? No, page was set to "no_cache" and so cannot be indexed.');
312 $this->
log_setTSlogMessage(
'Index page? No, The "No Search" flag has been set in the page properties!');
315 $this->
log_setTSlogMessage(
'Index page? No, Ordinary Frontend indexing during rendering is disabled.');
338 public function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array = array(), $createCHash =
false)
341 $this->conf = array();
343 $this->conf[
'id'] = $id;
345 $this->conf[
'type'] = $type;
347 $this->conf[
'sys_language_uid'] = $sys_language_uid;
349 $this->conf[
'MP'] = $MP;
351 $this->conf[
'gr_list'] =
'0,-1';
359 $this->conf[
'cHash'] =
'';
362 $this->conf[
'cHash_array'] = $cHash_array;
365 $this->conf[
'freeIndexUid'] = 0;
366 $this->conf[
'freeIndexSetId'] = 0;
367 $this->conf[
'page_cache_reg1'] =
'';
369 $this->conf[
'rootline_uids'] = $uidRL;
371 $this->conf[
'index_externals'] = 1;
373 $this->conf[
'index_descrLgd'] = 200;
375 $this->conf[
'index_metatags'] =
true;
390 $this->conf[
'freeIndexUid'] = $freeIndexUid;
391 $this->conf[
'freeIndexSetId'] = $freeIndexSetId;
410 $this->conf[
'mtime'] = $mtime;
412 $this->conf[
'crdate'] = $crdate;
414 $this->conf[
'recordUid'] = $recordUid;
417 $this->conf[
'content'] =
'
420 <title>' . htmlspecialchars($title) .
'</title>
421 <meta name="keywords" content="' . htmlspecialchars($keywords) .
'" />
422 <meta name="description" content="' . htmlspecialchars($description) .
'" />
425 ' . htmlspecialchars($content) .
'
430 $this->conf[
'metaCharset'] = $charset;
432 $this->conf[
'indexedDocTitle'] =
'';
451 $this->cHashParams = $this->conf[
'cHash_array'];
452 if (is_array($this->cHashParams) && !empty($this->cHashParams)) {
453 if ($this->conf[
'cHash']) {
455 $this->cHashParams[
'cHash'] = $this->conf[
'cHash'];
457 unset($this->cHashParams[
'encryptionKey']);
462 $this->indexerConfig = unserialize(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXT'][
'extConf'][
'indexed_search']);
468 $this->enableMetaphoneSearch = !isset($this->indexerConfig[
'enableMetaphoneSearch']) || $this->indexerConfig[
'enableMetaphoneSearch'];
472 if ($this->conf[
'index_externals']) {
476 $lexerObjRef =
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'lexer'] ?
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'lexer'] :
'TYPO3\\CMS\\IndexedSearch\\Lexer';
478 $this->lexerObj->debug = $this->indexerConfig[
'debugMode'];
481 if ($this->enableMetaphoneSearch &&
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'metaphone']) {
483 $this->metaphoneObj->pObj = $this;
498 if (is_array(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'external_parsers'])) {
499 foreach (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'external_parsers'] as $extension => $_objRef) {
501 $this->external_parsers[$extension]->pObj = $this;
503 if (!$this->external_parsers[$extension]->initParser($extension)) {
504 unset($this->external_parsers[$extension]);
522 $check = $this->
checkMtimeTstamp($this->conf[
'mtime'], $this->hash[
'phash']);
524 if ($check > 0 || !$is_grlist || $this->forceIndexing) {
526 if ($this->forceIndexing) {
534 $this->
log_push(
'Split content',
'');
536 if ($this->conf[
'indexedDocTitle']) {
537 $this->contentParts[
'title'] = $this->conf[
'indexedDocTitle'];
546 if (!is_array($checkCHash) || $check === 1) {
548 $this->
log_push(
'Converting charset of content (' . $this->conf[
'metaCharset'] .
') to utf-8',
'');
552 $this->
log_push(
'Extract words from content',
'');
556 $this->
log_push(
'Analyse the extracted words',
'');
560 $this->
log_push(
'Submitting page',
'');
564 $this->
log_push(
'Check word list and submit words',
'');
567 $this->
submitWords($indexArr, $this->hash[
'phash']);
573 $this->
log_push(
'Checking external files',
'');
574 if ($this->conf[
'index_externals']) {
580 $this->
updateTstamp($this->hash[
'phash'], $this->conf[
'mtime']);
583 $this->
update_grlist($checkCHash[
'phash'], $this->hash[
'phash']);
585 $this->
log_setTSlogMessage(
'Indexing not needed, the contentHash, ' . $this->content_md5h .
', has not changed. Timestamp, grlist and rootline updated if necessary.');
603 $contentArr[
'body'] = stristr($content,
'<body');
604 $headPart = substr($content, 0, -strlen($contentArr[
'body']));
606 $this->
embracingTags($headPart,
'TITLE', $contentArr[
'title'], $dummy2, $dummy);
607 $titleParts = explode(
':', $contentArr[
'title'], 2);
608 $contentArr[
'title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);
610 if ($this->conf[
'index_metatags']) {
613 while ($this->
embracingTags($headPart,
'meta', $dummy, $headPart, $meta[$i])) {
617 for ($i = 0; isset($meta[$i]); $i++) {
619 if (stristr($meta[$i][
'name'],
'keywords')) {
622 if (stristr($meta[$i][
'name'],
'description')) {
623 $contentArr[
'description'] .=
',' . $meta[$i][
'content'];
630 $tagList = explode(
',', $this->excludeSections);
631 foreach ($tagList as $tag) {
632 while ($this->
embracingTags($contentArr[
'body'], $tag, $dummy, $contentArr[
'body'], $dummy2)) {
636 $contentArr[
'body'] = str_replace(
'<',
' <', $contentArr[
'body']);
637 $contentArr[
'body'] = trim(strip_tags($contentArr[
'body']));
638 $contentArr[
'keywords'] = trim($contentArr[
'keywords']);
639 $contentArr[
'description'] = trim($contentArr[
'description']);
652 if (preg_match(
'/<meta[[:space:]]+[^>]*http-equiv[[:space:]]*=[[:space:]]*["\']CONTENT-TYPE["\'][^>]*>/i', $content, $reg)) {
653 if (preg_match(
'/charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)/i', $reg[0], $reg2)) {
670 $charset = $this->csObj->parse_charset($charset);
672 if ($charset && $charset !==
'utf-8') {
673 $content = $this->csObj->utf8_encode($content, $charset);
676 return $this->csObj->entities_to_utf8($content,
true);
// Finds the first element named $tagName in $string (case-insensitive) and reports its
// pieces through the by-reference parameters $tagContent, $stringAfter and $paramList.
691 public function embracingTags($string, $tagName, &$tagContent, &$stringAfter, &$paramList)
// Closing tag to look for, e.g. </title> for tag name title.
693 $endTag =
'</' . $tagName .
'>';
// NOTE(review): the start tag is matched as a bare prefix (less-than sign plus tag name),
// so a longer tag name beginning with $tagName would match as well. Confirm this is intended.
694 $startTag =
'<' . $tagName;
// Everything from the first case-insensitive occurrence of the start tag onwards.
696 $isTagInText = stristr($string, $startTag);
// Split the text following the start tag name at the first greater-than sign: the part
// before it is the raw attribute text ($paramList), the part after it is what follows
// the opening tag.
701 list($paramList, $isTagInText) = explode(
'>', substr($isTagInText, strlen($startTag)), 2);
// Locate the closing tag (case-insensitive) in the remainder.
702 $afterTagInText = stristr($isTagInText, $endTag);
703 if ($afterTagInText) {
// Text that precedes the first occurrence of the start tag in the input.
704 $stringBefore = substr($string, 0, strpos(strtolower($string), strtolower($startTag)));
// Text between the end of the opening tag and the closing tag.
705 $tagContent = substr($isTagInText, 0, strlen($isTagInText) - strlen($afterTagInText));
// The input with the whole element cut out: text before it plus text after the closing tag.
706 $stringAfter = $stringBefore . substr($afterTagInText, strlen($endTag));
// Presumably the fallback used when no closing tag was found: hand back the text that
// follows the opening tag. TODO confirm against the enclosing branch.
709 $stringAfter = $isTagInText;
722 $expBody = preg_split(
'/\\<\\!\\-\\-[\\s]?TYPO3SEARCH_/', $body);
723 if (count($expBody) > 1) {
725 foreach ($expBody as $val) {
726 $part = explode(
'-->', $val, 2);
727 if (trim($part[0]) ==
'begin') {
730 }
elseif (trim($part[0]) ==
'end') {
752 if ($this->indexerConfig[
'useCrawlerForExternalFiles'] && \TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded(
'crawler')) {
757 foreach (
$list as $linkInfo) {
759 if ($linkInfo[
'localPath']) {
761 $linkSource = htmlspecialchars_decode($linkInfo[
'localPath']);
763 $linkSource = htmlspecialchars_decode($linkInfo[
'href']);
766 $qParts = parse_url($linkSource);
768 if ($qParts[
'query'] && strstr($qParts[
'query'],
'jumpurl=')) {
769 parse_str($qParts[
'query'], $getP);
770 $linkSource = $getP[
'jumpurl'];
771 $qParts = parse_url($linkSource);
773 if (!$linkInfo[
'localPath'] && $qParts[
'scheme']) {
774 if ($this->indexerConfig[
'indexExternalURLs']) {
778 }
elseif (!$qParts[
'query']) {
779 $linkSource = urldecode($linkSource);
781 $localFile = $linkSource;
785 if ($localFile && @is_file($localFile)) {
787 if ($linkInfo[
'localPath']) {
788 $fI = pathinfo($linkSource);
789 $ext = strtolower($fI[
'extension']);
790 if (is_object($crawler)) {
792 'document' => $linkSource,
793 'alturl' => $linkInfo[
'href'],
794 'conf' => $this->conf
796 unset($params[
'conf'][
'content']);
797 $crawler->addQueueEntry_callBack(0, $params, Hook\CrawlerFilesHook::class, $this->conf[
'id']);
798 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
803 if (is_object($crawler)) {
805 'document' => $linkSource,
806 'conf' => $this->conf
808 unset($params[
'conf'][
'content']);
809 $crawler->addQueueEntry_callBack(0, $params, Hook\CrawlerFilesHook::class, $this->conf[
'id']);
810 $this->
log_setTSlogMessage(
'media "' . $params[
'document'] .
'" added to "crawler" queue.', 1);
// Collect link data from the anchor tags found in $html.
830 $htmlParts = $htmlParser->splitTags(
'a', $html);
831 $hyperLinksData = array();
832 foreach ($htmlParts as $index => $tagData) {
// Only odd indexes are inspected; presumably splitTags() alternates between
// non-tag content (even) and tag markup (odd). TODO confirm.
833 if ($index % 2 !== 0) {
834 $tagAttributes = $htmlParser->get_tag_attributes($tagData,
true);
835 $firstTagName = $htmlParser->getFirstTagName($tagData);
836 if (strtolower($firstTagName) ===
'a') {
// Skip anchors without an href and links whose href begins with a hash character.
837 if ($tagAttributes[0][
'href'] && $tagAttributes[0][
'href'][0] !=
'#') {
838 $hyperLinksData[] = array(
840 'href' => $tagAttributes[0][
'href'],
847 return $hyperLinksData;
860 $htmlParts = $htmlParser->splitTags(
'base', $html);
861 foreach ($htmlParts as $index => $tagData) {
862 if ($index % 2 !== 0) {
863 $tagAttributes = $htmlParser->get_tag_attributes($tagData,
true);
864 $firstTagName = $htmlParser->getFirstTagName($tagData);
865 if (strtolower($firstTagName) ===
'base') {
866 $href = $tagAttributes[0][
'href'];
891 $qParts = parse_url($externalUrl);
892 $fI = pathinfo($qParts[
'path']);
893 $ext = strtolower($fI[
'extension']);
896 if (stristr($urlHeaders[
'Content-Type'],
'text/html')) {
898 if ((
string)$content !==
'') {
922 if ((
string)$content !==
'') {
926 foreach ($headers as $line) {
927 if (trim($line) ===
'') {
930 list($headKey, $headValue) = explode(
':', $line, 2);
931 $retVal[$headKey] = $headValue;
946 static $pathFunctions = array(
947 'createLocalPathFromT3vars',
948 'createLocalPathUsingAbsRefPrefix',
949 'createLocalPathUsingDomainURL',
950 'createLocalPathFromAbsoluteURL',
951 'createLocalPathFromRelativeURL'
953 foreach ($pathFunctions as $functionName) {
954 $localPath = $this->{$functionName}($sourcePath);
955 if ($localPath !=
'') {
973 $indexLocalFiles =
$GLOBALS[
'T3_VAR'][
'ext'][
'indexed_search'][
'indexLocalFiles'];
974 if (is_array($indexLocalFiles)) {
979 if (isset($indexLocalFiles[$md5]) && is_file($indexLocalFiles[$md5])) {
980 $localPath = $indexLocalFiles[$md5];
// If the source path starts with $baseURL, strip that prefix and map the remainder
// onto the file system below PATH_site.
996 $baseURLLength = strlen($baseURL);
997 if (substr($sourcePath, 0, $baseURLLength) == $baseURL) {
998 $sourcePath = substr($sourcePath, $baseURLLength);
999 $localPath = PATH_site . $sourcePath;
// The candidate is validated with isAllowedLocalFile() before it is accepted.
1000 if (!self::isAllowedLocalFile($localPath)) {
// Read the configured absRefPrefix from the frontend controller configuration.
1018 $absRefPrefix =
$GLOBALS[
'TSFE']->config[
'config'][
'absRefPrefix'];
1019 $absRefPrefixLength = strlen($absRefPrefix);
// Only applies when a non-empty prefix is configured and the source path starts with it:
// the prefix is stripped and the remainder is mapped below PATH_site.
1020 if ($absRefPrefixLength > 0 && substr($sourcePath, 0, $absRefPrefixLength) == $absRefPrefix) {
1021 $sourcePath = substr($sourcePath, $absRefPrefixLength);
1022 $localPath = PATH_site . $sourcePath;
// The candidate is validated with isAllowedLocalFile() before it is accepted.
1023 if (!self::isAllowedLocalFile($localPath)) {
// A source path with a leading slash is treated as relative to the site root: the slash
// is removed and the rest is appended to PATH_site.
// NOTE(review): the string offset access assumes $sourcePath is a non-empty string;
// an empty value would raise a PHP notice or warning here. Verify against callers.
1041 if ($sourcePath[0] ==
'/') {
1042 $sourcePath = substr($sourcePath, 1);
1043 $localPath = PATH_site . $sourcePath;
// The candidate is validated with isAllowedLocalFile() before it is accepted.
1044 if (!self::isAllowedLocalFile($localPath)) {
// A relative URL is appended to PATH_site as it is.
1060 if (self::isRelativeURL($sourcePath)) {
1061 $localPath = PATH_site . $sourcePath;
// The candidate is validated with isAllowedLocalFile() before it is accepted.
1062 if (!self::isAllowedLocalFile($localPath)) {
// NOTE(review): errors from parse_url() are silenced with the @ operator. For a URL that
// parse_url() cannot parse, the array accesses below operate on a non-array value,
// which presumably makes the URL count as relative. TODO confirm this is acceptable.
1077 $urlParts = @parse_url(
$url);
// Relative means: no scheme component, and the path does not begin with a slash.
1078 return $urlParts[
'scheme'] ==
'' && $urlParts[
'path'][0] !=
'/';
// A local file is allowed when its path starts with PATH_site and it is an existing
// regular file.
// NOTE(review): the prefix test is purely textual. A path that starts with PATH_site but
// then climbs out of it through parent-directory segments still passes this comparison.
// Consider normalising the path before comparing; confirm whether callers can pass such paths.
1090 $insideWebPath = substr($filePath, 0, strlen(PATH_site)) == PATH_site;
1091 $isFile = is_file($filePath);
1092 return $insideWebPath && $isFile;
1112 $fI = pathinfo($file);
1113 $ext = $altExtension ?: strtolower($fI[
'extension']);
1115 if (!$contentTmpFile) {
1125 $absFile = $contentTmpFile;
1128 if ($absFile && @is_file($absFile)) {
1129 if ($this->external_parsers[$ext]) {
1130 $fileInfo = stat($absFile);
1132 foreach ($cParts as $cPKey) {
1133 $this->internal_log = array();
1134 $this->
log_push(
'Index: ' . str_replace(
'.',
'_', basename($file)) . ($cPKey ?
'#' . $cPKey :
''),
'');
1136 $subinfo = array(
'key' => $cPKey);
1138 $phash_arr = ($this->file_phash_arr = $this->
setExtHashes($file, $subinfo));
1140 if ($check > 0 || $force) {
1147 if ($this->externalFileCounter < $this->maxExternalFiles || $force) {
1149 $this->
log_push(
'Split content',
'');
1157 $this->externalFileCounter++;
1159 $this->
log_push(
'Extract words from content',
'');
1163 $this->
log_push(
'Analyse the extracted words',
'');
1167 $this->
log_push(
'Submitting page',
'');
1172 $this->
log_push(
'Check word list and submit words',
'');
1175 $this->
submitWords($indexArr, $phash_arr[
'phash']);
1182 $this->
updateTstamp($phash_arr[
'phash'], $fileInfo[
'mtime']);
1189 $this->
log_setTSlogMessage(
'The limit of ' . $this->maxExternalFiles .
' has already been exceeded, so no indexing will take place this time.');
1200 $this->
log_setTSlogMessage(
'Indexing not possible; The extension "' . $ext .
'" was not supported.');
1203 $this->
log_setTSlogMessage(
'Indexing not possible; File "' . $absFile .
'" not found or valid.');
1218 $contentArray = null;
1220 if (is_object($this->external_parsers[$fileExtension])) {
1221 $contentArray = $this->external_parsers[$fileExtension]->readFileContent($fileExtension, $absoluteFileName, $sectionPointer);
1223 return $contentArray;
1237 if (is_object($this->external_parsers[$ext])) {
1238 $cParts = $this->external_parsers[$ext]->fileContentParts($ext, $absFile);
1253 $contentArr[
'body'] = $content;
// Walk over every content part; empty parts (after a string cast) are left alone.
1272 foreach ($contentArr as $key => $value) {
1273 if ((
string)$contentArr[$key] !==
'') {
// Content that is not already utf-8 is converted from $charset first.
1274 if ($charset !==
'utf-8') {
1275 $contentArr[$key] = $this->csObj->utf8_encode($contentArr[$key], $charset);
// Afterwards the part is passed through entities_to_utf8(); presumably this turns HTML
// entities into their UTF-8 characters. TODO confirm against the charset converter.
1278 $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key],
true);
// Replace every content part by the word list the lexer produces for it.
1292 foreach ($contentArr as $key => $value) {
1293 $contentArr[$key] = $this->lexerObj->split2Words($contentArr[$key]);
// Title, keywords and description are reduced to unique words. The body part is not
// de-duplicated in the lines shown here.
1296 $contentArr[
'title'] = array_unique($contentArr[
'title']);
1297 $contentArr[
'keywords'] = array_unique($contentArr[
'keywords']);
1298 $contentArr[
'description'] = array_unique($contentArr[
'description']);
1314 $bodyDescription = preg_replace(
'/\s+/u',
' ', $contentArr[
'body']);
1316 $bodyDescription = $this->csObj->strtrunc(
'utf-8', $bodyDescription, $maxL);
1318 return $bodyDescription;
1329 $indexArr = array();
1349 foreach ($content[$key] as $val) {
1350 $val = substr($val, 0, 60);
1352 if (!isset($retArr[$val])) {
1356 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1357 $retArr[$val][
'metaphone'] = $metaphone;
1360 if ($this->storeMetaphoneInfoAsWords) {
1361 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1364 $retArr[$val][
'cmp'] = $retArr[$val][
'cmp'] | pow(2, $offset);
1366 $retArr[$val][
'count']++;
1380 foreach ($content[
'body'] as $key => $val) {
1381 $val = substr($val, 0, 60);
1383 if (!isset($retArr[$val])) {
1385 $retArr[$val][
'first'] = $key;
1389 $metaphone = $this->enableMetaphoneSearch ? substr($this->
metaphone($val, $this->storeMetaphoneInfoAsWords), 0, 60) :
'';
1390 $retArr[$val][
'metaphone'] = $metaphone;
1393 if ($this->storeMetaphoneInfoAsWords) {
1394 $this->metaphoneContent .=
' ' . $retArr[$val][
'metaphone'];
1397 $retArr[$val][
'count']++;
1409 public function metaphone($word, $returnRawMetaphoneValue =
false)
1411 if (is_object($this->metaphoneObj)) {
1412 $metaphoneRawValue = $this->metaphoneObj->metaphone($word, $this->conf[
'sys_language_uid']);
1417 if ($returnRawMetaphoneValue) {
1418 $result = $metaphoneRawValue;
1419 }
elseif ($metaphoneRawValue !==
'') {
1444 'phash' => $this->hash[
'phash'],
1445 'phash_grouping' => $this->hash[
'phash_grouping'],
1446 'cHashParams' => serialize($this->cHashParams),
1447 'contentHash' => $this->content_md5h,
1448 'data_page_id' => $this->conf[
'id'],
1449 'data_page_reg1' => $this->conf[
'page_cache_reg1'],
1450 'data_page_type' => $this->conf[
'type'],
1451 'data_page_mp' => $this->conf[
'MP'],
1452 'gr_list' => $this->conf[
'gr_list'],
1455 'item_title' => $this->contentParts[
'title'],
1457 'item_mtime' => (
int)$this->conf[
'mtime'],
1458 'item_size' => strlen($this->conf[
'content']),
1461 'item_crdate' => $this->conf[
'crdate'],
1463 'sys_language_uid' => $this->conf[
'sys_language_uid'],
1466 'recordUid' => (
int)$this->conf[
'recordUid'],
1467 'freeIndexUid' => (
int)$this->conf[
'freeIndexUid'],
1468 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId']
1471 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_phash', $fields);
1474 $this->
submit_section($this->hash[
'phash'], $this->hash[
'phash']);
1476 $this->
submit_grlist($this->hash[
'phash'], $this->hash[
'phash']);
1479 'phash' => $this->hash[
'phash'],
1480 'fulltextdata' => implode(
' ', $this->contentParts),
1481 'metaphonedata' => $this->metaphoneContent
1483 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1484 $fields[
'fulltextdata'] = substr($fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1487 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_fulltext', $fields);
1490 if ($this->indexerConfig[
'debugMode']) {
1492 'phash' => $this->hash[
'phash'],
1493 'debuginfo' => serialize(array(
1494 'cHashParams' => $this->cHashParams,
1495 'external_parsers initialized' => array_keys($this->external_parsers),
1496 'conf' => array_merge($this->conf, array(
'content' => substr($this->conf[
'content'], 0, 1000))),
1497 'contentParts' => array_merge($this->contentParts, array(
'body' => substr($this->contentParts[
'body'], 0, 1000))),
1498 'logs' => $this->internal_log,
1499 'lexer' => $this->lexerObj->debugString
1503 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_debug', $fields);
1521 'phash_x' => $phash_x,
1523 'gr_list' => $this->conf[
'gr_list']
1526 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_grlist', $fields);
1542 'phash_t3' => $hash_t3,
1543 'page_id' => (
int)$this->conf[
'id']
1547 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_section', $fields);
1560 $tableArray = explode(
',',
'index_phash,index_section,index_grlist,index_fulltext,index_debug');
1561 foreach ($tableArray as $table) {
1563 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$phash);
1568 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery(
'index_section',
'phash_t3=' . (
int)$phash);
1594 $storeItemType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
1595 $storeItemType = $storeItemType ?: $ext;
1599 $fileParts = parse_url($file);
1602 'phash' =>
$hash[
'phash'],
1603 'phash_grouping' =>
$hash[
'phash_grouping'],
1604 'cHashParams' => serialize($subinfo),
1606 'data_filename' => $file,
1607 'item_type' => $storeItemType,
1608 'item_title' => trim(
$contentParts[
'title']) ?: basename($file),
1610 'item_mtime' => $mtime,
1611 'item_size' => $size,
1612 'item_crdate' => $ctime,
1615 'gr_list' => $this->conf[
'gr_list'],
1616 'externalUrl' => $fileParts[
'scheme'] ? 1 : 0,
1617 'recordUid' => (
int)$this->conf[
'recordUid'],
1618 'freeIndexUid' => (
int)$this->conf[
'freeIndexUid'],
1619 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId'],
1620 'sys_language_uid' => (
int)$this->conf[
'sys_language_uid']
1623 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_phash', $fields);
1627 'phash' =>
$hash[
'phash'],
1629 'metaphonedata' => $this->metaphoneContent
1631 if ($this->indexerConfig[
'fullTextDataLength'] > 0) {
1632 $fields[
'fulltextdata'] = substr($fields[
'fulltextdata'], 0, $this->indexerConfig[
'fullTextDataLength']);
1635 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_fulltext', $fields);
1638 if ($this->indexerConfig[
'debugMode']) {
1640 'phash' =>
$hash[
'phash'],
1641 'debuginfo' => serialize(array(
1642 'cHashParams' => $subinfo,
1644 'logs' => $this->internal_log,
1645 'lexer' => $this->lexerObj->debugString
1649 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_debug', $fields);
1681 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash',
'index_section',
'phash=' . (
int)
$hash .
' AND page_id=' . (
int)$this->conf[
'id']);
1697 $tableArray = explode(
',',
'index_phash,index_grlist,index_fulltext,index_debug');
1698 foreach ($tableArray as $table) {
1700 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$phash);
1724 $row =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'item_mtime,tstamp',
'index_phash',
'phash=' . (
int)$phash);
1727 if ($this->tstamp_maxAge && $row[
'tstamp'] + $this->tstamp_maxAge <
$GLOBALS[
'EXEC_TIME']) {
1732 if (!$this->tstamp_minAge || $row[
'tstamp'] + $this->tstamp_minAge <
$GLOBALS[
'EXEC_TIME']) {
1736 if ($row[
'item_mtime'] != $mtime) {
1743 if ($this->tstamp_maxAge) {
1744 $this->
log_setTSlogMessage(
'mtime matched, timestamp NOT updated because a maxAge is set (' . ($row[
'tstamp'] + $this->tstamp_maxAge -
$GLOBALS[
'EXEC_TIME']) .
' seconds to expire time).', 1);
1777 $row =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'phash',
'index_phash',
'phash_grouping=' . (
int)$this->hash[
'phash_grouping'] .
' AND contentHash=' . (
int)$this->content_md5h);
1797 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'*',
'index_phash',
'phash_grouping=' . (
int)$hashGr .
' AND contentHash=' . (
int)
$content_md5h);
1798 $result = $count == 0;
1813 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'phash_x',
'index_grlist',
'phash_x=' . (
int)$phash_x);
1814 $result = $count > 0;
1833 $this->
log_setTSlogMessage(
'Inserted gr_list \'' . $this->conf[
'gr_list'] .
'\' for phash \
'' . $phash .
'\'', 1);
1848 $updateFields = array(
1852 $updateFields[
'item_mtime'] = (int)$mtime;
1854 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1867 $updateFields = array(
1868 'freeIndexSetId' => (
int)$this->conf[
'freeIndexSetId']
1870 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1884 $updateFields = array(
1885 'parsetime' => (
int)$parsetime
1887 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_phash',
'phash=' . (
int)$phash, $updateFields);
1899 $updateFields = array();
1901 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_section',
'page_id=' . (
int)$this->conf[
'id'], $updateFields);
// Store the page uids of rootline levels 0, 1 and 2 as integers in rl0, rl1 and rl2.
// A level that is missing from the rootline ends up as 0 because of the int cast.
1914 $fieldArray[
'rl0'] = (int)$this->conf[
'rootline_uids'][0];
1915 $fieldArray[
'rl1'] = (int)$this->conf[
'rootline_uids'][1];
1916 $fieldArray[
'rl2'] = (int)$this->conf[
'rootline_uids'][2];
// Optional extra fields: the configuration maps a field name to the rootline level
// whose uid should be stored in that field.
1917 if (is_array(
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'addRootLineFields'])) {
1918 foreach (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'addRootLineFields'] as $fieldName => $rootLineLevel) {
1919 $fieldArray[$fieldName] = (int)$this->conf[
'rootline_uids'][$rootLineLevel];
// Make sure every word of $wordListArray has a row in index_words.
1948 if (!empty($wordListArray)) {
// Gather the integer word hashes; the int cast keeps the IN list below free of
// anything but numbers.
1949 $phashArray = array();
1950 foreach ($wordListArray as $value) {
1951 $phashArray[] = (int)$value[
'hash'];
1953 $cwl = implode(
',', $phashArray);
// Fast path: count how many of these hashes are already stored.
1954 $count =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'baseword',
'index_words',
'wid IN (' . $cwl .
')');
1955 $wordListArrayCount = count($wordListArray);
// Only when the stored count differs from the list size are the stored words fetched.
// NOTE(review): the comparison is strict, so it relies on exec_SELECTcountRows()
// returning an integer. TODO confirm.
1956 if ($count !== $wordListArrayCount) {
1957 $res =
$GLOBALS[
'TYPO3_DB']->exec_SELECTquery(
'baseword',
'index_words',
'wid IN (' . $cwl .
')');
// Drop every word that already exists; the list is keyed by base word.
1959 while (
false != ($row =
$GLOBALS[
'TYPO3_DB']->sql_fetch_assoc($res))) {
1960 unset($wordListArray[$row[
'baseword']]);
1962 $GLOBALS[
'TYPO3_DB']->sql_free_result($res);
// What remains in the list is not stored yet and gets inserted, one row per word.
1963 foreach ($wordListArray as $key => $val) {
1964 $insertFields = array(
1965 'wid' => $val[
'hash'],
1967 'metaphone' => $val[
'metaphone']
1970 $GLOBALS[
'TYPO3_DB']->exec_INSERTquery(
'index_words', $insertFields);
1987 $stopWords =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'wid',
'index_words',
'is_stopword != 0',
'',
'',
'',
'wid');
1989 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery(
'index_rel',
'phash=' . (
int)$phash);
1990 $fields = array(
'phash',
'wid',
'count',
'first',
'freq',
'flags');
1992 foreach ($wordList as $val) {
1993 if (isset($stopWords[$val[
'hash']])) {
2001 $this->
freqMap($val[
'count'] / $this->wordcount),
2002 $val[
'cmp'] & $this->flagBitMask
2005 $GLOBALS[
'TYPO3_DB']->exec_INSERTmultipleRows(
'index_rel', $fields, $rows);
2020 $newFreq = $freq * $mapFactor;
2021 $newFreq = $newFreq > $this->freqRange ? $this->freqRange : $newFreq;
2023 $newFreq = $freq / $mapFactor;
2042 'id' => (
int)$this->conf[
'id'],
2043 'type' => (
int)$this->conf[
'type'],
2044 'sys_lang' => (
int)$this->conf[
'sys_language_uid'],
2045 'MP' => (
string)$this->conf[
'MP'],
2046 'cHash' => $this->cHashParams
2051 $hArray[
'gr_list'] = (string)$this->conf[
'gr_list'];
2072 $hArray[
'subinfo'] = $subinfo;
2118 $GLOBALS[
'TT']->setTSlogMessage($msg, $errorNum);
2120 $this->internal_log[] = $msg;
2134 return ' ' . implode(
', ', $keywords) .
' ';