2 namespace TYPO3\CMS\IndexedSearch\Hook;
17 use TYPO3\CMS\Backend\Form\FormEngine;
56 $indexingConfigurations =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*',
'index_config',
'hidden=0
57 AND (starttime=0 OR starttime<=' .
$GLOBALS[
'EXEC_TIME'] .
')
58 AND timer_next_indexing<' .
$GLOBALS[
'EXEC_TIME'] .
'
62 foreach ($indexingConfigurations as $cfgRec) {
70 'timer_next_indexing' => $nextTime,
73 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
75 switch ($cfgRec[
'type']) {
80 'indexConfigUid' => $cfgRec[
'uid'],
81 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
82 'url' =>
'Records (start)'
85 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
91 'indexConfigUid' => $cfgRec[
'uid'],
93 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
95 'url' => $cfgRec[
'filepath'],
99 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
105 'indexConfigUid' => $cfgRec[
'uid'],
107 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
109 'url' => $cfgRec[
'externalUrl'],
113 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
119 'indexConfigUid' => $cfgRec[
'uid'],
121 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
123 'url' => (
int)$cfgRec[
'alternative_source_pid'],
127 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
134 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
136 if (is_object($hookObj)) {
139 'indexConfigUid' => $cfgRec[
'uid'],
141 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
'/CUSTOM]'),
143 'url' => $hookObj->initMessage($message)
145 $pObj->addQueueEntry_callBack($setId, $params, $this->callBack, $cfgRec[
'pid']);
164 if ($params[
'indexConfigUid']) {
166 $cfgRec =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetSingleRow(
'*',
'index_config',
'uid=' . (
int)$params[
'indexConfigUid']);
167 if (is_array($cfgRec)) {
169 $session_data = unserialize($cfgRec[
'session_data']);
171 switch ($cfgRec[
'type']) {
193 if (
$GLOBALS[
'TYPO3_CONF_VARS'][
'EXTCONF'][
'indexed_search'][
'crawler'][$cfgRec[
'type']]) {
195 if (is_object($hookObj)) {
198 $hookObj->indexOperation($cfgRec, $session_data, $params, $this);
203 $field_array = array(
204 'session_data' => serialize($session_data)
206 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
209 return array(
'log' => $params);
223 if ($cfgRec[
'table2index'] && isset(
$GLOBALS[
'TCA'][$cfgRec[
'table2index']])) {
225 if (!is_array($session_data)) {
226 $session_data = array(
231 $pid = (int)$cfgRec[
'alternative_source_pid'] ?: $cfgRec[
'pid'];
232 $numberOfRecords = $cfgRec[
'recordsbatch'] ? \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec[
'recordsbatch'], 1) : 100;
236 $recs =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*', $cfgRec[
'table2index'],
'pid = ' . $pid .
'
240 foreach ($recs as $r) {
244 $session_data[
'uid'] = $r[
'uid'];
248 'indexConfigUid' => $cfgRec[
'uid'],
249 'url' =>
'Records from UID#' . ($r[
'uid'] + 1) .
'-?',
250 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']')
252 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
269 $readpath = $params[
'url'];
274 if (@is_file($readpath)) {
280 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, 0,
'', $rl);
281 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
282 $indexerObj->hash[
'phash'] = -1;
285 $indexerObj->indexRegularDocument(\TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($readpath),
true);
286 }
elseif (@is_dir($readpath)) {
293 if (is_array($directoryList) && $params[
'depth'] < $cfgRec[
'depth']) {
294 foreach ($directoryList as $subdir) {
295 if ((
string)$subdir !=
'') {
296 $files[] = $readpath . $subdir .
'/';
302 foreach ($files as $path) {
303 $this->instanceCounter++;
304 if ($path !== $params[
'url']) {
307 'indexConfigUid' => $cfgRec[
'uid'],
309 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
310 'depth' => $params[
'depth'] + 1
312 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
331 if (!is_array($session_data)) {
332 $session_data = array(
333 'urlLog' => array($params[
'url'])
338 $subUrls = $this->
indexExtUrl($params[
'url'], $cfgRec[
'pid'], $rl, $cfgRec[
'uid'], $cfgRec[
'set_id']);
340 if ($params[
'depth'] < $cfgRec[
'depth']) {
341 foreach ($subUrls as
$url) {
342 if ($url = $this->
checkUrl($url, $session_data[
'urlLog'], $cfgRec[
'externalUrl'])) {
344 $this->instanceCounter++;
345 $session_data[
'urlLog'][] =
$url;
348 'indexConfigUid' => $cfgRec[
'uid'],
350 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
351 'depth' => $params[
'depth'] + 1
353 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
372 $pageUid = (int)$params[
'url'];
375 $res = $pObj->getUrlsForPageRow($pageRow);
376 $duplicateTrack = array();
378 $downloadUrls = array();
382 foreach ($res as $paramSetKey => $vv) {
383 $urlList = $pObj->urlListFromUrlArray($vv, $pageRow,
$GLOBALS[
'EXEC_TIME'], 30, 1, 0, $duplicateTrack, $downloadUrls, array(
'tx_indexedsearch_reindex'));
387 if ($params[
'depth'] < $cfgRec[
'depth']) {
392 foreach ($recs as $r) {
393 $this->instanceCounter++;
394 $url =
'pages:' . $r[
'uid'] .
': ' . $r[
'title'];
395 $session_data[
'urlLog'][] =
$url;
398 'indexConfigUid' => $cfgRec[
'uid'],
400 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']'),
401 'depth' => $params[
'depth'] + 1
403 $pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid'],
$GLOBALS[
'EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl);
419 foreach ($runningIndexingConfigurations as $cfgRec) {
421 $queued_items =
$GLOBALS[
'TYPO3_DB']->exec_SELECTcountRows(
'*',
'tx_crawler_queue',
'set_id=' . (
int)$cfgRec[
'set_id'] .
' AND exec_time=0');
422 if (!$queued_items) {
424 $oldPhashRows =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'phash',
'index_phash',
'freeIndexUid=' . (
int)$cfgRec[
'uid'] .
' AND freeIndexSetId<>' . (
int)$cfgRec[
'set_id']);
425 foreach ($oldPhashRows as $pHashRow) {
427 $tableArr = array(
'index_phash',
'index_rel',
'index_section',
'index_grlist',
'index_fulltext',
'index_debug');
428 foreach ($tableArr as $table) {
429 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table,
'phash=' . (
int)$pHashRow[
'phash']);
433 $field_array = array(
437 $GLOBALS[
'TYPO3_DB']->exec_UPDATEquery(
'index_config',
'uid=' . (
int)$cfgRec[
'uid'], $field_array);
457 $url = preg_replace(
'/\\/\\/$/',
'/',
$url);
459 if (!strstr(
$url,
'../')) {
461 if (!in_array(
$url, $urlLog)) {
482 $indexerObj->backend_initIndexer($pageId, 0, 0,
'', $rl);
483 $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
484 $indexerObj->hash[
'phash'] = -1;
486 $indexerObj->indexExternalUrl(
$url);
487 $url_qParts = parse_url(
$url);
488 $baseAbsoluteHref = $url_qParts[
'scheme'] .
'://' . $url_qParts[
'host'];
489 $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
492 $baseHref = $baseAbsoluteHref;
493 $baseHref .= substr($url_qParts[
'path'], 0, strrpos($url_qParts[
'path'],
'/'));
495 $baseHref = rtrim($baseHref,
'/');
498 $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
500 foreach (
$list as $count => $linkInfo) {
502 $subUrl = htmlspecialchars_decode($linkInfo[
'href']);
503 $qParts = parse_url($subUrl);
504 if (!$qParts[
'scheme']) {
506 if ($relativeUrl[0] ===
'/') {
507 $subUrl = $baseAbsoluteHref . $relativeUrl;
509 $subUrl = $baseHref .
'/' . $relativeUrl;
512 $subUrls[] = $subUrl;
530 $languageField =
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'languageField'];
531 $sys_language_uid = $languageField ? $r[$languageField] : 0;
534 parse_str(str_replace(
'###UID###', $r[
'uid'], $cfgRec[
'get_params']), $GETparams);
535 $indexerObj->backend_initIndexer($cfgRec[
'pid'], 0, $sys_language_uid,
'', $rl, $GETparams, (
bool)$cfgRec[
'chashcalc']);
536 $indexerObj->backend_setFreeIndexUid($cfgRec[
'uid'], $cfgRec[
'set_id']);
537 $indexerObj->forceIndexing =
true;
539 foreach ($fieldList as $k => $v) {
543 $theContent .= $r[$v] .
' ';
547 $indexerObj->backend_indexAsTYPO3Page(strip_tags(str_replace(
'<',
' <', $theTitle)),
'',
'', strip_tags(str_replace(
'<',
' <', $theContent)),
$GLOBALS[
'LANG']->charSet, $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'tstamp']], $r[
$GLOBALS[
'TCA'][$cfgRec[
'table2index']][
'ctrl'][
'crdate']], $r[
'uid']);
565 $rootLine = $sys_page->getRootLine($id);
567 $tmpl->runThroughTemplates($rootLine, 0);
569 $rootline_uids = array();
570 foreach ($tmpl->rootLine as $rlkey => $rldat) {
571 $rootline_uids[$rlkey] = $rldat[
'uid'];
573 return $rootline_uids;
584 $currentTime =
$GLOBALS[
'EXEC_TIME'];
586 if ($cfgRec[
'timer_frequency'] <= 24 * 3600) {
587 $aMidNight = mktime(0, 0, 0) - 1 * 24 * 3600;
589 $lastTime = $cfgRec[
'timer_next_indexing'] ?:
$GLOBALS[
'EXEC_TIME'];
590 $aMidNight = mktime(0, 0, 0, date(
'm', $lastTime), date(
'd', $lastTime), date(
'y', $lastTime));
593 $lastSureOffset = $aMidNight + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec[
'timer_offset'], 0, 86400);
594 $frequencySeconds = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec[
'timer_frequency'], 1);
596 $frequencyBlocksUntilNextTime = ceil(($currentTime - $lastSureOffset) / $frequencySeconds);
598 return $lastSureOffset + $frequencyBlocksUntilNextTime * $frequencySeconds;
610 if (trim($url_deny)) {
612 foreach ($url_denyArray as $testurl) {
614 echo
$url .
' /// ' . $url_deny . LF;
632 'indexConfigUid' => $cfgRec[
'uid'],
635 'procInstructions' => array(
'[Index Cfg UID#' . $cfgRec[
'uid'] .
']')
637 $this->pObj->addQueueEntry_callBack($cfgRec[
'set_id'], $nparams, $this->callBack, $cfgRec[
'pid']);
649 $oldPhashRows =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'phash',
'index_section',
'page_id=' . (
int)$id);
650 if (!empty($oldPhashRows)) {
651 $pHashesToDelete = array();
652 foreach ($oldPhashRows as $pHashRow) {
653 $pHashesToDelete[] = $pHashRow[
'phash'];
655 $where_clause =
'phash IN (' . implode(
',',
$GLOBALS[
'TYPO3_DB']->cleanIntArray($pHashesToDelete)) .
')';
664 foreach ($tables as $table) {
665 $GLOBALS[
'TYPO3_DB']->exec_DELETEquery($table, $where_clause);
688 if ($command ===
'delete' && $table ===
'pages') {
706 if (!empty($fieldArray)) {
708 if ($status ===
'new') {
709 $id = $pObj->substNEWwithIDs[$id];
710 }
elseif ($table ===
'pages' && $status ===
'update' && (array_key_exists(
'hidden', $fieldArray) && $fieldArray[
'hidden'] == 1 || array_key_exists(
'no_search', $fieldArray) && $fieldArray[
'no_search'] == 1)) {
716 if (is_array($currentRecord)) {
718 $indexingConfigurations =
$GLOBALS[
'TYPO3_DB']->exec_SELECTgetRows(
'*',
'index_config',
'hidden=0
719 AND (starttime=0 OR starttime<=' .
$GLOBALS[
'EXEC_TIME'] .
')
722 AND table2index=' .
$GLOBALS[
'TYPO3_DB']->fullQuoteStr($table,
'index_config') .
'
724 (alternative_source_pid=0 AND pid=' . (
int)$currentRecord[
'pid'] .
')
725 OR (alternative_source_pid=' . (
int)$currentRecord[
'pid'] .
')
727 AND records_indexonchange=1
729 foreach ($indexingConfigurations as $cfgRec) {