TYPO3  7.6
Hook/CrawlerHook.php
Go to the documentation of this file.
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Hook;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
17 use TYPO3\CMS\Backend\Form\FormEngine;
20 
25 {
32 
/**
 * Running counter, incremented for every path, URL or subpage handled by the
 * type 2, 3 and 4 executors. It is multiplied with $this->secondsPerExternalUrl
 * and added to the current execution time when queue entries are scheduled.
 *
 * @var int
 */
38  public $instanceCounter = 0;
39 
/**
 * Class name passed as third argument to addQueueEntry_callBack() on the parent
 * object for every queue entry this hook creates.
 *
 * @var string
 */
43  public $callBack = CrawlerHook::class;
44 
/**
 * Crawler initialization hook: starts every indexing configuration that is due.
 *
 * Selects all index_config records that are not hidden, whose start time has been
 * reached, whose next indexing time lies in the past and which carry no set-ID.
 * Each of them is stamped with a new set-ID and its next indexing time, after which
 * a first queue entry matching the configuration type is handed to the parent object.
 *
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_init(&$pObj)
{
    $now = $GLOBALS['EXEC_TIME'];
    $dueConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        '*',
        'index_config',
        'hidden=0
            AND (starttime=0 OR starttime<=' . $now . ')
            AND timer_next_indexing<' . $now . '
            AND set_id=0
            ' . BackendUtility::deleteClause('index_config')
    );
    foreach ($dueConfigurations as $cfgRec) {
        // The set-ID groups all queue entries that belong to this indexing run
        $setId = GeneralUtility::md5int(microtime());
        $nextTime = $this->generateNextIndexingTime($cfgRec);
        // Flag the configuration as running and wipe its session data
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery(
            'index_config',
            'uid=' . (int)$cfgRec['uid'],
            [
                'set_id' => $setId,
                'timer_next_indexing' => $nextTime,
                'session_data' => ''
            ]
        );

        // Build the parameters of the first queue entry; null means "queue nothing"
        $instruction = '[Index Cfg UID#' . $cfgRec['uid'] . ']';
        $queueParams = null;
        switch ($cfgRec['type']) {
            case 1:
                // Records
                $queueParams = [
                    'indexConfigUid' => $cfgRec['uid'],
                    'procInstructions' => [$instruction],
                    'url' => 'Records (start)'
                ];
                break;
            case 2:
                // Files: start at the configured file path
                $queueParams = [
                    'indexConfigUid' => $cfgRec['uid'],
                    'procInstructions' => [$instruction],
                    'url' => $cfgRec['filepath'],
                    'depth' => 0
                ];
                break;
            case 3:
                // External URL
                $queueParams = [
                    'indexConfigUid' => $cfgRec['uid'],
                    'procInstructions' => [$instruction],
                    'url' => $cfgRec['externalUrl'],
                    'depth' => 0
                ];
                break;
            case 4:
                // Page tree: the "url" is the uid of the root page
                $queueParams = [
                    'indexConfigUid' => $cfgRec['uid'],
                    'procInstructions' => [$instruction],
                    'url' => (int)$cfgRec['alternative_source_pid'],
                    'depth' => 0
                ];
                break;
            case 5:
                // Meta configuration: nothing to queue
                break;
            default:
                // Custom type: delegate to a hook object registered for this type
                $hookReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']];
                if ($hookReference) {
                    $hookObj = GeneralUtility::getUserObj($hookReference);
                    if (is_object($hookObj)) {
                        $queueParams = [
                            'indexConfigUid' => $cfgRec['uid'],
                            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . '/CUSTOM]'],
                            // NOTE(review): $message is never assigned in this method - confirm what initMessage() expects
                            'url' => $hookObj->initMessage($message)
                        ];
                    }
                }
        }
        if ($queueParams !== null) {
            $pObj->addQueueEntry_callBack($setId, $queueParams, $this->callBack, $cfgRec['pid']);
        }
    }
    // Finally, look up all old index configurations which are finished and need to be reset and done.
    // NOTE(review): no statement follows this comment in the listing this was taken from - confirm against upstream
}
153 
/**
 * Crawler callback: processes one queue entry created by this hook.
 *
 * Loads the index_config record named by $params['indexConfigUid'], unserializes its
 * session data, dispatches to the executor for the record's type and finally writes
 * the (possibly modified) session data back to the record.
 *
 * @param array $params Queue entry parameters; 'indexConfigUid' selects the configuration
 * @param object $pObj Parent object, passed on to the type specific executors
 * @return array Array with the key 'log' holding the received $params
 */
public function crawler_execute($params, &$pObj)
{
    $result = ['log' => $params];
    // Without a configuration uid there is nothing to do
    if (!$params['indexConfigUid']) {
        return $result;
    }
    $cfgRec = $GLOBALS['TYPO3_DB']->exec_SELECTgetSingleRow(
        '*',
        'index_config',
        'uid=' . (int)$params['indexConfigUid']
    );
    if (!is_array($cfgRec)) {
        return $result;
    }

    // Session data travels between queue entries through the configuration record
    $sessionData = unserialize($cfgRec['session_data']);
    switch ($cfgRec['type']) {
        case 1:
            // Records
            $this->crawler_execute_type1($cfgRec, $sessionData, $params, $pObj);
            break;
        case 2:
            // Files
            $this->crawler_execute_type2($cfgRec, $sessionData, $params, $pObj);
            break;
        case 3:
            // External URL
            $this->crawler_execute_type3($cfgRec, $sessionData, $params, $pObj);
            break;
        case 4:
            // Page tree
            $this->crawler_execute_type4($cfgRec, $sessionData, $params, $pObj);
            break;
        case 5:
            // Meta configuration: crawler_init() never queues entries for it
            break;
        default:
            $hookReference = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']];
            if ($hookReference) {
                $hookObj = GeneralUtility::getUserObj($hookReference);
                if (is_object($hookObj)) {
                    // Kept on the instance so that addQueueEntryForHook() can reach the parent object
                    $this->pObj = $pObj;
                    $hookObj->indexOperation($cfgRec, $sessionData, $params, $this);
                }
            }
    }

    // Persist whatever the executor left in the session data
    $GLOBALS['TYPO3_DB']->exec_UPDATEquery(
        'index_config',
        'uid=' . (int)$cfgRec['uid'],
        ['session_data' => serialize($sessionData)]
    );
    return $result;
}
211 
/**
 * Executor for type 1 (records): indexes one batch of records of the configured table.
 *
 * Reads records with a uid above $session_data['uid'] from the source page, indexes
 * each of them, remembers the last uid in $session_data and, if any record was found,
 * queues a follow-up entry for the next batch.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, modified by reference ('uid' = last processed uid)
 * @param array $params Queue entry parameters (not read here)
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type1($cfgRec, &$session_data, $params, &$pObj)
{
    $table = $cfgRec['table2index'];
    // Only tables known to TCA can be indexed
    if (!$table || !isset($GLOBALS['TCA'][$table])) {
        return;
    }
    if (!is_array($session_data)) {
        $session_data = ['uid' => 0];
    }

    // Records are read from the alternative source page if one is set
    $pid = (int)$cfgRec['alternative_source_pid'] ?: $cfgRec['pid'];
    // Batch size defaults to 100 records
    $batchSize = 100;
    if ($cfgRec['recordsbatch']) {
        $batchSize = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['recordsbatch'], 1);
    }
    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);

    $where = 'pid = ' . $pid . '
        AND uid > ' . (int)$session_data['uid']
        . BackendUtility::deleteClause($table)
        . BackendUtility::BEenableFields($table);
    $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('*', $table, $where, '', 'uid', $batchSize);
    if (empty($rows)) {
        return;
    }

    $lastUid = null;
    foreach ($rows as $row) {
        $this->indexSingleRecord($row, $cfgRec, $rootLine);
        // Remember how far we got so the next batch continues after this record
        $lastUid = $row['uid'];
        $session_data['uid'] = $lastUid;
    }

    // Queue the next batch
    $pObj->addQueueEntry_callBack(
        $cfgRec['set_id'],
        [
            'indexConfigUid' => $cfgRec['uid'],
            'url' => 'Records from UID#' . ($lastUid + 1) . '-?',
            'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']']
        ],
        $this->callBack,
        $cfgRec['pid']
    );
}
256 
/**
 * Executor for type 2 (files): indexes a file or queues the content of a directory.
 *
 * $params['url'] is made absolute and checked. A file is indexed directly. For a
 * directory, the contained files (filtered by the configured extensions) and - while
 * the configured depth is not reached - its subdirectories are added to the queue.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data (not used here)
 * @param array $params Queue entry parameters; 'url' is the path, 'depth' the current depth
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type2($cfgRec, &$session_data, $params, &$pObj)
{
    $absolutePath = $params['url'];
    if (!GeneralUtility::isAbsPath($absolutePath)) {
        $absolutePath = GeneralUtility::getFileAbsFileName($absolutePath);
    }
    if (!GeneralUtility::isAllowedAbsPath($absolutePath)) {
        return;
    }

    if (@is_file($absolutePath)) {
        // A single file: index it. The root line has to be supplied for external files.
        $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
        $indexer = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
        $indexer->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rootLine);
        $indexer->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
        // EXPERIMENT - but to avoid phash_t3 being written to file sections
        // (otherwise they are removed when page is reindexed!!!)
        $indexer->hash['phash'] = -1;
        $indexer->indexRegularDocument(
            \TYPO3\CMS\Core\Utility\PathUtility::stripPathSitePrefix($absolutePath),
            true
        );
        return;
    }
    if (!@is_dir($absolutePath)) {
        return;
    }

    // A directory: collect its files ...
    $extensionList = implode(',', GeneralUtility::trimExplode(',', $cfgRec['extensions'], true));
    $collected = [];
    $entries = GeneralUtility::getAllFilesAndFoldersInPath($collected, $absolutePath, $extensionList, 0, 0);
    // ... and, as long as the depth limit allows it, its subdirectories
    $subDirectories = GeneralUtility::get_dirs($absolutePath);
    if (is_array($subDirectories) && $params['depth'] < $cfgRec['depth']) {
        foreach ($subDirectories as $subDirectory) {
            if ((string)$subDirectory != '') {
                $entries[] = $absolutePath . $subDirectory . '/';
            }
        }
    }
    $entries = GeneralUtility::removePrefixPathFromList($entries, PATH_site);

    foreach ($entries as $entry) {
        // The counter advances for every entry, also for the one that is skipped below
        $this->instanceCounter++;
        if ($entry === $params['url']) {
            continue;
        }
        $pObj->addQueueEntry_callBack(
            $cfgRec['set_id'],
            [
                'indexConfigUid' => $cfgRec['uid'],
                'url' => $entry,
                'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
                'depth' => $params['depth'] + 1
            ],
            $this->callBack,
            $cfgRec['pid'],
            $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
        );
    }
}
318 
/**
 * Executor for type 3 (external URL): indexes a URL and queues the links found on it.
 *
 * The URL in $params['url'] is indexed. While the configured depth is not reached,
 * every link that passes checkUrl() and is not denied by checkDeniedSuburls() is
 * recorded in $session_data['urlLog'] and added to the queue.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, modified by reference ('urlLog' = URLs already seen)
 * @param array $params Queue entry parameters; 'url' is the URL, 'depth' the current depth
 * @param object $pObj Parent object; addQueueEntry_callBack() is called on it
 * @return void
 */
public function crawler_execute_type3($cfgRec, &$session_data, $params, &$pObj)
{
    if (!is_array($session_data)) {
        // First entry of the run: the start URL counts as already seen
        $session_data = ['urlLog' => [$params['url']]];
    }

    $rootLine = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $linkedUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rootLine, $cfgRec['uid'], $cfgRec['set_id']);

    // Links are only followed while the depth limit is not reached
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }
    foreach ($linkedUrls as $candidate) {
        $acceptedUrl = $this->checkUrl($candidate, $session_data['urlLog'], $cfgRec['externalUrl']);
        if (!$acceptedUrl) {
            continue;
        }
        if ($this->checkDeniedSuburls($acceptedUrl, $cfgRec['url_deny'])) {
            continue;
        }
        $this->instanceCounter++;
        $session_data['urlLog'][] = $acceptedUrl;
        $pObj->addQueueEntry_callBack(
            $cfgRec['set_id'],
            [
                'indexConfigUid' => $cfgRec['uid'],
                'url' => $acceptedUrl,
                'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
                'depth' => $params['depth'] + 1
            ],
            $this->callBack,
            $cfgRec['pid'],
            $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
        );
    }
}
359 
/**
 * Executor for type 4 (page tree): submits the URLs of a page and queues its subpages.
 *
 * The page uid is taken from $params['url']. The URLs the parent object computes for
 * that page are submitted with the 'tx_indexedsearch_reindex' instruction. While the
 * configured depth is not reached, every non-deleted subpage is added to the queue.
 *
 * @param array $cfgRec Indexing configuration record
 * @param array $session_data Session data, modified by reference ('urlLog' receives one entry per subpage)
 * @param array $params Queue entry parameters; 'url' is the page uid, 'depth' the current depth
 * @param object $pObj Parent object; getUrlsForPageRow(), urlListFromUrlArray() and addQueueEntry_callBack() are called on it
 * @return void
 */
public function crawler_execute_type4($cfgRec, &$session_data, $params, &$pObj)
{
    $pageUid = (int)$params['url'];
    $pageRow = BackendUtility::getRecord('pages', $pageUid);
    $urlSets = $pObj->getUrlsForPageRow($pageRow);

    // Registry for duplicates and a dummy download list, both required by urlListFromUrlArray()
    $duplicateTrack = [];
    $downloadUrls = [];
    if (!empty($urlSets)) {
        foreach ($urlSets as $urlSet) {
            $pObj->urlListFromUrlArray(
                $urlSet,
                $pageRow,
                $GLOBALS['EXEC_TIME'],
                30,
                1,
                0,
                $duplicateTrack,
                $downloadUrls,
                ['tx_indexedsearch_reindex']
            );
        }
    }

    // Subpages are only followed while the depth limit is not reached
    if (!($params['depth'] < $cfgRec['depth'])) {
        return;
    }
    $subPages = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        'uid,title',
        'pages',
        'pid = ' . $pageUid . BackendUtility::deleteClause('pages')
    );
    if (empty($subPages)) {
        return;
    }
    foreach ($subPages as $subPage) {
        $this->instanceCounter++;
        $session_data['urlLog'][] = 'pages:' . $subPage['uid'] . ': ' . $subPage['title'];
        $pObj->addQueueEntry_callBack(
            $cfgRec['set_id'],
            [
                'indexConfigUid' => $cfgRec['uid'],
                'url' => $subPage['uid'],
                'procInstructions' => ['[Index Cfg UID#' . $cfgRec['uid'] . ']'],
                'depth' => $params['depth'] + 1
            ],
            $this->callBack,
            $cfgRec['pid'],
            $GLOBALS['EXEC_TIME'] + $this->instanceCounter * $this->secondsPerExternalUrl
        );
    }
}
408 
{
    // Finishes indexing runs: for every index_config record with a set-ID that has no
    // pending entry left in tx_crawler_queue, the index rows of earlier runs are removed
    // and the record's set-ID and session data are reset.
    // NOTE(review): the method header belonging to this body is not part of the listing - confirm against upstream
    $db = $GLOBALS['TYPO3_DB'];
    // Tables holding per-phash index data (list copied from
    // \TYPO3\CMS\IndexedSearch\Domain\Repository\IndexedPagesController\AdministrationRepository)
    $indexTables = ['index_phash', 'index_rel', 'index_section', 'index_grlist', 'index_fulltext', 'index_debug'];

    $runningConfigurations = $db->exec_SELECTgetRows(
        'uid,set_id',
        'index_config',
        'set_id<>0' . BackendUtility::deleteClause('index_config')
    );
    foreach ($runningConfigurations as $cfgRec) {
        // Entries with exec_time=0 have not been executed yet, so the run is still going
        $pendingEntries = $db->exec_SELECTcountRows(
            '*',
            'tx_crawler_queue',
            'set_id=' . (int)$cfgRec['set_id'] . ' AND exec_time=0'
        );
        if ($pendingEntries) {
            continue;
        }

        // Drop everything this configuration indexed under a different set-ID
        $stalePhashRows = $db->exec_SELECTgetRows(
            'phash',
            'index_phash',
            'freeIndexUid=' . (int)$cfgRec['uid'] . ' AND freeIndexSetId<>' . (int)$cfgRec['set_id']
        );
        foreach ($stalePhashRows as $phashRow) {
            foreach ($indexTables as $indexTable) {
                $db->exec_DELETEquery($indexTable, 'phash=' . (int)$phashRow['phash']);
            }
        }

        // Mark the configuration as no longer running
        $db->exec_UPDATEquery(
            'index_config',
            'uid=' . (int)$cfgRec['uid'],
            [
                'set_id' => 0,
                'session_data' => ''
            ]
        );
    }
}
441 
442  /*****************************************
443  *
444  * Helper functions
445  *
446  *****************************************/
/**
 * Decides whether a URL found on an indexed page should be followed.
 *
 * A trailing double slash is reduced to a single one and any fragment is cut off.
 * The URL is accepted if it contains no "../", starts with $baseUrl and is not yet
 * listed in $urlLog.
 *
 * @param string $url URL to check
 * @param array $urlLog URLs that were already accepted
 * @param string $baseUrl Prefix every accepted URL has to start with
 * @return string|null The normalized URL if accepted, otherwise null
 */
public function checkUrl($url, $urlLog, $baseUrl)
{
    $normalized = preg_replace('/\\/\\/$/', '/', $url);
    // Everything from the first "#" on is dropped
    $segments = explode('#', $normalized);
    $normalized = $segments[0];

    if (strpos($normalized, '../') !== false) {
        return null;
    }
    if (!GeneralUtility::isFirstPartOfStr($normalized, $baseUrl)) {
        return null;
    }
    if (in_array($normalized, $urlLog)) {
        return null;
    }
    return $normalized;
}
467 
/**
 * Indexes an external URL and returns the absolute URLs of all links found in it.
 *
 * Links without a scheme are made absolute: those starting with "/" relative to
 * scheme and host of $url, all others relative to the base href (taken from the
 * document, or derived from the directory part of $url if the document has none).
 *
 * Missing URL components (no path in $url, no scheme in a link, empty href) are
 * handled explicitly so that they no longer raise PHP notices.
 *
 * @param string $url URL to index
 * @param int $pageId Page id passed to the indexer
 * @param array $rl Root line passed to the indexer
 * @param int $cfgUid Uid of the indexing configuration
 * @param int $setId Set-ID of the current indexing run
 * @return array List of URLs linked from the indexed document
 */
public function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)
{
    // Index external URL:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
    $indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
    // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
    $indexerObj->hash['phash'] = -1;
    $indexerObj->indexExternalUrl($url);

    // parse_url() omits absent components and returns false for seriously malformed URLs
    $urlParts = parse_url($url);
    if (!is_array($urlParts)) {
        $urlParts = array();
    }
    $scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : '';
    $host = isset($urlParts['host']) ? $urlParts['host'] : '';
    $path = isset($urlParts['path']) ? $urlParts['path'] : '';

    $baseAbsoluteHref = $scheme . '://' . $host;
    $baseHref = $indexerObj->extractBaseHref($indexerObj->indexExternalUrl_content);
    if (!$baseHref) {
        // Extract base href from current URL: scheme, host and the directory part of the path
        $lastSlash = strrpos($path, '/');
        $baseHref = $baseAbsoluteHref . ($lastSlash !== false ? substr($path, 0, $lastSlash) : '');
    }
    $baseHref = rtrim($baseHref, '/');

    // Get URLs on this page:
    $subUrls = array();
    $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
    foreach ($list as $linkInfo) {
        // Decode entities:
        $subUrl = htmlspecialchars_decode($linkInfo['href']);
        $qParts = parse_url($subUrl);
        // empty() also covers a missing 'scheme' key and a false result of parse_url()
        if (empty($qParts['scheme'])) {
            $relativeUrl = GeneralUtility::resolveBackPath($subUrl);
            // isset() guards against an empty string, which has no offset 0
            if (isset($relativeUrl[0]) && $relativeUrl[0] === '/') {
                $subUrl = $baseAbsoluteHref . $relativeUrl;
            } else {
                $subUrl = $baseHref . '/' . $relativeUrl;
            }
        }
        $subUrls[] = $subUrl;
    }
    return $subUrls;
}
516 
/**
 * Indexes a single record as if it was a TYPO3 page.
 *
 * The first field of the configured field list becomes the title, all further
 * fields are concatenated (space separated) into the content. Tags are stripped
 * from both before they are handed to the indexer.
 *
 * @param array $r Record to index
 * @param array $cfgRec Indexing configuration record
 * @param array|null $rl Root line; if not an array it is looked up for $cfgRec['pid']
 * @return void
 */
public function indexSingleRecord($r, $cfgRec, $rl = null)
{
    // Init:
    $rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
    $fieldList = GeneralUtility::trimExplode(',', $cfgRec['fieldlist'], true);
    $languageField = $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['languageField'];
    $sys_language_uid = $languageField ? $r[$languageField] : 0;
    // (Re)-Indexing a row from a table:
    $indexerObj = GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
    // "###UID###" in the configured GET parameters is replaced by the uid of the record
    parse_str(str_replace('###UID###', $r['uid'], $cfgRec['get_params']), $GETparams);
    $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, (bool)$cfgRec['chashcalc']);
    $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
    $indexerObj->forceIndexing = true;
    // Both start out empty so that an empty field list does not leave the title undefined
    $theTitle = '';
    $theContent = '';
    foreach ($fieldList as $k => $v) {
        if (!$k) {
            // First field of the list is the title
            $theTitle = $r[$v];
        } else {
            $theContent .= $r[$v] . ' ';
        }
    }
    // Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid())
    // A space is put in front of every "<" so that words separated only by tags do not get glued together
    $indexerObj->backend_indexAsTYPO3Page(
        strip_tags(str_replace('<', ' <', $theTitle)),
        '',
        '',
        strip_tags(str_replace('<', ' <', $theContent)),
        $GLOBALS['LANG']->charSet,
        $r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['tstamp']],
        $r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['crdate']],
        $r['uid']
    );
}
549 
/**
 * Returns the uids of the root line that results from running the TypoScript
 * templates for the given page.
 *
 * @param int $id Page uid to fetch the root line for
 * @return array Page uids, keyed like the template service's root line
 */
public function getUidRootLineForClosestTemplate($id)
{
    $templateService = GeneralUtility::makeInstance(\TYPO3\CMS\Core\TypoScript\ExtendedTemplateService::class);
    // Do not log time-performance information
    $templateService->tt_track = 0;
    $templateService->init();

    // Full root line of the page, as input for the template service
    $pageRepository = GeneralUtility::makeInstance(\TYPO3\CMS\Frontend\Page\PageRepository::class);
    $fullRootLine = $pageRepository->getRootLine($id);
    // This generates the constants/config + hierarchy info for the template
    $templateService->runThroughTemplates($fullRootLine, 0);

    // Reduce the root line kept by the template service to the page uids
    $uids = [];
    foreach ($templateService->rootLine as $level => $pageRecord) {
        $uids[$level] = $pageRecord['uid'];
    }
    return $uids;
}
575 
/**
 * Calculates the next time an indexing configuration should run.
 *
 * Starting from a midnight plus the configured offset, the result is the first
 * point in time that is a whole number of frequency intervals later and not
 * before the current execution time.
 *
 * @param array $cfgRec Indexing configuration record ('timer_frequency', 'timer_offset', 'timer_next_indexing')
 * @return float|int Timestamp of the next indexing
 */
public function generateNextIndexingTime($cfgRec)
{
    $now = $GLOBALS['EXEC_TIME'];
    $oneDay = 24 * 3600;

    // Pick the midnight the offset is counted from. For frequencies of up to a day any day
    // will do, so yesterday's midnight is used. For longer frequencies the day of the last
    // scheduled run is kept, so that e.g. the weekday of a weekly run is preserved no matter
    // when the script runs.
    if ($cfgRec['timer_frequency'] <= $oneDay) {
        $referenceMidnight = mktime(0, 0, 0) - 1 * $oneDay;
    } else {
        $previousRun = $cfgRec['timer_next_indexing'] ?: $GLOBALS['EXEC_TIME'];
        // NOTE(review): date('y') yields a two-digit year and relies on mktime()'s two-digit year mapping - confirm
        $referenceMidnight = mktime(
            0,
            0,
            0,
            date('m', $previousRun),
            date('d', $previousRun),
            date('y', $previousRun)
        );
    }

    // The offset is limited to one day, the frequency is at least one second
    $startPoint = $referenceMidnight
        + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_offset'], 0, 86400);
    $interval = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cfgRec['timer_frequency'], 1);

    // Number of whole intervals needed to get from the start point to the current time
    $intervalCount = ceil(($now - $startPoint) / $interval);
    return $startPoint + $intervalCount * $interval;
}
600 
/**
 * Checks whether a URL starts with one of the denied URL prefixes.
 *
 * Unlike before, a match is no longer echoed: the method only reports the result
 * and leaves output to its callers.
 *
 * @param string $url URL to test
 * @param string $url_deny Denied URL prefixes, one per line
 * @return bool true if the URL is denied, otherwise false
 */
public function checkDeniedSuburls($url, $url_deny)
{
    // An empty deny list denies nothing
    if (!trim($url_deny)) {
        return false;
    }
    $url_denyArray = GeneralUtility::trimExplode(LF, $url_deny, true);
    foreach ($url_denyArray as $testurl) {
        if (GeneralUtility::isFirstPartOfStr($url, $testurl)) {
            return true;
        }
    }
    return false;
}
621 
/**
 * Adds a queue entry on behalf of a custom hook object.
 *
 * Uses the parent object stored in $this->pObj, which crawler_execute() sets
 * before it calls a hook object.
 *
 * @param array $cfgRec Indexing configuration record
 * @param string $title Value stored as 'url' of the queue entry
 * @return void
 */
public function addQueueEntryForHook($cfgRec, $title)
{
    $configurationUid = $cfgRec['uid'];
    $this->pObj->addQueueEntry_callBack(
        $cfgRec['set_id'],
        [
            // This must ALWAYS be the cfgRec uid!
            'indexConfigUid' => $configurationUid,
            'url' => $title,
            'procInstructions' => ['[Index Cfg UID#' . $configurationUid . ']']
        ],
        $this->callBack,
        $cfgRec['pid']
    );
}
639 
/**
 * Removes all index data that belongs to a page.
 *
 * Looks up the phash values registered for the page in index_section and deletes
 * the rows with these values from all index tables.
 *
 * @param int $id Uid of the page
 * @return void
 */
public function deleteFromIndex($id)
{
    $db = $GLOBALS['TYPO3_DB'];
    $sectionRows = $db->exec_SELECTgetRows('phash', 'index_section', 'page_id=' . (int)$id);
    if (empty($sectionRows)) {
        // Nothing indexed for this page
        return;
    }

    $phashList = [];
    foreach ($sectionRows as $sectionRow) {
        $phashList[] = $sectionRow['phash'];
    }
    $condition = 'phash IN (' . implode(',', $db->cleanIntArray($phashList)) . ')';

    $indexTables = [
        'index_debug',
        'index_fulltext',
        'index_grlist',
        'index_phash',
        'index_rel',
        'index_section',
    ];
    foreach ($indexTables as $indexTable) {
        $db->exec_DELETEquery($indexTable, $condition);
    }
}
669 
670  /*************************
671  *
672  * Hook functions for TCEmain (indexing of records)
673  *
674  *************************/
/**
 * TCEmain hook, called before a command is processed: removes the index data of a
 * page that is about to be deleted.
 *
 * @param string $command Command being processed; only 'delete' is acted on
 * @param string $table Table the command applies to; only 'pages' is acted on
 * @param int $id Uid of the record
 * @param mixed $value Command value (not used)
 * @param object $pObj Calling object (not used)
 * @return void
 */
public function processCmdmap_preProcess($command, $table, $id, $value, $pObj)
{
    $isPageDeletion = $command === 'delete' && $table === 'pages';
    if (!$isPageDeletion) {
        return;
    }
    // Clean up the index
    $this->deleteFromIndex($id);
}
692 
/**
 * TCEmain hook, called after a record was written: keeps the index in sync.
 *
 * For a page that was updated to be hidden or excluded from search, its index data
 * is deleted. Afterwards the saved record is re-indexed with every idle "records"
 * indexing configuration that covers its table and page and has indexing on change
 * enabled.
 *
 * @param string $status 'new' or 'update'
 * @param string $table Table of the record
 * @param int|string $id Uid of the record, or the "NEW..." placeholder if $status is 'new'
 * @param array $fieldArray Fields that were written
 * @param object $pObj Calling object; its substNEWwithIDs property maps placeholders to uids
 * @return void
 */
public function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, $pObj)
{
    // Nothing was written, nothing to do
    if (empty($fieldArray)) {
        return;
    }

    if ($status === 'new') {
        // Translate the placeholder into the uid of the created record
        $id = $pObj->substNEWwithIDs[$id];
    } elseif (
        $table === 'pages'
        && $status === 'update'
        && (
            (array_key_exists('hidden', $fieldArray) && $fieldArray['hidden'] == 1)
            || (array_key_exists('no_search', $fieldArray) && $fieldArray['no_search'] == 1)
        )
    ) {
        // The page got hidden or excluded from search: drop what was indexed for it
        $this->deleteFromIndex($id);
    }

    $currentRecord = BackendUtility::getRecord($table, $id);
    if (!is_array($currentRecord)) {
        return;
    }

    // Idle (set_id=0) configurations of type "records" (1) for this table which are located on
    // the page of the record or point to it as alternative source, with indexing on change enabled
    $recordPid = (int)$currentRecord['pid'];
    $matchingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
        '*',
        'index_config',
        'hidden=0
            AND (starttime=0 OR starttime<=' . $GLOBALS['EXEC_TIME'] . ')
            AND set_id=0
            AND type=1
            AND table2index=' . $GLOBALS['TYPO3_DB']->fullQuoteStr($table, 'index_config') . '
            AND (
                (alternative_source_pid=0 AND pid=' . $recordPid . ')
                OR (alternative_source_pid=' . $recordPid . ')
            )
            AND records_indexonchange=1
            ' . BackendUtility::deleteClause('index_config')
    );
    foreach ($matchingConfigurations as $cfgRec) {
        $this->indexSingleRecord($currentRecord, $cfgRec);
    }
}
735 }