<?php
namespace TYPO3\CMS\Core\Resource\Index;

/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */

use TYPO3\CMS\Core\Resource\File;
use TYPO3\CMS\Core\Resource\ResourceStorage;
use TYPO3\CMS\Core\Type\File\ImageInfo;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
 * Keeps the file index of one storage in sync with the files the storage reports.
 *
 * The indexer creates and updates index records, marks records whose file can no longer
 * be found as missing, and runs the metadata extractors available for the storage's driver type.
 */
class Indexer
{
    /**
     * Files collected by detectChangedFilesInStorage() that still have to be processed.
     * Keyed by file identifier; the value is the existing index record, or null when the
     * file has no index record yet.
     *
     * @var array
     */
    protected $filesToUpdate = array();

    /**
     * Uids of the index records that were matched to an existing file during the current run.
     * detectMissingFiles() only inspects records that are NOT in this list.
     *
     * @var int[]
     */
    protected $identifiedFileUids = array();

    /**
     * The storage this indexer works on.
     *
     * @var ResourceStorage
     */
    protected $storage = null;

    /**
     * Extractors for the storage's driver type; resolved lazily by getExtractionServices().
     *
     * @var array|null
     */
    protected $extractionServices = null;

    /**
     * @param ResourceStorage $storage The storage whose files are indexed
     */
    public function __construct(ResourceStorage $storage)
    {
        $this->storage = $storage;
    }

    /**
     * Creates a new index record for the given file and returns the matching file object.
     *
     * @param string $identifier Non-empty identifier of the file inside the storage
     * @return File
     * @throws \InvalidArgumentException If the identifier is not a non-empty string
     */
    public function createIndexEntry($identifier)
    {
        if (!is_string($identifier) || $identifier === '') {
            throw new \InvalidArgumentException('Invalid file identifier given. It must be of type string and not empty. "' . gettype($identifier) . '" given.', 1401732565);
        }
        $fileProperties = $this->gatherFileInformationArray($identifier);
        $record = $this->getFileIndexRepository()->addRaw($fileProperties);
        $fileObject = $this->getResourceFactory()->getFileObject($record['uid'], $record);
        $this->extractRequiredMetaData($fileObject);
        return $fileObject;
    }

    /**
     * Refreshes the index record of an already indexed file with the current file information.
     *
     * @param File $fileObject The file whose index record is updated
     * @return void
     */
    public function updateIndexEntry(File $fileObject)
    {
        $updatedInformation = $this->gatherFileInformationArray($fileObject->getIdentifier());
        $fileObject->updateProperties($updatedInformation);
        $this->getFileIndexRepository()->update($fileObject);
        $this->extractRequiredMetaData($fileObject);
    }

    /**
     * Scans the whole storage and brings the index in line with what is found.
     *
     * @return void
     */
    public function processChangesInStorages()
    {
        // get all file-identifiers from the storage
        $rootFolderIdentifier = $this->storage->getRootLevelFolder()->getIdentifier();
        $availableFiles = $this->storage->getFileIdentifiersInFolder($rootFolderIdentifier, true, true);

        $this->detectChangedFilesInStorage($availableFiles);
        // Must run before detectMissingFiles(): it re-assigns index records of moved/renamed files
        // and registers their uids, so those records are not reported as missing afterwards.
        $this->processChangedAndNewFiles();
        $this->detectMissingFiles();
    }

    /**
     * Runs the metadata extraction for files of this storage whose indexing is still outstanding.
     *
     * @param int $maximumFileCount Passed through to the index repository lookup
     *                              (-1 presumably means "no limit" - confirm against the repository)
     * @return void
     */
    public function runMetaDataExtraction($maximumFileCount = -1)
    {
        $fileIndexRecords = $this->getFileIndexRepository()->findInStorageWithIndexOutstanding($this->storage, $maximumFileCount);
        foreach ($fileIndexRecords as $indexRecord) {
            $fileObject = $this->getResourceFactory()->getFileObject($indexRecord['uid'], $indexRecord);
            $this->extractMetaData($fileObject);
        }
    }

    /**
     * Collects metadata from every extractor that can process the file, merges it with the
     * metadata already present on the file and stores the result.
     *
     * @param File $fileObject
     * @return void
     */
    public function extractMetaData(File $fileObject)
    {
        // Key 0 holds the metadata already attached to the file; extractor results are keyed by priority.
        $newMetaData = array(
            0 => $fileObject->_getMetaData()
        );
        foreach ($this->getExtractionServices() as $service) {
            if ($service->canProcess($fileObject)) {
                $newMetaData[$service->getPriority()] = $service->extractMetaData($fileObject, $newMetaData);
            }
        }
        // Merge in ascending key order so entries with a higher key overwrite those with a lower one.
        ksort($newMetaData);
        $metaData = array();
        foreach ($newMetaData as $data) {
            $metaData = array_merge($metaData, $data);
        }
        $fileObject->_updateMetaDataProperties($metaData);
        $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
        $this->getFileIndexRepository()->updateIndexingTime($fileObject->getUid());
    }

    /**
     * Returns the extractors that support the driver type of the current storage.
     * The list is resolved once and cached on the instance.
     *
     * @return array
     */
    protected function getExtractionServices()
    {
        if ($this->extractionServices === null) {
            $this->extractionServices = $this->getExtractorRegistry()->getExtractorsWithDriverSupport($this->storage->getDriverType());
        }
        return $this->extractionServices;
    }

    /**
     * Marks index records as missing when they were not matched to a file during this run
     * and the storage confirms that the file does not exist.
     *
     * @return void
     */
    protected function detectMissingFiles()
    {
        $indexedNotExistentFiles = $this->getFileIndexRepository()->findInStorageAndNotInUidList(
            $this->storage,
            $this->identifiedFileUids
        );

        foreach ($indexedNotExistentFiles as $record) {
            if (!$this->storage->hasFile($record['identifier'])) {
                $this->getFileIndexRepository()->markFileAsMissing($record['uid']);
            }
        }
    }

    /**
     * Compares the given files with the index and queues new and changed files in $filesToUpdate.
     *
     * @param array $fileIdentifierArray Identifiers of the files found in the storage
     * @return void
     */
    protected function detectChangedFilesInStorage(array $fileIdentifierArray)
    {
        foreach ($fileIdentifierArray as $fileIdentifier) {
            // skip processed files
            if ($this->storage->isWithinProcessingFolder($fileIdentifier)) {
                continue;
            }
            // Get the modification time for file-identifier from the storage
            $modificationTime = $this->storage->getFileInfoByIdentifier($fileIdentifier, array('mtime'));
            $indexRecord = $this->getFileIndexRepository()->findOneByStorageUidAndIdentifier($this->storage->getUid(), $fileIdentifier);

            if ($indexRecord === false) {
                // not indexed yet: null tells processChangedAndNewFiles() to treat it as a new file
                $this->filesToUpdate[$fileIdentifier] = null;
                continue;
            }

            $this->identifiedFileUids[] = $indexRecord['uid'];

            // Compare as integers: the two timestamps may differ only in scalar type
            // (e.g. a string from the database vs. an int from the driver), which a plain !==
            // would report as a change for every single file.
            $isModified = (int)$indexRecord['modification_date'] !== (int)$modificationTime['mtime'];
            if ($isModified || $indexRecord['missing']) {
                $this->filesToUpdate[$fileIdentifier] = $indexRecord;
            }
        }
    }

    /**
     * Works through $filesToUpdate: existing records are refreshed; files without a record are
     * either recognised as a moved/renamed file (same content hash, old location gone) or get
     * a new index record.
     *
     * @return void
     */
    protected function processChangedAndNewFiles()
    {
        foreach ($this->filesToUpdate as $identifier => $data) {
            if ($data !== null) {
                // update existing file
                $fileObject = $this->getResourceFactory()->getFileObject($data['uid'], $data);
                $this->updateIndexEntry($fileObject);
                continue;
            }

            // search for files with same content hash in indexed storage
            $fileHash = $this->storage->hashFileByIdentifier($identifier, 'sha1');
            $files = $this->getFileIndexRepository()->findByContentHash($fileHash);
            $fileObject = null;
            if (!empty($files)) {
                foreach ($files as $fileIndexEntry) {
                    // check if file is missing then we assume it's moved/renamed
                    if (!$this->storage->hasFile($fileIndexEntry['identifier'])) {
                        $fileObject = $this->getResourceFactory()->getFileObject($fileIndexEntry['uid'], $fileIndexEntry);
                        $fileObject->updateProperties(array(
                            'identifier' => $identifier
                        ));
                        $this->updateIndexEntry($fileObject);
                        $this->identifiedFileUids[] = $fileObject->getUid();
                        break;
                    }
                }
            }
            // create new index when no missing file with same content hash is found
            if ($fileObject === null) {
                $fileObject = $this->createIndexEntry($identifier);
                $this->identifiedFileUids[] = $fileObject->getUid();
            }
        }
    }

    /**
     * Stores the image dimensions of image files that live in a storage with the "Local" driver.
     *
     * @param File $fileObject
     * @return void
     */
    protected function extractRequiredMetaData(File $fileObject)
    {
        // since the core desperately needs image sizes in metadata table do this manually
        // prevent doing this for remote storages, remote storages must provide the data with extractors
        if ($fileObject->getType() == File::FILETYPE_IMAGE && $this->storage->getDriverType() === 'Local') {
            $rawFileLocation = $fileObject->getForLocalProcessing(false);
            $imageInfo = GeneralUtility::makeInstance(ImageInfo::class, $rawFileLocation);
            $metaData = array(
                'width' => $imageInfo->getWidth(),
                'height' => $imageInfo->getHeight(),
            );
            $this->getMetaDataRepository()->update($fileObject->getUid(), $metaData);
            $fileObject->_updateMetaDataProperties($metaData);
        }
    }

    /****************************
     *
     *         UTILITY
     *
     ****************************/

    /**
     * Builds the property array for an index record from the storage's file information.
     *
     * @param string $identifier Identifier of the file inside the storage
     * @return array File information in file-object format, extended by type, sha1, extension and missing
     */
    protected function gatherFileInformationArray($identifier)
    {
        $fileInfo = $this->storage->getFileInfoByIdentifier($identifier);
        $fileInfo = $this->transformFromDriverFileInfoArrayToFileObjectFormat($fileInfo);
        $fileInfo['type'] = $this->getFileType($fileInfo['mime_type']);
        $fileInfo['sha1'] = $this->storage->hashFileByIdentifier($identifier, 'sha1');
        $fileInfo['extension'] = \TYPO3\CMS\Core\Utility\PathUtility::pathinfo($fileInfo['name'], PATHINFO_EXTENSION);
        // the information was just read from the storage, so the file cannot be missing
        $fileInfo['missing'] = 0;

        return $fileInfo;
    }

    /**
     * Maps a MIME type to one of the File::FILETYPE_* constants, based on the part before the slash.
     *
     * @param string $mimeType MIME type such as "image/png"
     * @return int One of the File::FILETYPE_* constants
     */
    protected function getFileType($mimeType)
    {
        list($fileType) = explode('/', $mimeType);
        switch (strtolower($fileType)) {
            case 'text':
                $type = File::FILETYPE_TEXT;
                break;
            case 'image':
                $type = File::FILETYPE_IMAGE;
                break;
            case 'audio':
                $type = File::FILETYPE_AUDIO;
                break;
            case 'video':
                $type = File::FILETYPE_VIDEO;
                break;
            case 'application':
            case 'software':
                $type = File::FILETYPE_APPLICATION;
                break;
            default:
                $type = File::FILETYPE_UNKNOWN;
        }
        return $type;
    }

    /**
     * Renames the keys of a driver file-info array to the property names used by file objects.
     *
     * Keys listed in the mapping with a null target are dropped; keys that are not part of the
     * mapping are copied unchanged.
     *
     * @param array $fileInfo File information as delivered by the driver
     * @return array The same information keyed by file property names
     */
    protected function transformFromDriverFileInfoArrayToFileObjectFormat(array $fileInfo)
    {
        $mappingInfo = array(
            // 'driverKey' => 'fileProperty' Key is from the driver, value is for the property in the file
            'size' => 'size',
            'atime' => null,
            'mtime' => 'modification_date',
            'ctime' => 'creation_date',
            'mimetype' => 'mime_type'
        );
        $mappedFileInfo = array();
        foreach ($fileInfo as $key => $value) {
            if (!array_key_exists($key, $mappingInfo)) {
                // unknown keys are passed through untouched
                $mappedFileInfo[$key] = $value;
            } elseif ($mappingInfo[$key] !== null) {
                $mappedFileInfo[$mappingInfo[$key]] = $value;
            }
        }
        return $mappedFileInfo;
    }

    /**
     * Returns the repository that manages the file index records.
     *
     * @return FileIndexRepository
     */
    protected function getFileIndexRepository()
    {
        return FileIndexRepository::getInstance();
    }

    /**
     * Returns the repository that manages the file metadata records.
     *
     * @return MetaDataRepository
     */
    protected function getMetaDataRepository()
    {
        return MetaDataRepository::getInstance();
    }

    /**
     * Returns the factory used to build file objects from index records.
     *
     * @return \TYPO3\CMS\Core\Resource\ResourceFactory
     */
    protected function getResourceFactory()
    {
        return \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance();
    }

    /**
     * Returns the registry that knows the available metadata extractors.
     *
     * @return ExtractorRegistry
     */
    protected function getExtractorRegistry()
    {
        return ExtractorRegistry::getInstance();
    }
}