TYPO3  7.6
RteHtmlParser.php
Go to the documentation of this file.
1 <?php
2 namespace TYPO3\CMS\Core\Html;
3 
4 /*
5  * This file is part of the TYPO3 CMS project.
6  *
7  * It is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU General Public License, either version 2
9  * of the License, or any later version.
10  *
11  * For the full copyright and license information, please read the
12  * LICENSE.txt file that was distributed with this source code.
13  *
14  * The TYPO3 project - inspiring people to share!
15  */
16 
20 use TYPO3\CMS\Core\Resource;
22 
27 {
/** Comma-separated tag names. TS_transform_db() splits content into blocks by these (in addition to TABLE, BLOCKQUOTE and, optionally, DIV). RTE_transform() replaces the list with proc.blockElementList when that option is set. */
31  public $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD,HEADER,SECTION,FOOTER,NAV,ARTICLE,ASIDE';
32 
/** Set by init(). Passed to getDefaultUploadFolder() in TS_images_db() and used by TS_links_rte() as the page id when a link carries no id part. */
38  public $recPid = 0;
39 
/** Set by init(). TS_images_db() splits it on ':' into a table name and a field name. */
45  public $elRef = '';
46 
/** Set by setRelPath(): the given path without one leading/trailing slash and with '/' appended. */
52  public $relPath = '';
53 
/** Set by setRelPath(): one '../' per segment of $relPath. Prepended to site-relative URLs in the "db" direction and removed again in TS_images_rte(). */
59  public $relBackPath = '';
60 
/** Copy of the $thisConfig argument of RTE_transform(); handed to the magic image service in TS_images_db(). */
66  public $tsConfig = array();
67 
/** The 'proc.' sub-array of $tsConfig (cast to array), set in RTE_transform(). */
73  public $procOptions = array();
74 
81 
/** Return value of BackendUtility::getSpecConfParametersFromArray() for the rte_transform parameters, set in RTE_transform(). NOTE(review): the default is '' but RTE_transform() reads ['mode'] from it, so an array is presumably stored — confirm. */
87  public $rte_p = '';
88 
/** Not referenced in the visible part of this file; presumably a lookup cache for a getKeepTags() method — verify. */
94  public $getKeepTags_cache = array();
95 
/** Filled from the comma list proc.allowedClasses by RTE_transform() when ts_transform/css_transform runs in the "db" direction. */
101  public $allowedClasses = array();
102 
/** Upper-cased comma list built from proc.preserveTags in RTE_transform(). While empty, TS_preserve_db() and TS_preserve_rte() return their input unchanged. */
108  public $preserveTags = '';
109 
109 
/**
 * Stores the record context used by the transformations.
 *
 * @param string $elRef Element reference; TS_images_db() reads it as "table:field"
 * @param int $recPid Page id; used for the default upload folder and as fallback page id for links
 * @return void
 */
public function init($elRef = '', $recPid = 0)
{
    $this->elRef = $elRef;
    $this->recPid = $recPid;
}
122 
/**
 * Sets $relPath and the matching $relBackPath from a path.
 *
 * The path is trimmed and at most one leading and one trailing slash are
 * removed. If nothing remains, both properties are left untouched.
 * Otherwise $relPath becomes the cleaned path plus a trailing slash and
 * $relBackPath becomes one "../" for every segment of the cleaned path.
 *
 * @param string $path Path to register, e.g. "typo3/ext/myext"
 * @return void
 */
public function setRelPath($path)
{
    $withoutLeadingSlash = preg_replace('/^\\//', '', trim($path));
    $cleanPath = preg_replace('/\\/$/', '', $withoutLeadingSlash);
    if (!$cleanPath) {
        return;
    }
    // One step back for each directory level of the path
    $depth = count(explode('/', $cleanPath));
    $this->relBackPath = str_repeat('../', $depth);
    $this->relPath = $cleanPath . '/';
}
146 
/**
 * NOTE(review): no statement of this method is visible in this listing; the
 * listing jumps from source line 158 to 160, so a line of the body may be
 * missing here. Check the original file before relying on this method
 * being a no-op.
 *
 * @param mixed $pArr Not read in the visible body
 * @param mixed $currentRecord Not read in the visible body
 */
157  public static function evalWriteFile($pArr, $currentRecord)
158  {
160  }
161 
162  /**********************************************
163  *
164  * Main function
165  *
166  **********************************************/
/**
 * Runs the configured transformations on a content string, either on its
 * way to the database ("db") or on its way into the editor ("rte").
 *
 * The list of transformations comes from proc.overruleMode or, if that is
 * empty, from the "mode" parameter of the rte_transform configuration
 * (dash separated). The shortcut modes "ts" and "ts_css" are expanded into
 * their individual transformations. For the "rte" direction the list is
 * processed in reverse order. A class registered in
 * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][<key>]
 * replaces the built-in handling of that key; it receives pObj and
 * transformationKey before transform_db()/transform_rte() is called.
 *
 * Optional entry/exit HTML cleaners (proc.entryHTMLparser_<direction>,
 * proc.exitHTMLparser_<direction>) run before and after the transformations.
 * Unless proc.disableUnifyLineBreaks is set, the result uses CRLF line breaks.
 *
 * @param string $value Content to transform
 * @param array $specConf Special configuration; only ['rte_transform']['parameters'] is read
 * @param string $direction "db" or "rte"; with any other value only the cleaners and the line break handling run
 * @param array $thisConfig RTE configuration; its 'proc.' key holds the processing options
 * @return string Transformed content
 */
public function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = array())
{
    // Init:
    $this->tsConfig = $thisConfig;
    $this->procOptions = isset($thisConfig['proc.']) ? (array)$thisConfig['proc.'] : array();
    $preserveTags = isset($this->procOptions['preserveTags']) ? $this->procOptions['preserveTags'] : '';
    $this->preserveTags = strtoupper(implode(',', GeneralUtility::trimExplode(',', $preserveTags)));
    // Dynamic configuration of blockElementList
    if (!empty($this->procOptions['blockElementList'])) {
        $this->blockElementList = $this->procOptions['blockElementList'];
    }
    // Get parameters for rte_transformation:
    $transformParameters = isset($specConf['rte_transform']['parameters'])
        ? $specConf['rte_transform']['parameters']
        : null;
    $this->rte_p = BackendUtility::getSpecConfParametersFromArray($transformParameters);
    $p = $this->rte_p;
    // Setting modes:
    if (isset($this->procOptions['overruleMode']) && (string)$this->procOptions['overruleMode'] !== '') {
        $modes = GeneralUtility::trimExplode(',', $this->procOptions['overruleMode']);
    } else {
        $modes = GeneralUtility::trimExplode('-', isset($p['mode']) ? $p['mode'] : '');
    }
    $modes = array_unique($modes);
    $revmodes = array_flip($modes);
    // Expand the shortcut modes into the transformations they stand for:
    if (isset($revmodes['ts'])) {
        $modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
    }
    if (isset($revmodes['ts_css'])) {
        $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
    }
    // Make list unique
    $modes = array_unique(GeneralUtility::trimExplode(',', implode(',', $modes), true));
    // Reverse order if direction is "rte"
    if ($direction == 'rte') {
        $modes = array_reverse($modes);
    }
    // Additional HTML cleaner configuration. These cleaners are applied before and after the
    // main transformations and are independent of them.
    $entry_HTMLparser = '';
    if (!empty($this->procOptions['entryHTMLparser_' . $direction])) {
        $entryConfigKey = 'entryHTMLparser_' . $direction . '.';
        $entry_HTMLparser = $this->HTMLparserConfig(
            isset($this->procOptions[$entryConfigKey]) ? $this->procOptions[$entryConfigKey] : null
        );
    }
    $exit_HTMLparser = '';
    if (!empty($this->procOptions['exitHTMLparser_' . $direction])) {
        $exitConfigKey = 'exitHTMLparser_' . $direction . '.';
        $exit_HTMLparser = $this->HTMLparserConfig(
            isset($this->procOptions[$exitConfigKey]) ? $this->procOptions[$exitConfigKey] : null
        );
    }
    $unifyLineBreaks = empty($this->procOptions['disableUnifyLineBreaks']);
    // Line breaks of content are unified into char-10 only (removing char 13)
    if ($unifyLineBreaks) {
        $value = str_replace(CRLF, LF, $value);
    }
    // If an entry-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($entry_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
    }
    // Traverse modes:
    foreach ($modes as $cmd) {
        // Class reference of a user defined transformation for this key, if any
        $_classRef = isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd])
            ? $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]
            : null;
        // ->DB
        if ($direction == 'db') {
            if ($_classRef) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_db($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_db($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'db');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_db($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_db($value);
                        break;
                    case 'ts_transform':
                        // Intentional fall-through: same handling as css_transform
                    case 'css_transform':
                        $allowedClasses = isset($this->procOptions['allowedClasses']) ? $this->procOptions['allowedClasses'] : '';
                        $this->allowedClasses = GeneralUtility::trimExplode(',', $allowedClasses, true);
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        // Transform empty paragraphs into spacing paragraphs
                        $value = str_replace('<p></p>', '<p>&nbsp;</p>', $value);
                        // Double any trailing spacing paragraph so that it does not get removed by divideIntoLines()
                        $value = preg_replace('/<p>&nbsp;<\/p>$/', '<p>&nbsp;</p>' . '<p>&nbsp;</p>', $value);
                        $value = $this->TS_transform_db($value, $cmd == 'css_transform');
                        break;
                    case 'ts_strip':
                        $value = $this->TS_strip_db($value);
                        break;
                    default:
                        // Do nothing
                }
            }
        }
        // ->RTE
        if ($direction == 'rte') {
            if ($_classRef) {
                $_procObj = GeneralUtility::getUserObj($_classRef);
                $_procObj->pObj = $this;
                // Same context as in the "db" direction, so one class can serve several keys
                $_procObj->transformationKey = $cmd;
                $value = $_procObj->transform_rte($value, $this);
            } else {
                // ... else use defaults:
                switch ($cmd) {
                    case 'ts_images':
                        $value = $this->TS_images_rte($value);
                        break;
                    case 'ts_reglinks':
                        $value = $this->TS_reglinks($value, 'rte');
                        break;
                    case 'ts_links':
                        $value = $this->TS_links_rte($value);
                        break;
                    case 'ts_preserve':
                        $value = $this->TS_preserve_rte($value);
                        break;
                    case 'ts_transform':
                        // Intentional fall-through: same handling as css_transform
                    case 'css_transform':
                        // CR has a very disturbing effect, so just remove all CR and rely on LF
                        $value = str_replace(CR, '', $value);
                        $value = $this->TS_transform_rte($value, $cmd == 'css_transform');
                        break;
                    default:
                        // Do nothing
                }
            }
        }
    }
    // If an exit-cleaner was configured, pass value through the HTMLcleaner with that:
    if (is_array($exit_HTMLparser)) {
        $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
    }
    // Final clean up of linebreaks:
    if ($unifyLineBreaks) {
        // Make sure no \r\n sequences have entered in the meantime...
        $value = str_replace(CRLF, LF, $value);
        // ... and then change all \n into \r\n
        $value = str_replace(LF, CRLF, $value);
    }
    return $value;
}
315 
316  /************************************
317  *
318  * Specific RTE TRANSFORMATION functions
319  *
320  *************************************/
/**
 * Transformation handler "ts_images", direction "db": processes all <img>
 * tags of the content before it is stored.
 *
 * For every image tag:
 * - a src that starts with the site path is turned into an absolute URL,
 * - width/height found by getWHFromAttribs() are written to the attributes,
 * - if data-htmlarea-file-uid points to an existing file, the image is either
 *   treated as plain image (src equals the public URL of the file; plain
 *   image mode settings are applied if configured) or a magic image with the
 *   requested dimensions is created and referenced,
 * - otherwise an external image is fetched into the default upload folder of
 *   the backend user (only in BE mode and unless proc.dontFetchExtPictures is
 *   set; only gif, png, jpeg and jpg),
 * - otherwise a local file without file uid gets plain image mode settings
 *   applied and, if it can be resolved, its file uid added,
 * - width/height declarations are removed from the style attribute, an alt
 *   attribute is guaranteed, and a src below the site URL is made relative
 *   (prefixed with $this->relBackPath).
 *
 * @param string $value Content with <img> tags
 * @return string Processed content
 */
public function TS_images_db($value)
{
    // Split content by <img> tags; the tags end up on the odd indexes
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) <= 1) {
        return implode('', $imgSplit);
    }
    $siteUrl = $this->siteUrl();
    $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
    $resourceFactory = Resource\ResourceFactory::getInstance();
    $magicImageService = GeneralUtility::makeInstance(\TYPO3\CMS\Core\Resource\Service\MagicImageService::class);
    $magicImageService->setMagicImageMaximumDimensions($this->tsConfig);
    foreach ($imgSplit as $k => $v) {
        if (!($k % 2)) {
            // Text between image tags
            continue;
        }
        // Get attributes
        $attribArray = $this->get_tag_attributes_classic($v, 1);
        // It's always an absolute URL coming from the RTE into the Database.
        $absoluteUrl = isset($attribArray['src']) ? trim($attribArray['src']) : '';
        // Make path absolute if it has no scheme and starts with the site path.
        // parse_url() is used because pathinfo() does not report a scheme.
        $urlParts = parse_url($absoluteUrl);
        $hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);
        if ($sitePath && !$hasScheme && GeneralUtility::isFirstPartOfStr($absoluteUrl, $sitePath)) {
            // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
            $absoluteUrl = $siteUrl . substr($absoluteUrl, strlen($sitePath));
        }
        // Image dimensions set in the img tag, if any
        $imgTagDimensions = $this->getWHFromAttribs($attribArray);
        if ($imgTagDimensions[0]) {
            $attribArray['width'] = $imgTagDimensions[0];
        }
        if ($imgTagDimensions[1]) {
            $attribArray['height'] = $imgTagDimensions[1];
        }
        $originalImageFile = null;
        if (!empty($attribArray['data-htmlarea-file-uid'])) {
            // An original image file uid is available
            try {
                $originalImageFile = $resourceFactory->getFileObject((int)$attribArray['data-htmlarea-file-uid']);
            } catch (Resource\Exception\FileDoesNotExistException $fileDoesNotExistException) {
                // Log the fact the file could not be retrieved.
                $message = sprintf('Could not find file with uid "%s"', $attribArray['data-htmlarea-file-uid']);
                $this->getLogger()->error($message);
            }
        }
        if ($originalImageFile instanceof Resource\File) {
            // Public url of local file is relative to the site url, absolute otherwise
            if ($absoluteUrl == $originalImageFile->getPublicUrl() || $absoluteUrl == $siteUrl . $originalImageFile->getPublicUrl()) {
                // This is a plain image, i.e. reference to the original image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // "plain image mode" is configured: find the dimensions of the original image
                    $imageInfo = array(
                        $originalImageFile->getProperty('width'),
                        $originalImageFile->getProperty('height')
                    );
                    if (!$imageInfo[0] || !$imageInfo[1]) {
                        $filePath = $originalImageFile->getForLocalProcessing(false);
                        $imageInfo = @getimagesize($filePath);
                    }
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
            } else {
                // Magic image case: get a processed file with the requested configuration
                $imageConfiguration = array(
                    'width' => $imgTagDimensions[0],
                    'height' => $imgTagDimensions[1]
                );
                $magicImage = $magicImageService->createMagicImage($originalImageFile, $imageConfiguration);
                $attribArray['width'] = $magicImage->getProperty('width');
                $attribArray['height'] = $magicImage->getProperty('height');
                $attribArray['src'] = $magicImage->getPublicUrl();
            }
        } elseif (!GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl) && empty($this->procOptions['dontFetchExtPictures']) && TYPO3_MODE === 'BE') {
            // External image from another URL: fetch it, unless the feature is disabled or we are not in backend mode
            $externalFile = $this->getUrl($absoluteUrl);
            if ($externalFile) {
                $pU = parse_url($absoluteUrl);
                $pI = pathinfo(is_array($pU) && isset($pU['path']) ? $pU['path'] : '');
                $extension = isset($pI['extension']) ? $pI['extension'] : '';
                if (GeneralUtility::inList('gif,png,jpeg,jpg', strtolower($extension))) {
                    $fileName = GeneralUtility::shortMD5($absoluteUrl) . '.' . $extension;
                    // We insert this image into the user default upload folder
                    list($table, $field) = array_pad(explode(':', $this->elRef), 2, null);
                    $folder = $GLOBALS['BE_USER']->getDefaultUploadFolder($this->recPid, $table, $field);
                    $fileObject = $folder->createFile($fileName)->setContents($externalFile);
                    $imageConfiguration = array(
                        'width' => isset($attribArray['width']) ? $attribArray['width'] : null,
                        'height' => isset($attribArray['height']) ? $attribArray['height'] : null
                    );
                    $magicImage = $magicImageService->createMagicImage($fileObject, $imageConfiguration);
                    $attribArray['width'] = $magicImage->getProperty('width');
                    $attribArray['height'] = $magicImage->getProperty('height');
                    $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    $attribArray['src'] = $magicImage->getPublicUrl();
                }
            }
        } elseif (GeneralUtility::isFirstPartOfStr($absoluteUrl, $siteUrl)) {
            // Finally, check image as local file (siteURL equals the one of the image)
            // Image has no data-htmlarea-file-uid attribute
            // Relative path, rawurldecoded for special characters.
            $path = rawurldecode(substr($absoluteUrl, strlen($siteUrl)));
            // Absolute filepath, locked to relative path of this project
            $filepath = GeneralUtility::getFileAbsFileName($path);
            // Check file existence (in relative directory to this installation!)
            if ($filepath && @is_file($filepath)) {
                // Treat it as a plain image
                if (!empty($this->procOptions['plainImageMode'])) {
                    // "plain image mode" is configured: find the original dimensions of the image
                    $imageInfo = @getimagesize($filepath);
                    $attribArray = $this->applyPlainImageModeSettings($imageInfo, $attribArray);
                }
                // Let's try to find a file uid for this image
                try {
                    $fileOrFolderObject = $resourceFactory->retrieveFileOrFolderObject($path);
                    if ($fileOrFolderObject instanceof Resource\FileInterface) {
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        // @todo if the retrieved file is a processed file, get the original file...
                        $attribArray['data-htmlarea-file-uid'] = $fileObject->getUid();
                    }
                } catch (Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                    // Nothing to be done if file/folder not found
                }
            }
        }
        // Remove width and height from style attribute (the attribute is always written, possibly empty)
        $style = isset($attribArray['style']) ? $attribArray['style'] : '';
        $attribArray['style'] = preg_replace('/((?:^|)\\s*(?:width|height)\\s*:[^;]*(?:$|;))/si', '', $style);
        // Must have alt attribute
        if (!isset($attribArray['alt'])) {
            $attribArray['alt'] = '';
        }
        // Convert absolute to relative url
        if (isset($attribArray['src']) && GeneralUtility::isFirstPartOfStr($attribArray['src'], $siteUrl)) {
            $attribArray['src'] = $this->relBackPath . substr($attribArray['src'], strlen($siteUrl));
        }
        $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
    }
    return implode('', $imgSplit);
}
476 
/**
 * Transformation handler "ts_images", direction "rte": makes the src of
 * every <img> tag an absolute URL and guarantees an alt attribute.
 *
 * A src that does not start with "http" is treated as site-relative: the
 * $this->relBackPath prefix and the site path are removed if (and only if)
 * the value actually starts with them, then the site URL is prepended.
 *
 * @param string $value Content with <img> tags
 * @return string Processed content
 */
public function TS_images_rte($value)
{
    // Split content by <img> tags; the tags end up on the odd indexes
    $imgSplit = $this->splitTags('img', $value);
    if (count($imgSplit) > 1) {
        $siteUrl = $this->siteUrl();
        $sitePath = str_replace(GeneralUtility::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
        $relBackPath = (string)$this->relBackPath;
        foreach ($imgSplit as $k => $v) {
            if (!($k % 2)) {
                continue;
            }
            // Get the attributes of the img tag
            $attribArray = $this->get_tag_attributes_classic($v, 1);
            $src = isset($attribArray['src']) ? trim($attribArray['src']) : '';
            // Transform the src attribute into an absolute url, if it is not already
            if (strtolower(substr($src, 0, 4)) !== 'http') {
                // Only strip the back path when it really is the prefix; cutting a fixed
                // number of characters would damage every other value
                if ($relBackPath !== '' && strpos($src, $relBackPath) === 0) {
                    $src = (string)substr($src, strlen($relBackPath));
                }
                // If site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
                $src = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $src);
                $attribArray['src'] = $siteUrl . $src;
            }
            // Must have alt attribute
            if (!isset($attribArray['alt'])) {
                $attribArray['alt'] = '';
            }
            $imgSplit[$k] = '<img ' . GeneralUtility::implodeAttributes($attribArray, 1, 1) . ' />';
        }
    }
    // Return processed content:
    return implode('', $imgSplit);
}
516 
/**
 * Transformation handler "ts_reglinks": keeps links as regular <a> tags.
 *
 * Direction "rte" delegates to TS_AtagToAbs(). Direction "db" removes the
 * site URL from the beginning of every href (replacing it with
 * $this->relBackPath) and processes nested content recursively. Any other
 * direction yields an empty string.
 *
 * @param string $value Content to process
 * @param string $direction "db" or "rte"
 * @return string Processed content
 */
public function TS_reglinks($value, $direction)
{
    if ($direction == 'rte') {
        return $this->TS_AtagToAbs($value, 1);
    }
    if ($direction != 'db') {
        return '';
    }
    $siteURL = $this->siteUrl();
    $pieces = $this->splitIntoBlock('A', $value);
    foreach ($pieces as $index => $piece) {
        if (!($index % 2)) {
            // Content outside of <a> blocks stays as it is
            continue;
        }
        $attributes = $this->get_tag_attributes_classic($this->getFirstTag($piece), 1);
        // If the url is local, remove url-prefix
        if ($siteURL && substr($attributes['href'], 0, strlen($siteURL)) == $siteURL) {
            $attributes['href'] = $this->relBackPath . substr($attributes['href'], strlen($siteURL));
        }
        $openingTag = '<a ' . GeneralUtility::implodeAttributes($attributes, 1) . '>';
        $innerContent = $this->TS_reglinks($this->removeFirstAndLastTag($piece), $direction);
        $pieces[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $pieces);
}
553 
/**
 * Transformation handler "ts_links", direction "db": converts <a> tags
 * into TYPO3 <link> pseudo tags where that is possible.
 *
 * An <a> tag is converted only if, apart from href, target, class, title
 * and data-htmlarea-external (and rteerror/style when rteerror is set),
 * no attributes remain after the removeParams_PostProc hooks have run.
 * The modifyParams_LinksDb_PostProc hooks, if registered, produce the
 * replacement instead of the default <link> tag.
 *
 * All other <a> tags are kept as <a> tags: rtekeep and
 * data-htmlarea-external are removed, a leading site URL in href is replaced
 * by $this->relBackPath and a "?file:<identifier>" href is resolved to the
 * public URL of the file or folder.
 *
 * Nested content is processed recursively.
 *
 * @param string $value Content with <a> tags
 * @return string Content with <link> tags
 */
public function TS_links_db($value)
{
    $conf = array();
    // Split content into <a> tag blocks; the blocks end up on the odd indexes
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        if (!($k % 2)) {
            continue;
        }
        $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
        $hrefAttribute = isset($attribArray['href']) ? $attribArray['href'] : null;
        $isExternal = !empty($attribArray['data-htmlarea-external']);
        $info = $this->urlInfoForLinkTags($hrefAttribute);
        // Check options: everything left in the copy prevents the conversion into a <link> tag
        $attribArray_copy = $attribArray;
        unset(
            $attribArray_copy['href'],
            $attribArray_copy['target'],
            $attribArray_copy['class'],
            $attribArray_copy['title'],
            $attribArray_copy['data-htmlarea-external']
        );
        // Unset "rteerror" and "style" attributes if "rteerror" is set!
        if (!empty($attribArray_copy['rteerror'])) {
            unset($attribArray_copy['style'], $attribArray_copy['rteerror']);
        }
        // Remove additional parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'])) {
            $parameters = array(
                'conf' => &$conf,
                'aTagParams' => &$attribArray_copy
            );
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['removeParams_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $attribArray_copy = $processor->removeParams($parameters, $this);
            }
        }
        // Only if href, target, class and title are the only attributes, we can alter the link!
        if (empty($attribArray_copy)) {
            // Quoting class and title attributes if they contain spaces.
            // Both keys exist afterwards (null when the tag did not carry them).
            foreach (array('class', 'title') as $attributeName) {
                $attributeValue = isset($attribArray[$attributeName]) ? $attribArray[$attributeName] : null;
                if (strpos((string)$attributeValue, ' ') !== false) {
                    $attributeValue = '"' . $attributeValue . '"';
                }
                $attribArray[$attributeName] = $attributeValue;
            }
            // Creating the TYPO3 pseudo-tag "<LINK>" for the link (includes href/url, target and class attributes):
            // If data-htmlarea-external attribute is set, keep the href unchanged
            if ($isExternal) {
                $href = $hrefAttribute;
            } else {
                $href = (isset($info['url']) ? $info['url'] : '') . (!empty($info['query']) ? ',0,' . $info['query'] : '');
            }
            $target = !empty($attribArray['target']) ? $attribArray['target'] : '';
            $class = $attribArray['class'];
            $title = $attribArray['title'];
            // A "-" fills the position of an empty value whenever a later value follows
            $bTag = '<link ' . $href
                . ($target ? ' ' . $target : ($class || $title ? ' -' : ''))
                . ($class ? ' ' . $class : ($title ? ' -' : ''))
                . ($title ? ' ' . $title : '')
                . '>';
            $eTag = '</link>';
            // Modify parameters
            if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'])) {
                $parameters = array(
                    'conf' => &$conf,
                    'currentBlock' => $v,
                    'url' => $href,
                    'attributes' => $attribArray
                );
                foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksDb_PostProc'] as $objRef) {
                    $processor = GeneralUtility::getUserObj($objRef);
                    $blockSplit[$k] = $processor->modifyParamsLinksDb($parameters, $this);
                }
            } else {
                $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
            }
        } else {
            // ... otherwise store the link as a-tag.
            // Unsetting 'rtekeep' attribute if that had been set.
            unset($attribArray['rtekeep']);
            if (!$isExternal && isset($attribArray['href'])) {
                $siteURL = $this->siteUrl();
                // If the url is local, remove url-prefix
                if ($siteURL && substr($attribArray['href'], 0, strlen($siteURL)) === $siteURL) {
                    $attribArray['href'] = $this->relBackPath . substr($attribArray['href'], strlen($siteURL));
                }
                // Check for FAL link-handler keyword
                $linkHandlerParts = explode(':', $attribArray['href'], 2);
                if ($linkHandlerParts[0] === '?file') {
                    $linkHandlerValue = isset($linkHandlerParts[1]) ? $linkHandlerParts[1] : '';
                    try {
                        $fileOrFolderObject = \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject(rawurldecode($linkHandlerValue));
                        if ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\FileInterface || $fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\Folder) {
                            $attribArray['href'] = $fileOrFolderObject->getPublicUrl();
                        }
                    } catch (\TYPO3\CMS\Core\Resource\Exception\ResourceDoesNotExistException $resourceDoesNotExistException) {
                        // The identifier inserted in the RTE is already gone...
                    }
                }
            }
            unset($attribArray['data-htmlarea-external']);
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
            $eTag = '</a>';
            $blockSplit[$k] = $bTag . $this->TS_links_db($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
656 
/**
 * Transformation handler "ts_links", direction "rte": converts TYPO3
 * <link> pseudo tags into <a> tags for the editor.
 *
 * The link parameter is classified in this order: mail address, anchor
 * ("#..."), FAL "file:" reference, file in the site root, external URL,
 * internal file or folder path, page id or alias. For page links that
 * cannot be resolved, a registered typolinkLinkHandler keyword keeps the
 * parameter unchanged; otherwise the <a> tag gets an "rteerror" attribute
 * and a highlighting style. External URLs are marked with
 * data-htmlarea-external="1". The modifyParams_LinksRte_PostProc hooks, if
 * registered, produce the replacement instead of the default <a> tag.
 *
 * Nested content is processed recursively.
 *
 * @param string $value Content with <link> tags
 * @return string Content with <a> tags
 */
public function TS_links_rte($value)
{
    $conf = array();
    $value = $this->TS_AtagToAbs($value);
    // Split content by the TYPO3 pseudo tag "<link>"; the blocks end up on the odd indexes
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = $this->siteUrl();
    foreach ($blockSplit as $k => $v) {
        $error = '';
        $external = false;
        if (!($k % 2)) {
            continue;
        }
        // Split away the first "<link" part; a bare "<link>" has no parameter part
        $tagParts = explode(' ', substr($this->getFirstTag($v), 0, -1), 2);
        $typolink = isset($tagParts[1]) ? $tagParts[1] : '';
        $tagCode = GeneralUtility::makeInstance(TypoLinkCodecService::class)->decode($typolink);

        $link_param = (string)$tagCode['url'];
        // Parsing the typolink data. This parsing is roughly done like in \TYPO3\CMS\Frontend\ContentObject->typoLink()
        // Parse URL (parse_url() returns false for seriously malformed input):
        $pU = parse_url($link_param);
        $scheme = is_array($pU) && isset($pU['scheme']) ? $pU['scheme'] : '';
        if (strpos($link_param, '@') !== false && (!$scheme || $scheme === 'mailto')) {
            // mailadr
            $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
        } elseif (strpos($link_param, '#') === 0) {
            // check if anchor
            $href = $siteUrl . $link_param;
        } else {
            // Check for FAL link-handler keyword:
            $linkHandlerParts = explode(':', trim($link_param), 2);
            $linkHandlerKeyword = $linkHandlerParts[0];
            $linkHandlerValue = isset($linkHandlerParts[1]) ? $linkHandlerParts[1] : '';
            if ($linkHandlerKeyword === 'file' && !StringUtility::beginsWith($link_param, 'file://')) {
                $href = $siteUrl . '?' . $linkHandlerKeyword . ':' . rawurlencode($linkHandlerValue);
            } else {
                $fileChar = (int)strpos($link_param, '/');
                $urlChar = (int)strpos($link_param, '.');
                // Detects if a file is found in site-root.
                $rootFileDat = explode('?', $link_param, 2)[0];
                $rFD_fI = pathinfo($rootFileDat);
                $rootFileExtension = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';
                if (
                    trim($rootFileDat)
                    && strpos($link_param, '/') === false
                    && (@is_file(PATH_site . $rootFileDat) || GeneralUtility::inList('php,html,htm', $rootFileExtension))
                ) {
                    $href = $siteUrl . $link_param;
                } elseif (
                    ($scheme && !isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$scheme]))
                    || ($urlChar && (!$fileChar || $urlChar < $fileChar))
                ) {
                    // url (external): if has scheme or if a '.' comes before a '/'.
                    $href = $link_param;
                    if (!$scheme) {
                        $href = 'http://' . $href;
                    }
                    $external = true;
                } elseif ($fileChar) {
                    // It is an internal file or folder
                    // Try to transform the href into a FAL reference
                    try {
                        $fileOrFolderObject = \TYPO3\CMS\Core\Resource\ResourceFactory::getInstance()->retrieveFileOrFolderObject($link_param);
                    } catch (\TYPO3\CMS\Core\Resource\Exception $exception) {
                        // Nothing to be done if file/folder not found or path invalid
                        $fileOrFolderObject = null;
                    }
                    if ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\Folder) {
                        // It's a folder
                        $folderIdentifier = $fileOrFolderObject->getIdentifier();
                        $href = $siteUrl . '?file:' . rawurlencode($folderIdentifier);
                    } elseif ($fileOrFolderObject instanceof \TYPO3\CMS\Core\Resource\FileInterface) {
                        // It's a file
                        $fileIdentifier = $fileOrFolderObject->getIdentifier();
                        $fileObject = $fileOrFolderObject->getStorage()->getFile($fileIdentifier);
                        $href = $siteUrl . '?file:' . $fileObject->getUid();
                    } else {
                        $href = $siteUrl . $link_param;
                    }
                } else {
                    // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                    // Splitting the parameter by ',' and if the array counts more than 1 element it's an id/type/parameters triplet
                    $pairParts = GeneralUtility::trimExplode(',', $link_param, true);
                    $link_params_parts = explode('#', isset($pairParts[0]) ? $pairParts[0] : '');
                    $idPart = trim($link_params_parts[0]);
                    $sectionMark = isset($link_params_parts[1]) ? trim($link_params_parts[1]) : '';
                    // If no id or alias is given, set it to class record pid
                    if ((string)$idPart === '') {
                        $idPart = $this->recPid;
                    }
                    // Checking if the id-parameter is an alias.
                    if (!\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($idPart)) {
                        $aliasRecords = BackendUtility::getRecordsByField('pages', 'alias', $idPart);
                        $idPart = is_array($aliasRecords) && isset($aliasRecords[0]['uid']) ? (int)$aliasRecords[0]['uid'] : 0;
                    }
                    $page = BackendUtility::getRecord('pages', $idPart);
                    if (is_array($page)) {
                        // Page must exist...
                        $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                    } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][explode(':', $link_param, 2)[0]])) {
                        // The keyword before the first ':' belongs to a registered link handler
                        $href = $link_param;
                    } else {
                        $href = $siteUrl . '?id=' . $link_param;
                        $error = 'No page found: ' . $idPart;
                    }
                }
            }
        }
        // Setting the A-tag:
        $bTag = '<a href="' . htmlspecialchars($href) . '"'
            . ($tagCode['target'] ? ' target="' . htmlspecialchars($tagCode['target']) . '"' : '')
            . ($tagCode['class'] ? ' class="' . htmlspecialchars($tagCode['class']) . '"' : '')
            . ($tagCode['title'] ? ' title="' . htmlspecialchars($tagCode['title']) . '"' : '')
            . ($external ? ' data-htmlarea-external="1"' : '')
            . ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . '>';
        $eTag = '</a>';
        // Modify parameters
        if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc']) && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'])) {
            $parameters = array(
                'conf' => &$conf,
                'currentBlock' => $v,
                'url' => $href,
                'tagCode' => $tagCode,
                'external' => $external,
                'error' => $error
            );
            foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['modifyParams_LinksRte_PostProc'] as $objRef) {
                $processor = GeneralUtility::getUserObj($objRef);
                $blockSplit[$k] = $processor->modifyParamsLinksRte($parameters, $this);
            }
        } else {
            $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
        }
    }
    // Return content:
    return implode('', $blockSplit);
}
798 
/**
 * Transformation handler "ts_preserve", direction "db": turns the
 * <span specialtag="..."> wrappers back into the tags they stand for.
 *
 * The opening tag is taken (rawurldecoded) from the "specialtag" attribute;
 * the closing tag is built from its tag name. Without configured
 * preserveTags the content is returned unchanged.
 *
 * @param string $value Content from the RTE
 * @return string Content with the preserved tags restored
 */
public function TS_preserve_db($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    // span tags carry the preserved tags; the blocks end up on the odd indexes
    $pieces = $this->splitIntoBlock('span', $value);
    foreach ($pieces as $index => $piece) {
        if (!($index % 2)) {
            continue;
        }
        $attributes = $this->get_tag_attributes_classic($this->getFirstTag($piece));
        if (!$attributes['specialtag']) {
            // An ordinary span, leave it alone
            continue;
        }
        $openingTag = rawurldecode($attributes['specialtag']);
        $closingTag = '</' . $this->getFirstTagName($openingTag) . '>';
        $pieces[$index] = $openingTag . $this->removeFirstAndLastTag($piece) . $closingTag;
    }
    return implode('', $pieces);
}
825 
/**
 * Transformation handler "ts_preserve", direction "rte": wraps every block
 * of a tag listed in $this->preserveTags into a <span> whose "specialtag"
 * attribute holds the rawurlencoded opening tag.
 *
 * Without configured preserveTags the content is returned unchanged.
 *
 * @param string $value Content from the database
 * @return string Content for the RTE
 */
public function TS_preserve_rte($value)
{
    if (!$this->preserveTags) {
        return $value;
    }
    $pieces = $this->splitIntoBlock($this->preserveTags, $value);
    foreach ($pieces as $index => $piece) {
        if (!($index % 2)) {
            // Content between preserved blocks
            continue;
        }
        $encodedTag = rawurlencode($this->getFirstTag($piece));
        $innerContent = $this->removeFirstAndLastTag($piece);
        $pieces[$index] = '<span specialtag="' . $encodedTag . '">' . $innerContent . '</span>';
    }
    return implode('', $pieces);
}
846 
/**
 * Transformation handler: prepares content coming from the RTE for storage in the database.
 *
 * The content is split into block sections (TABLE, BLOCKQUOTE, optionally DIV, plus the tags in
 * $this->blockElementList) and the text between them:
 * - blockquote/dd/div/header/section/footer/nav/article/aside: the inner content is transformed recursively
 * - ol/ul: in CSS mode line breaks are collapsed to spaces, otherwise the block is left untouched
 * - table: flattened through removeTables() unless "preserveTables" is set or CSS mode is active
 * - h1-h6: outside CSS mode rebuilt with only "align"/"class" attributes and cleaned inner content
 * - text between blocks: normalised around <hr> tags and split into lines by divideIntoLines()
 *
 * @param string $value Content from the RTE
 * @param bool $css TRUE for the CSS-styled transformation mode
 * @return string Transformed content; $value is returned untouched when the recursion budget is exhausted
 */
public function TS_transform_db($value, $css = false)
{
    // Recursion guard: every active call consumes one unit of the budget.
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
        // Hand the unit back before bailing out; otherwise every tripped guard would
        // permanently shrink the budget for later calls on the same object.
        $this->TS_transform_db_safecounter++;
        return $value;
    }
    // Split the content from RTE by the occurrence of these blocks:
    $blockTags = 'TABLE,BLOCKQUOTE,' . (!empty($this->procOptions['preserveDIVSections']) ? 'DIV,' : '') . $this->blockElementList;
    $blockSplit = $this->splitIntoBlock($blockTags, $value);
    $lineBreakPattern = '/[' . LF . CR . ']+/';
    $cc = 0;
    $aC = count($blockSplit);
    // Drop trailing blank sections so no superfluous line break is appended after the last block
    while ($aC && trim($blockSplit[$aC - 1]) === '') {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        $cc++;
        // Every section but the last one is terminated with a line break
        $lastBR = $cc == $aC ? '' : LF;
        if ($k % 2) {
            // Inside block:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));
            switch ($tagName) {
                case 'blockquote':
                case 'dd':
                case 'div':
                case 'header':
                case 'section':
                case 'footer':
                case 'nav':
                case 'article':
                case 'aside':
                    // Containers: keep the opening tag, transform the inner content recursively
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>' . $lastBR;
                    break;
                case 'ol':
                case 'ul':
                    if ($css) {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                case 'table':
                    // Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
                    if (empty($this->procOptions['preserveTables']) && !$css) {
                        $blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    if (!$css) {
                        $attribArray = $this->get_tag_attributes_classic($tag);
                        // Only "align" and "class" survive on the header tag; the inner content is cleaned
                        $innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));
                        $alignAttribute = !empty($attribArray['align']) ? ' align="' . htmlspecialchars($attribArray['align']) . '"' : '';
                        $classAttribute = !empty($attribArray['class']) ? ' class="' . htmlspecialchars($attribArray['class']) . '"' : '';
                        $blockSplit[$k] = '<' . $tagName . $alignAttribute . $classAttribute . '>' . $innerContent . '</' . $tagName . '>' . $lastBR;
                    } else {
                        // Eliminate true linebreaks inside Hx tags
                        $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                    break;
                default:
                    // Eliminate true linebreaks inside other headlist tags
                    $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
            }
        } else {
            // NON-block:
            if (trim($blockSplit[$k]) !== '') {
                $blockSplit[$k] = preg_replace('/<hr\\/>/', '<hr />', $blockSplit[$k]);
                // Remove linebreaks preceding hr tags
                $blockSplit[$k] = preg_replace('/[' . LF . CR . ']+<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/', '<$1$2/>', $blockSplit[$k]);
                // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>[' . LF . CR . ']+/', '<$1$2/>', $blockSplit[$k]);
                // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace($lineBreakPattern, ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]) . $lastBR;
                $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
            } else {
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;
    return implode('', $blockSplit);
}
962 
/**
 * Moves the "style" attribute of <a> tags onto a wrapping <span>.
 *
 * An anchor that carries a "style" attribute and no "rteerror" attribute is rewritten as
 * <span style="..."><a ...>...</a></span>. All other content is returned as it came in.
 *
 * @param string $value HTML content
 * @return string Content with styled anchors wrapped in spans
 */
public function transformStyledATags($value)
{
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
        // Even positions hold the content between anchors
        if ($index % 2 === 0) {
            continue;
        }
        $anchorAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment), 1);
        // Nothing to do without a style, or when the anchor is flagged with rteerror
        if (!$anchorAttributes['style'] || $anchorAttributes['rteerror']) {
            continue;
        }
        $spanAttributes = array('style' => $anchorAttributes['style']);
        unset($anchorAttributes['style']);
        $openingTags = '<span ' . GeneralUtility::implodeAttributes($spanAttributes, 1) . '><a ' . GeneralUtility::implodeAttributes($anchorAttributes, 1) . '>';
        $segments[$index] = $openingTags . $this->removeFirstAndLastTag($segments[$index]) . '</a></span>';
    }
    return implode('', $segments);
}
988 
/**
 * Transformation handler: prepares content from the database for display in the RTE.
 *
 * The content is split into block sections (TABLE, BLOCKQUOTE, optionally DIV, plus the tags in
 * $this->blockElementList) and the lines between them. Container blocks are transformed
 * recursively; the lines between blocks are wrapped into paragraph tags by setDivTags().
 *
 * @param string $value Content from the database
 * @param bool|int $css Transformation mode flag; only handed on to the recursive calls
 * @return string Content for the RTE, sections joined with line feeds
 */
public function TS_transform_rte($value, $css = 0)
{
    // Split the content from database by the occurrence of the block elements
    $blockElementList = 'TABLE,BLOCKQUOTE,' . (!empty($this->procOptions['preserveDIVSections']) ? 'DIV,' : '') . $this->blockElementList;
    $blockSplit = $this->splitIntoBlock($blockElementList, $value);
    $paragraphTag = !empty($this->procOptions['useDIVasParagraphTagForRTE']) ? 'div' : 'p';
    $containerTags = array('blockquote', 'dd', 'div', 'header', 'section', 'footer', 'nav', 'article', 'aside');
    // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        $hasNext = isset($blockSplit[$k + 1]);
        if ($k % 2) {
            // Inside one of the blocks:
            $tagName = strtolower($this->getFirstTagName($v));
            if (in_array($tagName, $containerTags, true)) {
                // Containers keep their opening tag, the inner content is transformed recursively
                $blockSplit[$k] = $this->getFirstTag($v) . $this->TS_transform_rte($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>';
            }
            // Strip the line break that directly follows the block from the next section
            $blockSplit[$k + 1] = preg_replace('/^[ ]*' . LF . '/', '', $hasNext ? $blockSplit[$k + 1] : '');
        } else {
            // NON-block:
            // Tag name of the following section; empty when this is the last section
            $nextFTN = $hasNext ? $this->getFirstTagName($blockSplit[$k + 1]) : '';
            $onlyLineBreaks = (preg_match('/^[ ]*' . LF . '+[ ]*$/', $blockSplit[$k]) == 1);
            // If the line is followed by a block or is the last line:
            if (GeneralUtility::inList($blockElementList, $nextFTN) || !$hasNext) {
                if (!$onlyLineBreaks) {
                    // More than just linebreaks: reduce the number of trailing linebreaks by 1
                    $blockSplit[$k] = preg_replace('/(' . LF . '*)' . LF . '[ ]*$/', '$1', $blockSplit[$k]);
                } else {
                    // Only linebreaks: remove the leading linebreak
                    $blockSplit[$k] = preg_replace('/^[ ]*' . LF . '/', '', $blockSplit[$k]);
                }
            }
            // If $blockSplit[$k] is blank then unset the line, unless the line only contained linebreaks
            if ((string)$blockSplit[$k] === '' && !$onlyLineBreaks) {
                unset($blockSplit[$k]);
            } else {
                $blockSplit[$k] = $this->setDivTags($blockSplit[$k], $paragraphTag);
            }
        }
    }
    return implode(LF, $blockSplit);
}
1058 
/**
 * Strips every tag from the content except a fixed list of allowed tags.
 *
 * @param string $value Content to strip
 * @return string Content with only the allowed tags left in place
 */
public function TS_strip_db($value)
{
    // Turn the comma list into the "<b><i>..." notation expected by strip_tags()
    $allowedTags = '<' . str_replace(',', '><', 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote') . '>';
    return strip_tags($value, $allowedTags);
}
1071 
1072  /***************************************************************
1073  *
1074  * Generic RTE transformation, analysis and helper functions
1075  *
1076  **************************************************************/
/**
 * Fetches the content of a URL by delegating to GeneralUtility::getUrl().
 *
 * @param string $url URL (or file reference) to read
 * @return mixed Whatever GeneralUtility::getUrl() returns for the given URL
 */
public function getUrl($url)
{
    $content = GeneralUtility::getUrl($url);
    return $content;
}
1088 
/**
 * Runs HTMLcleaner() on the content with the configuration used for the database direction.
 *
 * @param string $content Content to clean
 * @param string $tagList Comma list of tags to keep; when empty the default "db" keep-tags are used
 * @return string Cleaned content
 */
public function HTMLcleaner_db($content, $tagList = '')
{
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');
    // Default: remove unknown tags.
    $keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
    // Default: re-convert literals to characters (that is &lt; to <)
    $hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;
    // xhtml_cleaning enabled on any of the db-side HTMLparser configurations switches on XHTML cleaning here
    $additionalConfig = array();
    foreach (array('HTMLparser_db.', 'entryHTMLparser_db.', 'exitHTMLparser_db.') as $parserKey) {
        if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
            $additionalConfig['xhtml'] = 1;
            break;
        }
    }
    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hscMode, $additionalConfig);
}
1117 
/**
 * Builds the "keep tags" configuration handed to HTMLcleaner() for the given direction.
 *
 * Without $tagList the result is computed once per direction and cached in
 * $this->getKeepTags_cache. With $tagList the list is built from those tags only and
 * is returned without touching the cache.
 *
 * @param string $direction Direction of the transformation, "rte" or "db"
 * @param string $tagList Comma list of tags to keep; overrides the default list and the allowTags/denyTags options
 * @return array Keep-tags configuration
 */
public function getKeepTags($direction = 'rte', $tagList = '')
{
    $isCached = isset($this->getKeepTags_cache[$direction]) && is_array($this->getKeepTags_cache[$direction]);
    if (!$isCached || $tagList) {
        // Setting up allowed tags:
        if ((string)$tagList !== '') {
            // An explicit tag list takes precedence over everything else
            $keepTags = array_flip(GeneralUtility::trimExplode(',', $tagList, true));
        } else {
            // Default list of tags to keep, extended by the "allowTags" processing option
            $typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
            $allowTags = isset($this->procOptions['allowTags']) ? strtolower($this->procOptions['allowTags']) : '';
            $keepTags = array_flip(GeneralUtility::trimExplode(',', $typoScript_list . ',' . $allowTags, true));
            // Remove denied tags. The keys of $keepTags are lowercase, so the deny list
            // has to be lowercased as well or an entry like "SPAN" would never match.
            $denyList = isset($this->procOptions['denyTags']) ? strtolower($this->procOptions['denyTags']) : '';
            foreach (GeneralUtility::trimExplode(',', $denyList, true) as $deniedTag) {
                unset($keepTags[$deniedTag]);
            }
        }
        $transformBoldAndItalic = !isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags'];
        // Based on the direction of content, set further options:
        switch ($direction) {
            case 'rte':
                if ($transformBoldAndItalic) {
                    // Transform bold/italics tags to strong/em
                    if (isset($keepTags['b'])) {
                        $keepTags['b'] = array('remap' => 'STRONG');
                    }
                    if (isset($keepTags['i'])) {
                        $keepTags['i'] = array('remap' => 'EM');
                    }
                }
                // Convert the TypoScript-style configuration (keys with dots) into the plain array HTMLcleaner understands
                $rteParserConfig = isset($this->procOptions['HTMLparser_rte.']) ? $this->procOptions['HTMLparser_rte.'] : null;
                list($keepTags) = $this->HTMLparserConfig($rteParserConfig, $keepTags);
                break;
            case 'db':
                if ($transformBoldAndItalic) {
                    // Transform strong/em back to bold/italics:
                    if (isset($keepTags['strong'])) {
                        $keepTags['strong'] = array('remap' => 'b');
                    }
                    if (isset($keepTags['em'])) {
                        $keepTags['em'] = array('remap' => 'i');
                    }
                }
                // Setting up span tags if they are allowed:
                if (isset($keepTags['span'])) {
                    $classes = array_merge(array(''), $this->allowedClasses);
                    $keepTags['span'] = array(
                        'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir,itemscope,itemtype,itemprop',
                        'fixAttrib' => array(
                            'class' => array(
                                'list' => $classes,
                                'removeIfFalse' => 1
                            )
                        ),
                        'rmTagIfNoAttrib' => 1
                    );
                    // Without the "allowedClasses" option the class value is not restricted to a list
                    if (empty($this->procOptions['allowedClasses'])) {
                        unset($keepTags['span']['fixAttrib']['class']['list']);
                    }
                }
                // Setting up font tags if they are allowed:
                if (isset($keepTags['font'])) {
                    $allowedFontColors = isset($this->procOptions['allowedFontColors']) ? $this->procOptions['allowedFontColors'] : '';
                    $colors = array_merge(array(''), GeneralUtility::trimExplode(',', $allowedFontColors, true));
                    $keepTags['font'] = array(
                        'allowedAttribs' => 'face,color,size',
                        'fixAttrib' => array(
                            'face' => array(
                                'removeIfFalse' => 1
                            ),
                            'color' => array(
                                'removeIfFalse' => 1,
                                'list' => $colors
                            ),
                            'size' => array(
                                'removeIfFalse' => 1
                            )
                        ),
                        'rmTagIfNoAttrib' => 1
                    );
                    // Without the "allowedFontColors" option the color value is not restricted to a list
                    if (!$allowedFontColors) {
                        unset($keepTags['font']['fixAttrib']['color']['list']);
                    }
                }
                // Further options from the processing options, with defaults for nesting and attribute-less tags
                $TSc = isset($this->procOptions['HTMLparser_db.']) ? $this->procOptions['HTMLparser_db.'] : array();
                if (empty($TSc['globalNesting'])) {
                    $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
                }
                if (empty($TSc['noAttrib'])) {
                    $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
                }
                // Transforming the array from TypoScript to regular array:
                list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
                break;
        }
        // An explicit tag list is never cached
        if ($tagList) {
            return $keepTags;
        }
        $this->getKeepTags_cache[$direction] = $keepTags;
    }
    return $this->getKeepTags_cache[$direction];
}
1232 
/**
 * Converts content made of <div>/<p> sections into plain lines separated by line feeds.
 *
 * Each p/div section becomes one line (or several, when <br> tags are converted to lines).
 * The wrapping tag is only kept when attributes survive the filtering (keepPDIVattribs,
 * align, class). Content outside p/div sections is stripped down to the tags configured
 * in "allowTagsOutside". Nested p/div sections are resolved recursively, at most $count levels deep.
 *
 * @param string $value Content to divide
 * @param int $count Remaining recursion depth; at 0 the content is returned without splitting
 * @param bool $returnArray If set, the processed sections are returned as an array instead of being
 *                          imploded (only when p/div sections were found, otherwise a string is returned)
 * @return string|array Processed content
 */
public function divideIntoLines($value, $count = 5, $returnArray = false)
{
    // Setting configuration for processing:
    $allowTagsOutside = GeneralUtility::trimExplode(',', strtolower(!empty($this->procOptions['allowTagsOutside']) ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img'), true);
    $remapParagraphTag = isset($this->procOptions['remapParagraphTag']) ? strtoupper($this->procOptions['remapParagraphTag']) : '';
    // The third argument eliminates false end-tags
    $divSplit = $this->splitIntoBlock('div,p', $value, 1);
    if (!empty($this->procOptions['keepPDIVattribs'])) {
        $keepAttribListArr = GeneralUtility::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), true);
    } else {
        $keepAttribListArr = array();
    }
    $hrPattern = '/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i';
    // Returns plainly the value if there was no div/p sections in it
    if (count($divSplit) <= 1 || $count <= 0) {
        // Wrap hr tags with LF's
        $newValue = preg_replace($hrPattern, LF . '<$1$2/>' . LF, $value);
        $newValue = preg_replace('/' . LF . LF . '/i', LF, $newValue);
        $newValue = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $newValue);
        return $newValue;
    }
    // Traverse the splitted sections:
    foreach ($divSplit as $k => $v) {
        if ($k % 2) {
            // Inside a p/div section
            $innerContent = $this->removeFirstAndLastTag($v);
            // Fetching 'sub-lines' - which will explode any further p/div nesting...
            $subLines = $this->divideIntoLines($innerContent, $count - 1, 1);
            // An array means nested p/div sections were found; their lines replace this section as they are.
            // Otherwise the section is a true line and is processed here.
            if (!is_array($subLines)) {
                if (empty($this->procOptions['dontConvBRtoParagraph'])) {
                    // Break tags are treated as if each delimited a line of content
                    $subLines = preg_split('/<br[[:space:]]*[\\/]?>/i', $innerContent);
                } else {
                    $subLines = array($subLines);
                }
                // The wrapping tag is the same for every sub-line, so analyse it once
                $fTag = $this->getFirstTag($divSplit[$k]);
                $tagName = strtolower($this->getFirstTagName($divSplit[$k]));
                $attribs = $this->get_tag_attributes($fTag);
                $tagAttributes = isset($attribs[0]) && is_array($attribs[0]) ? $attribs[0] : array();
                // Keep attributes (lowercase)
                $newAttribs = array();
                foreach ($keepAttribListArr as $keepA) {
                    if (isset($tagAttributes[$keepA])) {
                        $newAttribs[$keepA] = $tagAttributes[$keepA];
                    }
                }
                // ALIGN attribute: set to value, but not 'left'
                $align = isset($tagAttributes['align']) ? $tagAttributes['align'] : '';
                if (empty($this->procOptions['skipAlign']) && trim($align) !== '' && strtolower($align) != 'left') {
                    $newAttribs['align'] = strtolower($align);
                }
                // CLASS attribute: kept as a whole when allowed, otherwise reduced to its allowed class names
                $class = isset($tagAttributes['class']) ? $tagAttributes['class'] : '';
                if (empty($this->procOptions['skipClass']) && trim($class) !== '') {
                    if (empty($this->allowedClasses) || in_array($class, $this->allowedClasses)) {
                        $newAttribs['class'] = $class;
                    } else {
                        $newClasses = array();
                        foreach (GeneralUtility::trimExplode(' ', $class, true) as $className) {
                            if (in_array($className, $this->allowedClasses)) {
                                $newClasses[] = $className;
                            }
                        }
                        if (!empty($newClasses)) {
                            $newAttribs['class'] = implode(' ', $newClasses);
                        }
                    }
                }
                // The line is only wrapped when attributes are left and remapping is not set to "1"
                $wrapLines = !empty($newAttribs) && $remapParagraphTag !== '1';
                $openingTag = '';
                if ($wrapLines) {
                    if ($remapParagraphTag === 'P') {
                        $tagName = 'p';
                    }
                    if ($remapParagraphTag === 'DIV') {
                        $tagName = 'div';
                    }
                    $openingTag = '<' . trim($tagName . ' ' . $this->compileTagAttribs($newAttribs)) . '>';
                }
                // Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
                foreach ($subLines as $sk => $subLine) {
                    // Clear up the subline for DB.
                    $subLine = $this->HTMLcleaner_db($subLine);
                    // Remove any line break char (10 or 13)
                    $subLine = preg_replace('/' . LF . '|' . CR . '/', '', $subLine);
                    if ($wrapLines) {
                        $subLine = $openingTag . $subLine . '</' . $tagName . '>';
                    }
                    $subLines[$sk] = $subLine;
                }
            }
            // Add the processed line(s)
            $divSplit[$k] = implode(LF, $subLines);
            // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
            // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
            // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
            if (trim(strip_tags($divSplit[$k])) == '&nbsp;' && !preg_match('/\\<(img)(\\s[^>]*)?\\/?>/si', $divSplit[$k]) && !preg_match('/\\<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
                $divSplit[$k] = '';
            }
        } else {
            // Outside p/div: strip everything but the allowed tags and drop positions without content
            $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
            // Wrap hr tags with LF's
            $divSplit[$k] = preg_replace($hrPattern, LF . '<$1$2/>' . LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/' . LF . LF . '/i', LF, $divSplit[$k]);
            $divSplit[$k] = preg_replace('/(^' . LF . ')|(' . LF . '$)/i', '', $divSplit[$k]);
            if ((string)$divSplit[$k] === '') {
                unset($divSplit[$k]);
            }
        }
    }
    // Return value:
    return $returnArray ? $divSplit : implode(LF, $divSplit);
}
1359 
/**
 * Turns line-feed separated lines into paragraphs for the RTE.
 *
 * Every line is cleaned with HTMLcleaner() using the "rte" keep-tags and then wrapped in
 * <$dT> tags, unless it contains an <hr> tag or is already wrapped in <div> or <p>.
 * Blank lines become "&nbsp;" paragraphs.
 *
 * @param string $value Content with one paragraph per line
 * @param string $dT Tag name used for wrapping, typically "p" or "div"
 * @return string Processed lines, joined with line feeds
 */
public function setDivTags($value, $dT = 'p')
{
    // Configuration for cleaning each line on its way to the RTE
    $keepTags = $this->getKeepTags('rte');
    // Default: unknown tags are protected
    $unknownTagMode = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect';
    // Default: special characters are converted
    $hscMode = $this->procOptions['dontHSC_rte'] ? 0 : 1;
    $restoreNbsp = !$this->procOptions['dontConvAmpInNBSP_rte'];
    $lines = explode(LF, $value);
    foreach ($lines as $lineNumber => $line) {
        if (trim($line) === '') {
            // Blank lines are represented by a non-breaking space
            $line = '&nbsp;';
        } else {
            $line = $this->HTMLcleaner($line, $keepTags, $unknownTagMode, $hscMode);
            if ($restoreNbsp) {
                $line = str_replace('&amp;nbsp;', '&nbsp;', $line);
            }
        }
        // Lines containing an hr tag are never wrapped
        if (!preg_match('/<(hr)(\\s[^>\\/]*)?[[:space:]]*\\/?>/i', $line)) {
            $probe = strtolower(trim($line));
            $isDivWrapped = substr($probe, 0, 4) == '<div' && substr($probe, -6) == '</div>';
            $isParagraphWrapped = substr($probe, 0, 2) == '<p' && substr($probe, -4) == '</p>';
            // Only set the wrapping tag if there is not already a div or p around the line
            if (!$isDivWrapped && !$isParagraphWrapped) {
                $line = '<' . $dT . '>' . $line . '</' . $dT . '>';
            }
        }
        $lines[$lineNumber] = $line;
    }
    return implode(LF, $lines);
}
1406 
1407 
/**
 * Returns the site URL as reported by GeneralUtility::getIndpEnv('TYPO3_SITE_URL').
 *
 * @return string The current site URL
 */
public function siteUrl()
{
    $siteUrl = GeneralUtility::getIndpEnv('TYPO3_SITE_URL');
    return $siteUrl;
}
1418 
/**
 * Replaces every table in the content by the content of its cells.
 *
 * Each <td> of each <tr> is appended to the output followed by $breakChar. The sections
 * before, between and after tables are joined with $breakChar as well.
 *
 * @param string $value Content possibly containing tables
 * @param string $breakChar Separator placed after every cell and between sections
 * @return string Content without table markup
 */
public function removeTables($value, $breakChar = '<br />')
{
    $sections = $this->splitIntoBlock('table', $value);
    foreach ($sections as $sectionIndex => $tableHtml) {
        // Even positions are the content outside of tables
        if ($sectionIndex % 2 === 0) {
            continue;
        }
        $flattened = '';
        $rows = $this->splitIntoBlock('tr', $tableHtml);
        foreach ($rows as $rowIndex => $rowHtml) {
            if ($rowIndex % 2 === 0) {
                continue;
            }
            $cells = $this->getAllParts($this->splitIntoBlock('td', $rowHtml), 1, 0);
            foreach ($cells as $cellContent) {
                $flattened .= $cellContent . $breakChar;
            }
        }
        $sections[$sectionIndex] = $flattened;
    }
    // Implode it all again:
    return implode($breakChar, $sections);
}
1449 
/**
 * Applies the default tag mapping for the given direction through mapTags():
 * strong/em become b/i for "db", b/i become strong/em for "rte".
 *
 * @param string $code Content to map tags in
 * @param string $direction Direction of the mapping, "db" or "rte"
 * @return string Content with mapped tags; unchanged for any other direction
 */
public function defaultTStagMapping($code, $direction = 'rte')
{
    $mappingsByDirection = array(
        'db' => array(
            'strong' => 'b',
            'em' => 'i'
        ),
        'rte' => array(
            'b' => 'strong',
            'i' => 'em'
        )
    );
    foreach ($mappingsByDirection as $mappedDirection => $tagMap) {
        if ($direction == $mappedDirection) {
            $code = $this->mapTags($code, $tagMap);
        }
    }
    return $code;
}
1475 
/**
 * Determines width and height from a tag's attributes.
 *
 * Pixel values of the "width"/"height" properties in the "style" attribute take precedence;
 * the "width"/"height" attributes are the fallback for a dimension the style does not provide
 * (or provides as 0). Other properties that merely end in "width"/"height", such as
 * "border-width" or "line-height", are ignored.
 *
 * @param array $attribArray Tag attributes, keyed by attribute name
 * @return array Two integers: array(width, height); 0 for a dimension that cannot be determined
 */
public function getWHFromAttribs($attribArray)
{
    $w = 0;
    $h = 0;
    $style = isset($attribArray['style']) ? trim($attribArray['style']) : '';
    if ($style) {
        // "<property> : <digits> px"; the lookbehind below keeps the property name from matching
        // inside a longer property name
        $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
        $reg = array();
        // Width
        if (preg_match('/(?<![\\w-])width' . $regex . '/i', $style, $reg)) {
            $w = (int)$reg[1];
        }
        // Height
        if (preg_match('/(?<![\\w-])height' . $regex . '/i', $style, $reg)) {
            $h = (int)$reg[1];
        }
    }
    if (!$w && isset($attribArray['width'])) {
        $w = $attribArray['width'];
    }
    if (!$h && isset($attribArray['height'])) {
        $h = $attribArray['height'];
    }
    return array((int)$w, (int)$h);
}
1504 
/**
 * Analyses a URL from a link tag and classifies it.
 *
 * The returned array always contains "url" and "type"; "type" is one of:
 * - "email": the URL starts with "mailto:"; "url" is the address
 * - "file": the URL contains "?file:", or it points below the site URL at something other than index.php
 * - "anchor": the part after the site URL is just the "#fragment"
 * - "page": the part after the site URL is an "id=" query, optionally on index.php; "pageid", "cElement"
 *   and "query" are set in addition
 * - "ext": everything else
 * For URLs on the current host "relScriptPath" and "relUrl" are set as well.
 *
 * @param string $url URL to analyse
 * @return array Information about the URL
 */
public function urlInfoForLinkTags($url)
{
    $info = array();
    $url = trim($url);
    if (substr(strtolower($url), 0, 7) == 'mailto:') {
        $info['url'] = trim(substr($url, 7));
        $info['type'] = 'email';
        return $info;
    }
    $filePosition = strpos($url, '?file:');
    if ($filePosition !== false) {
        $info['type'] = 'file';
        $info['url'] = rawurldecode(substr($url, $filePosition + 1));
        return $info;
    }
    $curURL = (string)$this->siteUrl();
    // Length of the common prefix of the URL and the site URL. The comparison is bounded by the
    // shorter of the two strings so no offset past the end of either one is read.
    $commonLength = 0;
    $maxCommonLength = min(strlen($url), strlen($curURL));
    while ($commonLength < $maxCommonLength && $url[$commonLength] === $curURL[$commonLength]) {
        $commonLength++;
    }
    $info['relScriptPath'] = substr($curURL, $commonLength);
    $info['relUrl'] = substr($url, $commonLength);
    $info['url'] = $url;
    $info['type'] = 'ext';
    $siteUrl_parts = parse_url($url);
    $curUrl_parts = parse_url($curURL);
    $urlHost = isset($siteUrl_parts['host']) ? $siteUrl_parts['host'] : null;
    $curHost = isset($curUrl_parts['host']) ? $curUrl_parts['host'] : null;
    // Hosts should match, and the script path has to be empty (FE-EDIT) or start with the TYPO3 main dir
    $scriptPathMatches = !$info['relScriptPath'] || defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir;
    if ($urlHost == $curHost && $scriptPathMatches) {
        $uP = parse_url($info['relUrl']);
        $relPath = isset($uP['path']) ? $uP['path'] : '';
        $fragment = isset($siteUrl_parts['fragment']) ? $siteUrl_parts['fragment'] : '';
        if ($info['relUrl'] === '#' . $fragment) {
            $info['url'] = $info['relUrl'];
            $info['type'] = 'anchor';
        } elseif (!trim($relPath) || $relPath === 'index.php') {
            // URL is a page (id parameter)
            $pp = preg_split('/^id=/', isset($uP['query']) ? $uP['query'] : '');
            // Drop any repeated id parameter; the first query part is the page id, the rest are additional parameters
            $idAndParameters = preg_replace('/&id=[^&]*/', '', isset($pp[1]) ? $pp[1] : '');
            $parameters = explode('&', $idAndParameters);
            $id = array_shift($parameters);
            if ($id) {
                $info['pageid'] = $id;
                $info['cElement'] = isset($uP['fragment']) ? $uP['fragment'] : null;
                $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
                $info['type'] = 'page';
                $info['query'] = !empty($parameters[0]) ? '&' . implode('&', $parameters) : '';
            }
        } else {
            $info['url'] = $info['relUrl'];
            $info['type'] = 'file';
        }
    } else {
        unset($info['relScriptPath']);
        unset($info['relUrl']);
    }
    return $info;
}
1567 
/**
 * Converts the href of <a> tags to absolute URLs.
 *
 * - href without a scheme: the site URL is prepended (after cutting off strlen($this->relBackPath) leading characters)
 * - href with a scheme other than "mailto": the attribute data-htmlarea-external="1" is added
 * - no href, or an empty one (e.g. a named anchor): the tag is preserved and flagged with rtekeep="1"
 *
 * @param string $value Content to process
 * @param bool $dontSetRTEKEEP If set, rtekeep="1" is not added to anchors that have an href
 * @return string Content with converted <a> tags
 */
public function TS_AtagToAbs($value, $dontSetRTEKEEP = false)
{
    $blockSplit = $this->splitIntoBlock('A', $value);
    foreach ($blockSplit as $k => $v) {
        // Odd positions hold the <a> blocks
        if ($k % 2) {
            $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
            // Checking if there is a scheme, and if not, prepend the current url.
            // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved.
            // A missing href must count as "no content" too; a plain "!== ''" comparison lets it through
            // and turns a named anchor into a link to the site URL.
            $href = isset($attribArray['href']) ? (string)$attribArray['href'] : '';
            if ($href !== '') {
                $uP = parse_url(strtolower($href));
                if (empty($uP['scheme'])) {
                    $attribArray['href'] = $this->siteUrl() . substr($href, strlen($this->relBackPath));
                } elseif ($uP['scheme'] != 'mailto') {
                    $attribArray['data-htmlarea-external'] = 1;
                }
            } else {
                $attribArray['rtekeep'] = 1;
            }
            if (!$dontSetRTEKEEP) {
                $attribArray['rtekeep'] = 1;
            }
            $bTag = '<a ' . GeneralUtility::implodeAttributes($attribArray, 1) . '>';
            $eTag = '</a>';
            // Nested content is processed with the same rtekeep setting as the outer call
            $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k]), $dontSetRTEKEEP) . $eTag;
        }
    }
    return implode('', $blockSplit);
}
1604 
/**
 * Adjusts the width/height attributes of an image according to the "plainImageMode" processing option.
 *
 * - "lockDimensions": width and height are set to the dimensions given in $imageInfo
 * - "lockRatioWhenSmaller": the width is capped at the width in $imageInfo, then the height follows the ratio
 * - "lockRatio": the height is recalculated from the width using the ratio in $imageInfo
 *
 * @param array $imageInfo Image dimensions: index 0 is the width, index 1 the height
 * @param array $attribArray Attributes of the image tag
 * @return array The attributes, with width/height adjusted where the mode asks for it
 */
protected function applyPlainImageModeSettings($imageInfo, $attribArray)
{
    $configuredMode = $this->procOptions['plainImageMode'];
    if (!$configuredMode) {
        return $attribArray;
    }
    $mode = (string)$configuredMode;
    if ($mode === 'lockDimensions') {
        $attribArray['width'] = $imageInfo[0];
        $attribArray['height'] = $imageInfo[1];
        return $attribArray;
    }
    // "lockRatioWhenSmaller" first caps the width and then shares the ratio handling of "lockRatio"
    if ($mode === 'lockRatioWhenSmaller' && $attribArray['width'] > $imageInfo[0]) {
        $attribArray['width'] = $imageInfo[0];
    }
    $keepsRatio = $mode === 'lockRatioWhenSmaller' || $mode === 'lockRatio';
    if ($keepsRatio && $imageInfo[0] > 0) {
        $attribArray['height'] = round($attribArray['width'] * ($imageInfo[1] / $imageInfo[0]));
    }
    return $attribArray;
}
1635 
/**
 * Returns the logger registered for the class of this object.
 *
 * @return mixed Logger obtained from the LogManager for get_class($this)
 */
protected function getLogger()
{
    return GeneralUtility::makeInstance(\TYPO3\CMS\Core\Log\LogManager::class)->getLogger(get_class($this));
}
1646 }