TYPO3
7.6
|
Public Member Functions | |
hook_indexContent (&$pObj) | |
backend_initIndexer ($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=false) | |
backend_setFreeIndexUid ($freeIndexUid, $freeIndexSetId=0) | |
backend_indexAsTYPO3Page ($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0) | |
init () | |
initializeExternalParsers () | |
indexTypo3PageContent () | |
splitHTMLContent ($content) | |
getHTMLcharset ($content) | |
convertHTMLToUtf8 ($content, $charset= '') | |
embracingTags ($string, $tagName, &$tagContent, &$stringAfter, &$paramList) | |
typoSearchTags (&$body) | |
extractLinks ($content) | |
extractHyperLinks ($html) | |
extractBaseHref ($html) | |
indexExternalUrl ($externalUrl) | |
getUrlHeaders ($url) | |
indexRegularDocument ($file, $force=false, $contentTmpFile= '', $altExtension= '') | |
readFileContent ($fileExtension, $absoluteFileName, $sectionPointer) | |
fileContentParts ($ext, $absFile) | |
splitRegularContent ($content) | |
charsetEntity2utf8 (&$contentArr, $charset) | |
processWordsInArrays ($contentArr) | |
bodyDescription ($contentArr) | |
indexAnalyze ($content) | |
analyzeHeaderinfo (&$retArr, $content, $key, $offset) | |
analyzeBody (&$retArr, $content) | |
metaphone ($word, $returnRawMetaphoneValue=false) | |
submitPage () | |
submit_grlist ($hash, $phash_x) | |
submit_section ($hash, $hash_t3) | |
removeOldIndexedPages ($phash) | |
submitFilePage ($hash, $file, $subinfo, $ext, $mtime, $ctime, $size, $content_md5h, $contentParts) | |
submitFile_grlist ($hash) | |
submitFile_section ($hash) | |
removeOldIndexedFiles ($phash) | |
checkMtimeTstamp ($mtime, $phash) | |
checkContentHash () | |
checkExternalDocContentHash ($hashGr, $content_md5h) | |
is_grlist_set ($phash_x) | |
update_grlist ($phash, $phash_x) | |
updateTstamp ($phash, $mtime=0) | |
updateSetId ($phash) | |
updateParsetime ($phash, $parsetime) | |
updateRootline () | |
getRootLineFields (array &$fieldArray) | |
includeCrawlerClass () | |
checkWordList ($wordListArray) | |
submitWords ($wordList, $phash) | |
freqMap ($freq) | |
setT3Hashes () | |
setExtHashes ($file, $subinfo=array()) | |
log_push ($msg, $key) | |
log_pull () | |
log_setTSlogMessage ($msg, $errorNum=0) | |
Public Attributes | |
$reasons | |
$excludeSections = 'script,style' | |
$external_parsers = array() | |
$defaultGrList = '0,-1' | |
$tstamp_maxAge = 0 | |
$tstamp_minAge = 0 | |
$maxExternalFiles = 0 | |
$forceIndexing = false | |
$crawlerActive = false | |
$defaultContentArray | |
$wordcount = 0 | |
$externalFileCounter = 0 | |
$conf = array() | |
$indexerConfig = array() | |
$hash = array() | |
$file_phash_arr = array() | |
$contentParts = array() | |
$content_md5h = '' | |
$internal_log = array() | |
$indexExternalUrl_content = '' | |
$cHashParams = array() | |
$freqRange = 32000 | |
$freqMax = 0.1 | |
$enableMetaphoneSearch = false | |
$storeMetaphoneInfoAsWords | |
$metaphoneContent = '' | |
$csObj | |
$metaphoneObj | |
$lexerObj | |
$flagBitMask | |
Protected Member Functions | |
createLocalPath ($sourcePath) | |
createLocalPathFromT3vars ($sourcePath) | |
createLocalPathUsingDomainURL ($sourcePath) | |
createLocalPathUsingAbsRefPrefix ($sourcePath) | |
createLocalPathFromAbsoluteURL ($sourcePath) | |
createLocalPathFromRelativeURL ($sourcePath) | |
addSpacesToKeywordList ($keywordList) | |
Static Protected Member Functions | |
static | isRelativeURL ($url) |
static | isAllowedLocalFile ($filePath) |
Indexing class for TYPO3 frontend
Definition at line 25 of file indexed_search/Classes/Indexer.php.
|
protected |
Makes sure that keywords are space-separated. This is important for their proper display as part of the fulltext index.
string | $keywordList |
Definition at line 2131 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\trimExplode().
Referenced by Indexer\splitHTMLContent().
analyzeBody | ( | & | $retArr, |
$content | |||
) |
Calculates relevant information for bodycontent
array | $retArr | Index array, passed by reference |
array | $content | Standard content array |
Definition at line 1378 of file indexed_search/Classes/Indexer.php.
References IndexedSearchUtility\md5inthash(), and Indexer\metaphone().
Referenced by Indexer\indexAnalyze().
analyzeHeaderinfo | ( | & | $retArr, |
$content, | |||
$key, | |||
$offset | |||
) |
Calculates relevant information for headercontent
array | $retArr | Index array, passed by reference |
array | $content | Standard content array |
string | $key | Key from standard content array |
int | $offset | Bit-wise priority to type |
Definition at line 1347 of file indexed_search/Classes/Indexer.php.
References IndexedSearchUtility\md5inthash(), and Indexer\metaphone().
Referenced by Indexer\indexAnalyze().
backend_indexAsTYPO3Page | ( | $title, | |
$keywords, | |||
$description, | |||
$content, | |||
$charset, | |||
$mtime, | |||
$crdate = 0 , |
|||
$recordUid = 0 |
|||
) |
Indexing records as the content of a TYPO3 page.
string | $title | Title equivalent |
string | $keywords | Keywords equivalent |
string | $description | Description equivalent |
string | $content | The main content to index |
string | $charset | The charset of the title, keyword, description and body-content. MUST BE VALID, otherwise nothing is indexed! |
int | $mtime | Last modification time, in seconds |
int | $crdate | The creation date of the content, in seconds |
int | $recordUid | The record UID that the content comes from (for registration with the indexed rows) |
Definition at line 407 of file indexed_search/Classes/Indexer.php.
References Indexer\indexTypo3PageContent().
backend_initIndexer | ( | $id, | |
$type, | |||
$sys_language_uid, | |||
$MP, | |||
$uidRL, | |||
$cHash_array = array() , |
|||
$createCHash = false |
|||
) |
Initializing the "combined ID" of the page (phash) being indexed (or for which external media is attached)
int | $id | The page uid, &id= |
int | $type | The page type, &type= |
int | $sys_language_uid | sys_language uid, typically &L= |
string | $MP | The MP variable (Mount Points), &MP= |
array | $uidRL | Rootline array of only UIDs. |
array | $cHash_array | Array of GET variables to register with this indexing |
bool | $createCHash | If set, calculates a cHash value from the $cHash_array. Probably you will not do that since such cases are indexed through the frontend and the idea of this interface is to index non-cacheable pages from the backend! |
Definition at line 338 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\implodeArrayForUrl(), Indexer\init(), and GeneralUtility\makeInstance().
backend_setFreeIndexUid | ( | $freeIndexUid, | |
$freeIndexSetId = 0 |
|||
) |
Sets the free-index uid. Can be called right after backend_initIndexer()
int | $freeIndexUid | Free index UID |
int | $freeIndexSetId | Set id - an integer identifying the "set" of indexing operations. |
Definition at line 388 of file indexed_search/Classes/Indexer.php.
bodyDescription | ( | $contentArr | ) |
Extracts the sample description text from the content array.
array | $contentArr | Content array |
Definition at line 1309 of file indexed_search/Classes/Indexer.php.
References MathUtility\forceIntegerInRange().
Referenced by Indexer\submitFilePage(), and Indexer\submitPage().
charsetEntity2utf8 | ( | & | $contentArr, |
$charset | |||
) |
Convert character set and HTML entities in the value of input content array keys
array | $contentArr | Standard content array |
string | $charset | Charset of the input content (converted to utf-8) |
Definition at line 1269 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\indexTypo3PageContent().
checkContentHash | ( | ) |
Check content hash in phash table
Definition at line 1772 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexTypo3PageContent().
checkExternalDocContentHash | ( | $hashGr, | |
$content_md5h | |||
) |
Check content hash for external documents. Returns TRUE if the document needs to be indexed (that is, there was no result)
int | $hashGr | phash value to check (phash_grouping) |
int | $content_md5h | Content hash to check |
Definition at line 1793 of file indexed_search/Classes/Indexer.php.
References Indexer\$content_md5h, $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexRegularDocument().
checkMtimeTstamp | ( | $mtime, | |
$phash | |||
) |
Check the mtime / tstamp of the currently indexed page/file (based on phash). Returns a positive integer if the page needs to be indexed
int | $mtime | mtime value to test against limits and indexed page (usually this is the mtime of the cached document) |
int | $phash | "phash" used to select any already indexed page to see what its mtime is. |
Definition at line 1718 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, IndexedSearchUtility\isTableUsed(), Indexer\log_setTSlogMessage(), and Indexer\updateTstamp().
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
checkWordList | ( | $wordListArray | ) |
Adds new words to db
array | $wordListArray | Word List array (where each word has information about position etc). |
Definition at line 1945 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, IndexedSearchUtility\isTableUsed(), and Indexer\log_setTSlogMessage().
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
convertHTMLToUtf8 | ( | $content, | |
$charset = '' |
|||
) |
Converts an HTML document to utf-8
string | $content | HTML content, any charset |
string | $charset | Optional charset (otherwise extracted from HTML) |
Definition at line 666 of file indexed_search/Classes/Indexer.php.
References Indexer\getHTMLcharset().
|
protected |
Checks if the file is local
string | $sourcePath |
Definition at line 943 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\extractHyperLinks().
|
protected |
Attempts to create a local file path from the absolute URL without schema.
string | $sourcePath |
Definition at line 1038 of file indexed_search/Classes/Indexer.php.
|
protected |
Attempts to create a local file path from the relative URL.
string | $sourcePath |
Definition at line 1057 of file indexed_search/Classes/Indexer.php.
|
protected |
Attempts to create a local file path from T3VARs. This is useful for various download extensions that hide actual file name but still want the file to be indexed.
string | $sourcePath |
Definition at line 970 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and GeneralUtility\shortMD5().
|
protected |
Attempts to create a local file path by matching absRefPrefix. This requires TSFE. If TSFE is missing, this function does nothing.
string | $sourcePath |
Definition at line 1014 of file indexed_search/Classes/Indexer.php.
References $GLOBALS.
|
protected |
Attempts to create a local file path by matching a current request URL.
string | $sourcePath |
Definition at line 992 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\getIndpEnv().
embracingTags | ( | $string, | |
$tagName, | |||
& | $tagContent, | ||
& | $stringAfter, | ||
& | $paramList | ||
) |
Finds first occurrence of embracing tags and returns the embraced content and the original string with the tag removed in the two passed variables. Returns FALSE if no match found. Useful e.g. for finding the <title> of a document or removing <script>-sections
string | $string | String to search in |
string | $tagName | Tag name, eg. "script" |
string | $tagContent | Passed by reference: Content inside found tag |
string | $stringAfter | Passed by reference: Content after found tag |
string | $paramList | Passed by reference: Attributes of the found tag. |
Definition at line 691 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\splitHTMLContent().
extractBaseHref | ( | $html | ) |
Extracts the "base href" from content string.
string | $html | Content to analyze |
Definition at line 856 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\makeInstance().
extractHyperLinks | ( | $html | ) |
Extracts all links to external documents from the HTML content string
string | $html |
Definition at line 827 of file indexed_search/Classes/Indexer.php.
References Indexer\createLocalPath(), and GeneralUtility\makeInstance().
Referenced by Indexer\extractLinks().
extractLinks | ( | $content | ) |
Extract links (hrefs) from HTML content and if indexable media is found, it is indexed.
string | $content | HTML content |
Definition at line 748 of file indexed_search/Classes/Indexer.php.
References $list, elseif, Indexer\extractHyperLinks(), GeneralUtility\getFileAbsFileName(), Indexer\includeCrawlerClass(), Indexer\indexExternalUrl(), Indexer\indexRegularDocument(), GeneralUtility\isAllowedAbsPath(), Indexer\log_setTSlogMessage(), and GeneralUtility\makeInstance().
Referenced by Indexer\indexTypo3PageContent().
fileContentParts | ( | $ext, | |
$absFile | |||
) |
Creates an array with pointers to divisions of document.
string | $ext | File extension |
string | $absFile | Absolute filename (must exist and be validated OK before calling function) |
Definition at line 1233 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\indexRegularDocument().
freqMap | ( | $freq | ) |
Maps frequency from a real number in [0;1] to an integer in [0;$this->freqRange] with anything above $this->freqMax as 1 and back.
double | $freq | Frequency |
Definition at line 2016 of file indexed_search/Classes/Indexer.php.
References Indexer\$freqRange.
Referenced by Indexer\submitWords().
getHTMLcharset | ( | $content | ) |
Extract the charset value from HTML meta tag.
string | $content | HTML content |
Definition at line 650 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\convertHTMLToUtf8().
getRootLineFields | ( | array & | $fieldArray | ) |
Adding values for root-line fields. rl0, rl1 and rl2 are standard. A hook might add more.
array | $fieldArray | Field array, passed by reference |
Definition at line 1912 of file indexed_search/Classes/Indexer.php.
References $GLOBALS.
Referenced by Indexer\submit_section(), and Indexer\updateRootline().
getUrlHeaders | ( | $url | ) |
Getting HTTP request headers of URL
string | $url | The URL |
Definition at line 918 of file indexed_search/Classes/Indexer.php.
References $url, GeneralUtility\getUrl(), and GeneralUtility\trimExplode().
Referenced by Indexer\indexExternalUrl().
hook_indexContent | ( | & | $pObj | ) |
Parent Object (TSFE) Initialization
TypoScriptFrontendController | $pObj | Parent Object, passed by reference |
Definition at line 236 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\$indexerConfig, Indexer\indexTypo3PageContent(), Indexer\init(), Indexer\log_pull(), Indexer\log_push(), and Indexer\log_setTSlogMessage().
includeCrawlerClass | ( | ) |
Includes the crawler class
Definition at line 1929 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\requireOnce().
Referenced by Indexer\extractLinks().
indexAnalyze | ( | $content | ) |
Analyzes content to use for indexing.
array | $content | Standard content array: an array with the keys title,keywords,description and body, which all contain an array of words. |
Definition at line 1327 of file indexed_search/Classes/Indexer.php.
References Indexer\analyzeBody(), and Indexer\analyzeHeaderinfo().
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
indexExternalUrl | ( | $externalUrl | ) |
Index External URLs HTML content
string | $externalUrl | URL, eg. "http://typo3.org/" |
Definition at line 888 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\getUrl(), Indexer\getUrlHeaders(), Indexer\indexRegularDocument(), GeneralUtility\tempnam(), and GeneralUtility\writeFile().
Referenced by Indexer\extractLinks().
indexRegularDocument | ( | $file, | |
$force = false , |
|||
$contentTmpFile = '' , |
|||
$altExtension = '' |
|||
) |
Indexing a regular document given as $file (relative to PATH_site, local file)
string | $file | Relative Filename, relative to PATH_site. It can also be an absolute path as long as it is inside the lockRootPath (validated with ::isAbsPath()). Finally, if $contentTmpFile is set, this value can be anything, most likely a URL |
bool | $force | If set, indexing is forced (despite content hashes, mtime etc). |
string | $contentTmpFile | Temporary file with the content to read it from (instead of $file). Used when the $file is a URL. |
string | $altExtension | File extension for temporary file. |
Definition at line 1109 of file indexed_search/Classes/Indexer.php.
References Indexer\$content_md5h, Indexer\$contentParts, Indexer\checkExternalDocContentHash(), Indexer\checkMtimeTstamp(), Indexer\checkWordList(), Indexer\fileContentParts(), GeneralUtility\getFileAbsFileName(), Indexer\indexAnalyze(), GeneralUtility\isAbsPath(), GeneralUtility\isAllowedAbsPath(), IndexedSearchUtility\isTableUsed(), Indexer\log_pull(), Indexer\log_push(), Indexer\log_setTSlogMessage(), IndexedSearchUtility\md5inthash(), GeneralUtility\milliseconds(), Indexer\processWordsInArrays(), Indexer\readFileContent(), Indexer\setExtHashes(), Indexer\submitFile_section(), Indexer\submitFilePage(), Indexer\submitWords(), Indexer\updateParsetime(), and Indexer\updateTstamp().
Referenced by Indexer\extractLinks(), and Indexer\indexExternalUrl().
indexTypo3PageContent | ( | ) |
Start indexing of the TYPO3 page
Definition at line 520 of file indexed_search/Classes/Indexer.php.
References Indexer\charsetEntity2utf8(), Indexer\checkContentHash(), Indexer\checkMtimeTstamp(), Indexer\checkWordList(), elseif, Indexer\extractLinks(), Indexer\indexAnalyze(), Indexer\is_grlist_set(), IndexedSearchUtility\isTableUsed(), Indexer\log_pull(), Indexer\log_push(), Indexer\log_setTSlogMessage(), IndexedSearchUtility\md5inthash(), GeneralUtility\milliseconds(), Indexer\processWordsInArrays(), Indexer\splitHTMLContent(), Indexer\submitPage(), Indexer\submitWords(), Indexer\update_grlist(), Indexer\updateParsetime(), Indexer\updateRootline(), Indexer\updateSetId(), and Indexer\updateTstamp().
Referenced by Indexer\backend_indexAsTYPO3Page(), and Indexer\hook_indexContent().
init | ( | ) |
Initializes the object. $this->conf MUST be set with proper values prior to this call!!!
Definition at line 448 of file indexed_search/Classes/Indexer.php.
References Indexer\$enableMetaphoneSearch, $GLOBALS, MathUtility\forceIntegerInRange(), GeneralUtility\getUserObj(), Indexer\initializeExternalParsers(), IndexedSearchUtility\isTableUsed(), GeneralUtility\makeInstance(), and Indexer\setT3Hashes().
Referenced by Indexer\backend_initIndexer(), and Indexer\hook_indexContent().
initializeExternalParsers | ( | ) |
Initialize external parsers
Definition at line 496 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and GeneralUtility\getUserObj().
Referenced by Indexer\init().
is_grlist_set | ( | $phash_x | ) |
Checks if a grlist record has been set for the phash value input (looking at the "real" phash of the current content, not the linked-to phash of the common search result page)
int | $phash_x | Phash integer to test. |
Definition at line 1809 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexTypo3PageContent().
|
staticprotected |
Checks if the path points to the file inside the web site
string | $filePath |
Definition at line 1087 of file indexed_search/Classes/Indexer.php.
References GeneralUtility\resolveBackPath().
|
staticprotected |
Checks if URL is relative.
string | $url |
Definition at line 1075 of file indexed_search/Classes/Indexer.php.
References $url.
log_pull | ( | ) |
Pull function wrapper for TT logging
Definition at line 2101 of file indexed_search/Classes/Indexer.php.
References $GLOBALS.
Referenced by Indexer\hook_indexContent(), Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
log_push | ( | $msg, | |
$key | |||
) |
Push function wrapper for TT logging
string | $msg | Title to set |
string | $key | Key (?) |
Definition at line 2089 of file indexed_search/Classes/Indexer.php.
References $GLOBALS.
Referenced by Indexer\hook_indexContent(), Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
log_setTSlogMessage | ( | $msg, | |
$errorNum = 0 |
|||
) |
Set log message function wrapper for TT logging
string | $msg | Message to set |
int | $errorNum | Error number |
Definition at line 2115 of file indexed_search/Classes/Indexer.php.
References $GLOBALS.
Referenced by Indexer\checkMtimeTstamp(), Indexer\checkWordList(), Indexer\extractLinks(), Indexer\hook_indexContent(), Indexer\indexRegularDocument(), Indexer\indexTypo3PageContent(), and Indexer\update_grlist().
metaphone | ( | $word, | |
$returnRawMetaphoneValue = false |
|||
) |
Creating metaphone based hash from input word
string | $word | Word to convert |
bool | $returnRawMetaphoneValue | If set, returns the raw metaphone value (not hashed) |
Definition at line 1409 of file indexed_search/Classes/Indexer.php.
References elseif, and IndexedSearchUtility\md5inthash().
Referenced by Indexer\analyzeBody(), and Indexer\analyzeHeaderinfo().
processWordsInArrays | ( | $contentArr | ) |
Processing words in the array from split*Content -functions
array | $contentArr | Array of content to index, see splitHTMLContent() and splitRegularContent() |
Definition at line 1289 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
readFileContent | ( | $fileExtension, | |
$absoluteFileName, | |||
$sectionPointer | |||
) |
Reads the content of an external file being indexed. The content from the external parser MUST be returned in utf-8!
string | $fileExtension | File extension, eg. "pdf", "doc" etc. |
string | $absoluteFileName | Absolute filename of file (must exist and be validated OK before calling function) |
string | $sectionPointer | Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be split.) |
Definition at line 1216 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\indexRegularDocument().
removeOldIndexedFiles | ( | $phash | ) |
Removes records for the indexed page, $phash
int | $phash | phash value to flush |
Definition at line 1694 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\submitFilePage().
removeOldIndexedPages | ( | $phash | ) |
Removes records for the indexed page, $phash
int | $phash | phash value to flush |
Definition at line 1557 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\submitPage().
setExtHashes | ( | $file, | |
$subinfo = array() |
|||
) |
Get search hash, external files
string | $file | File name / path which identifies it on the server |
array | $subinfo | Additional content identifying the (subpart of) content. For instance; PDF files are divided into groups of pages for indexing. |
Definition at line 2062 of file indexed_search/Classes/Indexer.php.
References Indexer\$hash, and IndexedSearchUtility\md5inthash().
Referenced by Indexer\indexRegularDocument().
setT3Hashes | ( | ) |
Get search hash, T3 pages
Definition at line 2038 of file indexed_search/Classes/Indexer.php.
References IndexedSearchUtility\md5inthash().
Referenced by Indexer\init().
splitHTMLContent | ( | $content | ) |
Splits HTML content and returns an associative array, with title, a list of metatags, and a list of words in the body.
string | $content | HTML content to index. To some degree expected to be made by TYPO3 (i.e. splitting the header by ":") |
Definition at line 599 of file indexed_search/Classes/Indexer.php.
References Indexer\$defaultContentArray, Indexer\addSpacesToKeywordList(), Indexer\embracingTags(), GeneralUtility\get_tag_attributes(), and Indexer\typoSearchTags().
Referenced by Indexer\indexTypo3PageContent().
splitRegularContent | ( | $content | ) |
Splits non-HTML content (from external files for instance)
string | $content | Input content (non-HTML) to index. |
Definition at line 1250 of file indexed_search/Classes/Indexer.php.
References Indexer\$defaultContentArray.
submit_grlist | ( | $hash, | |
$phash_x | |||
) |
Stores gr_list in the database.
int | $hash | Search result record phash |
int | $phash_x | Actual phash of current content |
Definition at line 1516 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\$hash, IndexedSearchUtility\isTableUsed(), and IndexedSearchUtility\md5inthash().
Referenced by Indexer\submitFile_grlist(), Indexer\submitPage(), and Indexer\update_grlist().
submit_section | ( | $hash, | |
$hash_t3 | |||
) |
Stores section. $hash and $hash_t3 are the same for TYPO3 pages, but different when it is external files.
int | $hash | phash of TYPO3 parent search result record |
int | $hash_t3 | phash of the file indexation search record |
Definition at line 1538 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\$hash, Indexer\getRootLineFields(), and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\submitFile_section(), and Indexer\submitPage().
submitFile_grlist | ( | $hash | ) |
Stores file gr_list for a file IF it does not exist already
int | $hash | phash value of file |
Definition at line 1660 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\$hash, IndexedSearchUtility\isTableUsed(), IndexedSearchUtility\md5inthash(), and Indexer\submit_grlist().
submitFile_section | ( | $hash | ) |
Stores file section for a file IF it does not exist
int | $hash | phash value of file |
Definition at line 1677 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\$hash, IndexedSearchUtility\isTableUsed(), and Indexer\submit_section().
Referenced by Indexer\indexRegularDocument().
submitFilePage | ( | $hash, | |
$file, | |||
$subinfo, | |||
$ext, | |||
$mtime, | |||
$ctime, | |||
$size, | |||
$content_md5h, | |||
$contentParts | |||
) |
Updates db with information about the file
array | $hash | Array with phash and phash_grouping keys for file |
string | $file | File name |
array | $subinfo | Array of "cHashParams" for files: This is for instance the page index for a PDF file (other document types it will be a zero) |
string | $ext | File extension determining the type of media. |
int | $mtime | Modification time of file. |
int | $ctime | Creation time of file. |
int | $size | Size of file in bytes |
int | $content_md5h | Content HASH value. |
array | $contentParts | Standard content array (using only title and body for a file) |
Definition at line 1591 of file indexed_search/Classes/Indexer.php.
References Indexer\$content_md5h, Indexer\$contentParts, $GLOBALS, Indexer\$hash, Indexer\bodyDescription(), IndexedSearchUtility\isTableUsed(), and Indexer\removeOldIndexedFiles().
Referenced by Indexer\indexRegularDocument().
submitPage | ( | ) |
Updates db with information about the page (TYPO3 page, not external media)
Definition at line 1438 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\bodyDescription(), IndexedSearchUtility\isTableUsed(), Indexer\removeOldIndexedPages(), Indexer\submit_grlist(), and Indexer\submit_section().
Referenced by Indexer\indexTypo3PageContent().
submitWords | ( | $wordList, | |
$phash | |||
) |
Submits RELATIONS between words and phash
array | $wordList | Word list array |
int | $phash | phash value |
Definition at line 1984 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\freqMap(), and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
typoSearchTags | ( | & | $body | ) |
Removes content that shouldn't be indexed according to TYPO3SEARCH-tags.
string | $body | HTML Content, passed by reference |
Definition at line 720 of file indexed_search/Classes/Indexer.php.
References elseif.
Referenced by Indexer\splitHTMLContent().
update_grlist | ( | $phash, | |
$phash_x | |||
) |
Check if a grlist-entry for this hash exists and, if not, write one.
int | $phash | phash of the search result that should be found |
int | $phash_x | The real phash of the current content. The two values are different when a page with userlogin turns out to contain the exact same content as another already indexed version of the page; This is the whole reason for the grlist table in fact... |
Definition at line 1827 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, IndexedSearchUtility\isTableUsed(), Indexer\log_setTSlogMessage(), IndexedSearchUtility\md5inthash(), and Indexer\submit_grlist().
Referenced by Indexer\indexTypo3PageContent().
updateParsetime | ( | $phash, | |
$parsetime | |||
) |
Update parsetime for phash row.
int | $phash | phash value. |
int | $parsetime | Parsetime value to set. |
Definition at line 1881 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
updateRootline | ( | ) |
Update section rootline for the page
Definition at line 1896 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, Indexer\getRootLineFields(), and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexTypo3PageContent().
updateSetId | ( | $phash | ) |
Update SetID of the index_phash record.
int | $phash | phash value |
Definition at line 1864 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\indexTypo3PageContent().
updateTstamp | ( | $phash, | |
$mtime = 0 |
|||
) |
Update tstamp for a phash row.
int | $phash | phash value |
int | $mtime | If set, update the mtime field to this value. |
Definition at line 1845 of file indexed_search/Classes/Indexer.php.
References $GLOBALS, and IndexedSearchUtility\isTableUsed().
Referenced by Indexer\checkMtimeTstamp(), Indexer\indexRegularDocument(), and Indexer\indexTypo3PageContent().
$cHashParams = array() |
Definition at line 175 of file indexed_search/Classes/Indexer.php.
$conf = array() |
Definition at line 123 of file indexed_search/Classes/Indexer.php.
$content_md5h = '' |
Definition at line 158 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\checkExternalDocContentHash(), Indexer\indexRegularDocument(), and Indexer\submitFilePage().
$contentParts = array() |
Definition at line 151 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\indexRegularDocument(), and Indexer\submitFilePage().
$crawlerActive = false |
Definition at line 96 of file indexed_search/Classes/Indexer.php.
$csObj |
Definition at line 209 of file indexed_search/Classes/Indexer.php.
$defaultContentArray |
Definition at line 103 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\splitHTMLContent(), and Indexer\splitRegularContent().
$defaultGrList = '0,-1' |
Definition at line 60 of file indexed_search/Classes/Indexer.php.
$enableMetaphoneSearch = false |
Definition at line 192 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\init().
$excludeSections = 'script,style' |
Definition at line 44 of file indexed_search/Classes/Indexer.php.
$external_parsers = array() |
Definition at line 51 of file indexed_search/Classes/Indexer.php.
$externalFileCounter = 0 |
Definition at line 118 of file indexed_search/Classes/Indexer.php.
$file_phash_arr = array() |
Definition at line 144 of file indexed_search/Classes/Indexer.php.
$flagBitMask |
Definition at line 228 of file indexed_search/Classes/Indexer.php.
$forceIndexing = false |
Definition at line 89 of file indexed_search/Classes/Indexer.php.
$freqMax = 0.1 |
Definition at line 187 of file indexed_search/Classes/Indexer.php.
$freqRange = 32000 |
Definition at line 182 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\freqMap().
$hash = array() |
Definition at line 137 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\setExtHashes(), Indexer\submit_grlist(), Indexer\submit_section(), Indexer\submitFile_grlist(), Indexer\submitFile_section(), and Indexer\submitFilePage().
$indexerConfig = array() |
Definition at line 130 of file indexed_search/Classes/Indexer.php.
Referenced by Indexer\hook_indexContent().
$indexExternalUrl_content = '' |
Definition at line 170 of file indexed_search/Classes/Indexer.php.
$internal_log = array() |
Definition at line 163 of file indexed_search/Classes/Indexer.php.
$lexerObj |
Definition at line 223 of file indexed_search/Classes/Indexer.php.
$maxExternalFiles = 0 |
Definition at line 82 of file indexed_search/Classes/Indexer.php.
$metaphoneContent = '' |
Definition at line 202 of file indexed_search/Classes/Indexer.php.
$metaphoneObj |
Definition at line 216 of file indexed_search/Classes/Indexer.php.
$reasons |
Definition at line 30 of file indexed_search/Classes/Indexer.php.
$storeMetaphoneInfoAsWords |
Definition at line 197 of file indexed_search/Classes/Indexer.php.
$tstamp_maxAge = 0 |
Definition at line 67 of file indexed_search/Classes/Indexer.php.
$tstamp_minAge = 0 |
Definition at line 75 of file indexed_search/Classes/Indexer.php.
$wordcount = 0 |
Definition at line 113 of file indexed_search/Classes/Indexer.php.