basic API for libxml regular expressions handling used for XML Schemas and validation.

Table of Contents

Structure xmlExpCtxt
struct _xmlExpCtxt The content of this structure is not made public by the API.
Typedef xmlExpCtxt * xmlExpCtxtPtr
Structure xmlExpNode
struct _xmlExpNode The content of this structure is not made public by the API.
Typedef xmlExpNode * xmlExpNodePtr
Enum xmlExpNodeType
Structure xmlRegExecCtxt
struct _xmlRegExecCtxt The content of this structure is not made public by the API.
Typedef xmlRegExecCtxt * xmlRegExecCtxtPtr
Structure xmlRegexp
struct _xmlRegexp The content of this structure is not made public by the API.
Typedef xmlRegexp * xmlRegexpPtr
int	xmlExpCtxtNbCons		(xmlExpCtxtPtr ctxt)
int	xmlExpCtxtNbNodes		(xmlExpCtxtPtr ctxt)
void	xmlExpDump			(xmlBufferPtr buf, 
xmlExpNodePtr expr)
xmlExpNodePtr	xmlExpExpDerive		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub)
void	xmlExpFree			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp)
void	xmlExpFreeCtxt			(xmlExpCtxtPtr ctxt)
int	xmlExpGetLanguage		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** langList,
int len)
int	xmlExpGetStart			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** tokList,
int len)
int	xmlExpIsNillable		(xmlExpNodePtr exp)
int	xmlExpMaxToken			(xmlExpNodePtr expr)
xmlExpNodePtr	xmlExpNewAtom		(xmlExpCtxtPtr ctxt, 
const xmlChar * name,
int len)
xmlExpCtxtPtr	xmlExpNewCtxt		(int maxNodes, 
xmlDictPtr dict)
xmlExpNodePtr	xmlExpNewOr		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right)
xmlExpNodePtr	xmlExpNewRange		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr subset,
int min,
int max)
xmlExpNodePtr	xmlExpNewSeq		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right)
xmlExpNodePtr	xmlExpParse		(xmlExpCtxtPtr ctxt, 
const char * expr)
void	xmlExpRef			(xmlExpNodePtr exp)
xmlExpNodePtr	xmlExpStringDerive	(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar * str,
int len)
int	xmlExpSubsume			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub)
Function type: xmlRegExecCallbacks
void	xmlRegExecCallbacks		(xmlRegExecCtxtPtr exec, 
const xmlChar * token,
void * transdata,
void * inputdata)
int	xmlRegExecErrInfo		(xmlRegExecCtxtPtr exec, 
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
int	xmlRegExecNextValues		(xmlRegExecCtxtPtr exec, 
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
int	xmlRegExecPushString		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
void * data)
int	xmlRegExecPushString2		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
const xmlChar * value2,
void * data)
void	xmlRegFreeExecCtxt		(xmlRegExecCtxtPtr exec)
void	xmlRegFreeRegexp		(xmlRegexpPtr regexp)
xmlRegExecCtxtPtr	xmlRegNewExecCtxt	(xmlRegexpPtr comp, 
xmlRegExecCallbacks callback,
void * data)
xmlRegexpPtr	xmlRegexpCompile	(const xmlChar * regexp)
int	xmlRegexpExec			(xmlRegexpPtr comp, 
const xmlChar * content)
int	xmlRegexpIsDeterminist		(xmlRegexpPtr comp)
void	xmlRegexpPrint			(FILE * output, 
xmlRegexpPtr regexp)

Description

Structure xmlExpCtxt

Structure xmlExpCtxt
struct _xmlExpCtxt { The content of this structure is not made public by the API. }

Structure xmlExpNode

Structure xmlExpNode
struct _xmlExpNode { The content of this structure is not made public by the API. }

Enum xmlExpNodeType

Enum xmlExpNodeType {
    XML_EXP_EMPTY = 0
    XML_EXP_FORBID = 1
    XML_EXP_ATOM = 2
    XML_EXP_SEQ = 3
    XML_EXP_OR = 4
    XML_EXP_COUNT = 5
}

Structure xmlRegExecCtxt

Structure xmlRegExecCtxt
struct _xmlRegExecCtxt { The content of this structure is not made public by the API. }
A libxml progressive regular expression evaluation context

Structure xmlRegexp

Structure xmlRegexp
struct _xmlRegexp { The content of this structure is not made public by the API. }
A libxml regular expression; these can actually be far more complex than POSIX regular expressions.

Function: xmlExpCtxtNbCons

int	xmlExpCtxtNbCons		(xmlExpCtxtPtr ctxt)

Debugging facility provides the number of allocated nodes over lifetime

ctxt:an expression context
Returns:the number of nodes ever allocated or -1 in case of error

Function: xmlExpCtxtNbNodes

int	xmlExpCtxtNbNodes		(xmlExpCtxtPtr ctxt)

Debugging facility provides the number of allocated nodes at that point

ctxt:an expression context
Returns:the number of nodes in use or -1 in case of error

Function: xmlExpDump

void	xmlExpDump			(xmlBufferPtr buf, 
xmlExpNodePtr expr)

Serialize the expression as compiled to the buffer

buf:a buffer to receive the output
expr:the compiled expression

Function: xmlExpExpDerive

xmlExpNodePtr	xmlExpExpDerive		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub)

Evaluates the expression resulting from @exp consuming a sub expression @sub. Based on algebraic derivation and sometimes direct Brzozowski derivation, it usually takes less than linear time and can handle expressions generating infinite languages.

ctxt:the expressions context
exp:the englobing expression
sub:the subexpression
Returns:the resulting expression or NULL in case of internal error, the result must be freed

Function: xmlExpFree

void	xmlExpFree			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp)

Dereference the expression

ctxt:the expression context
exp:the expression

Function: xmlExpFreeCtxt

void	xmlExpFreeCtxt			(xmlExpCtxtPtr ctxt)

Free an expression context

ctxt:an expression context

Function: xmlExpGetLanguage

int	xmlExpGetLanguage		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** langList,
int len)

Find all the strings used in @exp and store them in @langList

ctxt:the expression context
exp:the expression
langList:where to store the tokens
len:the allocated length of @langList
Returns:the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings

Function: xmlExpGetStart

int	xmlExpGetStart			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar ** tokList,
int len)

Find all the strings that appear at the start of the languages accepted by @exp and store them in @tokList. E.g. for (a, b) | c it will return the list [a, c]

ctxt:the expression context
exp:the expression
tokList:where to store the tokens
len:the allocated length of @tokList
Returns:the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings

Function: xmlExpIsNillable

int	xmlExpIsNillable		(xmlExpNodePtr exp)

Finds if the expression is nillable, i.e. if it accepts the empty sequence

exp:the expression
Returns:1 if nillable, 0 if not and -1 in case of error

Function: xmlExpMaxToken

int	xmlExpMaxToken			(xmlExpNodePtr expr)

Indicate the maximum number of inputs an expression can accept

expr:a compiled expression
Returns:the maximum length or -1 in case of error

Function: xmlExpNewAtom

xmlExpNodePtr	xmlExpNewAtom		(xmlExpCtxtPtr ctxt, 
const xmlChar * name,
int len)

Get the atom associated to this name from that context

ctxt:the expression context
name:the atom name
len:the atom name length in bytes (or -1)
Returns:the node or NULL in case of error

Function: xmlExpNewCtxt

xmlExpCtxtPtr	xmlExpNewCtxt		(int maxNodes, 
xmlDictPtr dict)

Creates a new context for manipulating expressions

maxNodes:the maximum number of nodes
dict:optional dictionary to use internally
Returns:the context or NULL in case of error

Function: xmlExpNewOr

xmlExpNodePtr	xmlExpNewOr		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right)

Get the atom associated to the choice @left | @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).

ctxt:the expression context
left:left expression
right:right expression
Returns:the node or NULL in case of error

Function: xmlExpNewRange

xmlExpNodePtr	xmlExpNewRange		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr subset,
int min,
int max)

Get the atom associated to the range (@subset){@min, @max}. Note that @subset is consumed in the operation; to keep a handle on it use xmlExpRef() and use xmlExpFree() to release it. This is true even in case of failure (unless ctxt == NULL).

ctxt:the expression context
subset:the expression to be repeated
min:the lower bound for the repetition
max:the upper bound for the repetition, -1 means infinite
Returns:the node or NULL in case of error

Function: xmlExpNewSeq

xmlExpNodePtr	xmlExpNewSeq		(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr left,
xmlExpNodePtr right)

Get the atom associated to the sequence @left , @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).

ctxt:the expression context
left:left expression
right:right expression
Returns:the node or NULL in case of error

Function: xmlExpParse

xmlExpNodePtr	xmlExpParse		(xmlExpCtxtPtr ctxt, 
const char * expr)

Minimal parser for regexps; it understands the following constructs: - string terminals - choice operator | - sequence operator , - subexpressions (...) - usual cardinality operators + * and ? - finite sequences { min, max } - infinite sequences { min, * }. Only minimal checking is done; in particular, there is no checking on string values.

ctxt:the expressions context
expr:the 0 terminated string
Returns:a new expression or NULL in case of failure

Function: xmlExpRef

void	xmlExpRef			(xmlExpNodePtr exp)

Increase the reference count of the expression

exp:the expression

Function: xmlExpStringDerive

xmlExpNodePtr	xmlExpStringDerive	(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
const xmlChar * str,
int len)

Do one step of Brzozowski derivation of the expression @exp with respect to the input string

ctxt:the expression context
exp:the expression
str:the string
len:the string len in bytes if available
Returns:the resulting expression or NULL in case of internal error

Function: xmlExpSubsume

int	xmlExpSubsume			(xmlExpCtxtPtr ctxt, 
xmlExpNodePtr exp,
xmlExpNodePtr sub)

Check whether @exp accepts all the languages accepted by @sub, the input being a subexpression.

ctxt:the expressions context
exp:the englobing expression
sub:the subexpression
Returns:1 if true 0 if false and -1 in case of failure.

Function type: xmlRegExecCallbacks

Function type: xmlRegExecCallbacks
void	xmlRegExecCallbacks		(xmlRegExecCtxtPtr exec, 
const xmlChar * token,
void * transdata,
void * inputdata)

Callback function when doing a transition in the automata

exec:the regular expression context
token:the current token string
transdata:transition data
inputdata:input data

Function: xmlRegExecErrInfo

int	xmlRegExecErrInfo		(xmlRegExecCtxtPtr exec, 
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)

Extract error information from the regexp execution. The parameter @string will be updated with the value pushed and not accepted. The parameter @values must point to an array of @nbval string pointers; on return, @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.

exec:a regexp execution context generating an error
string:return value for the error string
nbval:pointer to the number of accepted values IN/OUT
nbneg:return number of negative transitions
values:pointer to the array of acceptable values
terminal:return value if this was a terminal state
Returns:0 in case of success or -1 in case of error.

Function: xmlRegExecNextValues

int	xmlRegExecNextValues		(xmlRegExecCtxtPtr exec, 
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)

Extract information from the regexp execution. The parameter @values must point to an array of @nbval string pointers; on return, @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.

exec:a regexp execution context
nbval:pointer to the number of accepted values IN/OUT
nbneg:return number of negative transitions
values:pointer to the array of acceptable values
terminal:return value if this was a terminal state
Returns:0 in case of success or -1 in case of error.

Function: xmlRegExecPushString

int	xmlRegExecPushString		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
void * data)

Push one input token in the execution context

exec:a regexp execution context or NULL to indicate the end
value:a string token input
data:data associated to the token to reuse in callbacks
Returns:1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error.

Function: xmlRegExecPushString2

int	xmlRegExecPushString2		(xmlRegExecCtxtPtr exec, 
const xmlChar * value,
const xmlChar * value2,
void * data)

Push one input token in the execution context

exec:a regexp execution context or NULL to indicate the end
value:the first string token input
value2:the second string token input
data:data associated to the token to reuse in callbacks
Returns:1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error.

Function: xmlRegFreeExecCtxt

void	xmlRegFreeExecCtxt		(xmlRegExecCtxtPtr exec)

Free the structures associated to a regular expression evaluation context.

exec:a regular expression evaluation context

Function: xmlRegFreeRegexp

void	xmlRegFreeRegexp		(xmlRegexpPtr regexp)

Free a regexp

regexp:the regexp

Function: xmlRegNewExecCtxt

xmlRegExecCtxtPtr	xmlRegNewExecCtxt	(xmlRegexpPtr comp, 
xmlRegExecCallbacks callback,
void * data)

Build a context used for progressive evaluation of a regexp.

comp:a precompiled regular expression
callback:a callback function used for handling progresses in the automata matching phase
data:the context data associated to the callback in this context
Returns:the new context

Function: xmlRegexpCompile

xmlRegexpPtr	xmlRegexpCompile	(const xmlChar * regexp)

Parses a regular expression conforming to XML Schemas Part 2 Datatype Appendix F and builds an automaton suitable for testing strings against that regular expression

regexp:a regular expression string
Returns:the compiled expression or NULL in case of error

Function: xmlRegexpExec

int	xmlRegexpExec			(xmlRegexpPtr comp, 
const xmlChar * content)

Check if the regular expression generates the value

comp:the compiled regular expression
content:the value to check against the regular expression
Returns:1 if it matches, 0 if not and a negative value in case of error

Function: xmlRegexpIsDeterminist

int	xmlRegexpIsDeterminist		(xmlRegexpPtr comp)

Check if the regular expression is deterministic

comp:the compiled regular expression
Returns:1 if yes, 0 if not and a negative value in case of error

Function: xmlRegexpPrint

void	xmlRegexpPrint			(FILE * output, 
xmlRegexpPtr regexp)

Print the content of the compiled regular expression

output:the file for the output debug
regexp:the compiled regexp

Daniel Veillard