...
......
...
*/ if (node->prev == NULL && !TY_(nodeHasCM)(node->parent, CM_INLINE)) return yes; /*...
... */ if (node->prev && !TY_(nodeHasCM)(node->prev, CM_INLINE) && TY_(nodeIsElement)(node->prev)) return yes; /*...
*/ if (!node->prev && !node->parent->prev && !TY_(nodeHasCM)(node->parent->parent, CM_INLINE)) return yes; return no; } /** * Indicates whether or not trailing whitespace should be cleaned. */ static Bool CleanTrailingWhitespace(TidyDocImpl* doc, Node* node) { Node* next; if (!TY_(nodeIsText)(node)) return no; if (node->parent->type == DocTypeTag) return no; if (IsPreDescendant(node)) return no; if (node->parent->tag && node->parent->tag->parser == TY_(ParseScript)) return no; /* #523, prevent blank spaces after script if the next item is script. * This is actually more generalized as, if the next element is * a body level script, then indicate that we want to clean trailing * whitespace. */ if ( node->next && nodeIsSCRIPT(node->next) && nodeIsBODY(node->next->parent) ) return yes; next = node->next; /*...
*/ if (!next && !TY_(nodeHasCM)(node->parent, CM_INLINE)) return yes; /*...
...
*/ if (next->type == StartTag) return yes; /* ...*/ if (next->type == StartEndTag) return yes; /* evil adjacent text nodes, Tidy should not generate these :-( */ if (TY_(nodeIsText)(next) && next->start < next->end && TY_(IsWhite)(doc->lexer->lexbuf[next->start])) return yes; return no; } /***************************************************************************//* ** MARK: - Information Accumulation ***************************************************************************/ /** * Errors in positioning of form start or end tags * generally require human intervention to fix. * Issue #166 - repeated
 *  This maps
 *     hello<em> world</em>
 *  to
 *     hello <em>world</em>
 *
* Trims initial space, by moving it before the
* start tag, or if this element is the first in
* parent's content, then by discarding the space
*/
static void TrimInitialSpace( TidyDocImpl* doc, Node *element, Node *text )
{
    /*
     * `text` is expected to be the first child of `element`. If it is a
     * non-empty text node starting with a space, that space is dropped from
     * `text`. For inline, non-field elements the space is first re-homed in
     * front of `element`: appended to a preceding text node when there is
     * one, otherwise placed in a freshly created text node inserted before
     * `element`.
     */
    Lexer* lexer = doc->lexer;
    Node *prev, *node;

    /* The emptiness test must come BEFORE the buffer read: for an empty
       text node (start == end) lexbuf[text->start] is not part of the node
       and may lie past the valid part of the lexer buffer. */
    if ( TY_(nodeIsText)(text) &&
         text->start < text->end &&
         lexer->lexbuf[text->start] == ' ' )
    {
        if ( (element->tag->model & CM_INLINE) &&
             !(element->tag->model & CM_FIELD) )
        {
            prev = element->prev;

            if (TY_(nodeIsText)(prev))
            {
                /* only add a space if the previous text does not already
                   end with one */
                if (prev->end == 0 || lexer->lexbuf[prev->end - 1] != ' ')
                    lexer->lexbuf[(prev->end)++] = ' ';

                ++(element->start);
            }
            else /* create new node */
            {
                node = TY_(NewNode)(lexer->allocator, lexer);
                node->start = (element->start)++;
                node->end = element->start;
                lexer->lexbuf[node->start] = ' ';
                TY_(InsertNodeBeforeElement)(element ,node);
                DEBUG_LOG(SPRTF("TrimInitialSpace: Created text node, inserted before <%s>\n",
                    (element->element ? element->element : "unknown")));
            }
        }

        /* discard the space in current node */
        ++(text->start);
    }
}
/**
 *  This maps
 *     <em>hello </em><strong>world</strong>
 *  to
 *     <em>hello</em> <strong>world</strong>
 *
 *  If `last` is a non-empty text node ending in a space, that space is
 *  cut off the node. For inline, non-field elements the lexer's
 *  `insertspace` flag is raised as well, so the removed space is not
 *  simply lost.
 */
static void TrimTrailingSpace( TidyDocImpl* doc, Node *element, Node *last )
{
    Lexer* lexer = doc->lexer;

    /* nothing to do unless we have a text node with at least one byte */
    if ( !TY_(nodeIsText)(last) || last->end <= last->start )
        return;

    /* only a trailing blank is of interest */
    if ( (byte) lexer->lexbuf[ last->end - 1 ] != ' ' )
        return;

    last->end -= 1;

    if ( (element->tag->model & CM_INLINE) &&
         !(element->tag->model & CM_FIELD) )
    {
        lexer->insertspace = yes;
    }
}
/**
 *  Move initial and trailing space out of an element.
 *  This routine maps:
 *     hello<em> world</em>
 *  to
 *     hello <em>world</em>
 *  and
 *     <em>hello </em><strong>world</strong>
 *  to
 *     <em>hello</em> <strong>world</strong>
 *
 *  Nothing is touched for PRE elements or anything nested inside one.
 */
static void TrimSpaces( TidyDocImpl* doc, Node *element)
{
    Node *first_child;
    Node *last_child;

    if (nodeIsPRE(element) || IsPreDescendant(element))
        return;

    /* leading space of the first child */
    first_child = element->content;
    if (TY_(nodeIsText)(first_child))
        TrimInitialSpace(doc, element, first_child);

    /* trailing space of the last child */
    last_child = element->last;
    if (TY_(nodeIsText)(last_child))
        TrimTrailingSpace(doc, element, last_child);
}
/***************************************************************************//*
** MARK: - Parsers Support
***************************************************************************/
/**
 *  Search state handed to FindDescendant_cb through the traversal's
 *  `propagate` pointer. The caller fills in the input fields; the callback
 *  writes the output fields.
 */
struct MatchingDescendantData
{
/* output: the node the callback accepted as a match; the callback stops
   the traversal as soon as it sets this. */
Node *found_node;
/* output (optional, may be NULL): when non-NULL, the callback stores `yes`
   through this pointer on visiting `marker_node` without having accepted
   that node as the match. */
Bool *passed_marker_node;
/* input: */
/* tag id a visited node must have to be considered a match */
TidyTagId matching_tagId;
/* node being searched for; its element name is compared as well when
   `matching_tagId` is TidyTag_UNKNOWN */
Node *node_to_find;
/* node whose visit is reported through `passed_marker_node` */
Node *marker_node;
};
/**
 *  Traversal callback doing the per-node work for FindMatchingDescendant.
 *
 *  A node matches when its tag id equals the requested one; for
 *  TidyTag_UNKNOWN the element names must additionally be present on both
 *  sides and compare equal. On a match the node is recorded and the
 *  traversal is stopped. Otherwise, if the node is the marker node and a
 *  flag pointer was supplied, the flag is set to `yes`, and the traversal
 *  continues with VisitParent.
 */
static NodeTraversalSignal FindDescendant_cb(TidyDocImpl* ARG_UNUSED(doc), Node* node, void *propagate)
{
    struct MatchingDescendantData *search = (struct MatchingDescendantData *)propagate;

    if (TagId(node) == search->matching_tagId)
    {
        int accept = 1;

        /* make sure we match up 'unknown' tags exactly! */
        if (search->matching_tagId == TidyTag_UNKNOWN)
        {
            Node *wanted = search->node_to_find;

            accept = node->element != NULL &&
                     wanted != NULL &&
                     wanted->element != NULL &&
                     TY_(tmbstrcmp)(wanted->element, node->element) == 0;
        }

        if (accept)
        {
            search->found_node = node;
            return ExitTraversal;
        }
    }

    if (search->passed_marker_node && node == search->marker_node)
    {
        *search->passed_marker_node = yes;
    }

    return VisitParent;
}
/**
 *  Search for a node matching the given `node`, by running
 *  FindDescendant_cb over the traversal that TY_(TraverseNodeTree)
 *  performs starting at `parent`. A node matches when it has the same tag
 *  id and, for unknown tags, the same element name.
 *
 *  When the traversal visits `marker_node` before a match ends the search,
 *  this is flagged by setting the boolean referenced by
 *  `is_parent_of_marker` to `yes`; it is reset to `no` up front.
 *
 *  `is_parent_of_marker` and `marker_node` are optional parameters and may
 *  be NULL.
 *
 *  NOTE(review): the order in which nodes are visited is decided by
 *  TraverseNodeTree, which is not visible here - confirm against it.
 *
 *  Returns the matching node, or NULL if the traversal found none.
 */
static Node *FindMatchingDescendant( Node *parent, Node *node, Node *marker_node, Bool *is_parent_of_marker )
{
    struct MatchingDescendantData cb_data = { 0 };

    /* validate the argument before it is used to build the search state */
    assert(node);

    cb_data.matching_tagId = TagId(node);
    cb_data.node_to_find = node;
    cb_data.marker_node = marker_node;

    /* Hand the caller's flag to the callback. Without this the callback
       always sees a NULL flag pointer and the flag can never become `yes`. */
    cb_data.passed_marker_node = is_parent_of_marker;

    if (is_parent_of_marker)
        *is_parent_of_marker = no;

    TY_(TraverseNodeTree)(NULL, parent, FindDescendant_cb, &cb_data);
    return cb_data.found_node;
}
/**
 *  Looks through the direct children of `list` for LI start tags and
 *  stores the last one found in the out parameter `lastli` (NULL when
 *  there is none). Returns `yes` if such a list item exists, else `no`.
 */
static Bool FindLastLI( Node *list, Node **lastli )
{
    Node *child;

    *lastli = NULL;

    child = list->content;
    while ( child != NULL )
    {
        if ( nodeIsLI(child) && child->type == StartTag )
            *lastli = child;
        child = child->next;
    }

    if ( *lastli != NULL )
        return yes;
    return no;
}
/***************************************************************************//*
** MARK: - Parser Stack
***************************************************************************/
/**
 *  Allocates the parser's stack with room for an initial number of
 *  entries and marks it as empty (top == -1).
 */
void TY_(InitParserStack)( TidyDocImpl* doc )
{
    enum { initial_capacity = 32 };

    doc->stack.content = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * initial_capacity );
    doc->stack.size = initial_capacity;
    doc->stack.top = -1;
}
/**
 *  Releases the parser stack's storage and resets its bookkeeping so the
 *  stack reads as empty with no capacity.
 */
void TY_(FreeParserStack)( TidyDocImpl* doc )
{
    TidyParserMemory *storage = doc->stack.content;

    TidyFree( doc->allocator, storage );

    doc->stack.content = NULL;
    doc->stack.top = -1;
    doc->stack.size = 0;
}
/**
 *  Doubles the capacity of the parser stack: a new array of twice the
 *  size is allocated, the entries in use (indices 0..top) are copied over,
 *  and the old array is freed.
 */
static void growParserStack( TidyDocImpl* doc )
{
    TidyParserMemory *old_slots = doc->stack.content;
    TidyParserMemory *new_slots;

    new_slots = (TidyParserMemory *) TidyAlloc( doc->allocator, sizeof(TidyParserMemory) * doc->stack.size * 2 );

    /* carry over only the entries currently on the stack */
    memcpy( new_slots, old_slots, sizeof(TidyParserMemory) * (doc->stack.top + 1) );
    TidyFree( doc->allocator, old_slots );

    doc->stack.content = new_slots;
    doc->stack.size = doc->stack.size * 2;
}
/**
 *  Reports whether the parser stack holds no entries, i.e. whether its
 *  top index is below zero.
 */
Bool TY_(isEmptyParserStack)( TidyDocImpl* doc )
{
    if ( doc->stack.top < 0 )
        return yes;
    return no;
}
/**
 *  Peek at the parser memory on top of the stack without removing it.
 *
 *  Returns a copy of the top entry. If the stack is empty, a
 *  zero-initialized entry is returned instead of reading in front of the
 *  array (top is -1 for an empty stack).
 */
TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc )
{
    if ( doc->stack.top < 0 )
    {
        TidyParserMemory blank = {0};
        return blank;
    }

    return doc->stack.content[doc->stack.top];
}
/**
 *  Peek at the parser memory "identity" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 *
 *  Returns the identity stored in the top entry, or NULL if the stack is
 *  empty (top is -1 then, so there is no entry to read).
 */
Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc )
{
    if ( doc->stack.top < 0 )
        return NULL;

    return doc->stack.content[doc->stack.top].identity;
}
/**
 *  Peek at the parser memory "mode" field. This is just a convenience
 *  to avoid having to create a new struct instance in the caller.
 *
 *  Returns the mode stored in the top entry. If the stack is empty (top
 *  is -1, so there is no entry to read), IgnoreWhitespace is returned.
 */
GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc )
{
    if ( doc->stack.top < 0 )
        return IgnoreWhitespace;

    return doc->stack.content[doc->stack.top].mode;
}
/**
 *  Pop out a parser memory.
 *
 *  Removes the top entry from the parser stack and returns a copy of it.
 *  When the stack is empty nothing is removed and a blank entry is
 *  returned.
 */
TidyParserMemory TY_(popMemory)( TidyDocImpl* doc )
{
    TidyParserMemory data = { NULL };

    if ( TY_(isEmptyParserStack)( doc ) )
        return data;

    data = doc->stack.content[doc->stack.top];

    /* log before the top index moves, so depth and address refer to the
       entry being popped */
    DEBUG_LOG(SPRTF("\n"
                    "<--POP  original: %s @ %p\n"
                    "         reentry: %s @ %p\n"
                    "     stack depth: %lu @ %p\n"
                    "            mode: %u\n"
                    "      register 1: %i\n"
                    "      register 2: %i\n\n",
                    data.original_node ? data.original_node->element : "none", data.original_node,
                    data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
                    doc->stack.top, &doc->stack.content[doc->stack.top],
                    data.mode,
                    data.register_1,
                    data.register_2
                    ));

    doc->stack.top--;
    return data;
}
/**
 *  Push the parser memory to the stack.
 *
 *  The stack is grown first if the top index already sits at its last
 *  slot; `data` is then copied into the new top entry.
 */
void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data )
{
    TidyParserMemory *slot;

    if ( doc->stack.top == doc->stack.size - 1 )
    {
        growParserStack( doc );
    }

    /* take the slot only after a possible grow, which replaces the array */
    doc->stack.top++;
    slot = &doc->stack.content[doc->stack.top];
    *slot = data;

    DEBUG_LOG(SPRTF("\n"
                    "-->PUSH original: %s @ %p\n"
                    "         reentry: %s @ %p\n"
                    "     stack depth: %lu @ %p\n"
                    "            mode: %u\n"
                    "      register 1: %i\n"
                    "      register 2: %i\n\n",
                    data.original_node ? data.original_node->element : "none", data.original_node,
                    data.reentry_node ? data.reentry_node->element : "none", data.reentry_node,
                    doc->stack.top, &doc->stack.content[doc->stack.top],
                    data.mode,
                    data.register_1,
                    data.register_2
                    ));
}
/***************************************************************************//*
** MARK: Convenience Logging Macros
***************************************************************************/
#if defined(ENABLE_DEBUG_LOG)
# define DEBUG_LOG_COUNTERS \
static int depth_parser = 0;\
static int count_parser = 0;\
int old_mode = IgnoreWhitespace;
# define DEBUG_LOG_GET_OLD_MODE old_mode = mode;
# define DEBUG_LOG_REENTER_WITH_NODE(NODE) SPRTF("\n>>>Re-Enter %s-%u with '%s', +++mode: %u, depth: %d, cnt: %d\n", __FUNCTION__, __LINE__, NODE->element, mode, ++depth_parser, ++count_parser);
# define DEBUG_LOG_ENTER_WITH_NODE(NODE) SPRTF("\n>>>Enter %s-%u with '%s', +++mode: %u, depth: %d, cnt: %d\n", __FUNCTION__, __LINE__, NODE->element, mode, ++depth_parser, ++count_parser);
# define DEBUG_LOG_CHANGE_MODE SPRTF("+++%s-%u Changing mode to %u (was %u)\n", __FUNCTION__, __LINE__, mode, old_mode);
# define DEBUG_LOG_GOT_TOKEN(NODE) SPRTF("---%s-%u got token '%s' with mode '%u'.\n", __FUNCTION__, __LINE__, NODE ? NODE->element : NULL, mode);
# define DEBUG_LOG_EXIT_WITH_NODE(NODE) SPRTF("<<
*/
if ( nodeIsBR(node) )
TrimSpaces( doc, element );
TY_(InsertNodeAtEnd)(element, node);
if (node->implicit)
TY_(Report)(doc, element, node, INSERTING_TAG );
/* Issue #212 - WHY is this hard coded to 'IgnoreWhitespace' while an
effort has been made above to set a 'MixedContent' mode in some cases?
WHY IS THE 'mode' VARIABLE NOT USED HERE???? */
{
TidyParserMemory memory = {0};
memory.identity = TY_(ParseBlock);
memory.reentry_node = node;
memory.reentry_mode = mode;
memory.original_node = element;
TY_(pushMemory)(doc, memory);
DEBUG_LOG_EXIT_WITH_NODE(node);
}
return node;
}
/* discard unexpected tags */
if (node->type == EndTag)
TY_(PopInline)( doc, node ); /* if inline end tag */
TY_(Report)(doc, element, node, DISCARDING_UNEXPECTED );
TY_(FreeNode)( doc, node );
continue;
}
if (!(element->tag->model & CM_OPT))
TY_(Report)(doc, element, node, MISSING_ENDTAG_FOR);
if (element->tag->model & CM_OBJECT)
{
/* pop inline stack */
while ( lexer->istacksize > lexer->istackbase )
TY_(PopInline)( doc, NULL );
lexer->istackbase = istackbase;
}
TrimSpaces( doc, element );
DEBUG_LOG_EXIT;
return NULL;
}
/** MARK: TY_(ParseBody)
* Parses the `body` tag.
*
* This is a non-recursing parser. It uses the document's parser memory stack
* to send subsequent nodes back to the controller for dispatching to parsers.
* This parser is also re-enterable, so that post-processing can occur after
* such dispatching.
*/
Node* TY_(ParseBody)( TidyDocImpl* doc, Node *body, GetTokenMode mode )
{
Lexer* lexer = doc->lexer;
Node *node = NULL;
Bool checkstack = no;
Bool iswhitenode = no;
DEBUG_LOG_COUNTERS;
mode = IgnoreWhitespace;
checkstack = yes;
/*
If we're re-entering, then we need to setup from a previous state,
instead of starting fresh. We can pull what we need from the document's
stack.
*/
if ( body == NULL )
{
TidyParserMemory memory = TY_(popMemory)( doc );
node = memory.reentry_node; /* Throwaway, as main loop overrwrites anyway. */
DEBUG_LOG_REENTER_WITH_NODE(node);
body = memory.original_node;
checkstack = memory.register_1;
iswhitenode = memory.register_2;
DEBUG_LOG_GET_OLD_MODE;
mode = memory.mode;
DEBUG_LOG_CHANGE_MODE;
}
else
{
DEBUG_LOG_ENTER_WITH_NODE(body);
TY_(BumpObject)( doc, body->parent );
}
while ((node = TY_(GetToken)(doc, mode)) != NULL)
{
DEBUG_LOG_GOT_TOKEN(node);
/* find and discard multiple elements */
if (node->tag == body->tag && node->type == StartTag)
{
TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)(doc, node);
continue;
}
/* #538536 Extra endtags not detected */
if ( nodeIsHTML(node) )
{
if (TY_(nodeIsElement)(node) || lexer->seenEndHtml)
TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
else
lexer->seenEndHtml = 1;
TY_(FreeNode)( doc, node);
continue;
}
if ( lexer->seenEndBody &&
( node->type == StartTag ||
node->type == EndTag ||
node->type == StartEndTag ) )
{
TY_(Report)(doc, body, node, CONTENT_AFTER_BODY );
}
if ( node->tag == body->tag && node->type == EndTag )
{
body->closed = yes;
TrimSpaces(doc, body);
TY_(FreeNode)( doc, node);
lexer->seenEndBody = 1;
DEBUG_LOG_GET_OLD_MODE;
mode = IgnoreWhitespace;
DEBUG_LOG_CHANGE_MODE;
if ( nodeIsNOFRAMES(body->parent) )
break;
continue;
}
if ( nodeIsNOFRAMES(node) )
{
if (node->type == StartTag)
{
TidyParserMemory memory = {0};
TY_(InsertNodeAtEnd)(body, node);
memory.identity = TY_(ParseBody);
memory.original_node = body;
memory.reentry_node = node;
memory.register_1 = checkstack;
memory.register_2 = iswhitenode;
memory.mode = mode;
TY_(pushMemory)( doc, memory );
DEBUG_LOG_EXIT_WITH_NODE(node);
return node;
}
if (node->type == EndTag && nodeIsNOFRAMES(body->parent) )
{
TrimSpaces(doc, body);
TY_(UngetToken)( doc );
break;
}
}
if ( (nodeIsFRAME(node) || nodeIsFRAMESET(node))
&& nodeIsNOFRAMES(body->parent) )
{
TrimSpaces(doc, body);
TY_(UngetToken)( doc );
break;
}
iswhitenode = no;
if ( TY_(nodeIsText)(node) &&
node->end <= node->start + 1 &&
lexer->lexbuf[node->start] == ' ' )
iswhitenode = yes;
/* deal with comments etc. */
if (InsertMisc(body, node))
continue;
/* mixed content model permits text */
if (TY_(nodeIsText)(node))
{
if (iswhitenode && mode == IgnoreWhitespace)
{
TY_(FreeNode)( doc, node);
continue;
}
/* HTML 2 and HTML4 strict don't allow text here */
TY_(ConstrainVersion)(doc, ~(VERS_HTML40_STRICT | VERS_HTML20));
if (checkstack)
{
checkstack = no;
if ( TY_(InlineDup)(doc, node) > 0 )
continue;
}
TY_(InsertNodeAtEnd)(body, node);
DEBUG_LOG_GET_OLD_MODE;
mode = MixedContent;
DEBUG_LOG_CHANGE_MODE;
continue;
}
if (node->type == DocTypeTag)
{
InsertDocType(doc, body, node);
continue;
}
/* discard unknown and PARAM tags */
if ( node->tag == NULL || nodeIsPARAM(node) )
{
TY_(Report)(doc, body, node, DISCARDING_UNEXPECTED);
TY_(FreeNode)( doc, node);
continue;
}
/*
Netscape allows LI and DD directly in BODY
We infer UL or DL respectively and use this
Bool to exclude block-level elements so as
to match Netscape's observed behaviour.
*/
lexer->excludeBlocks = no;
if ((( nodeIsINPUT(node) ||
(!TY_(nodeHasCM)(node, CM_BLOCK) && !TY_(nodeHasCM)(node, CM_INLINE))
) && !TY_(IsHTML5Mode)(doc)) || nodeIsLI(node) )
{
/* avoid this error message being issued twice */
if (!(node->tag->model & CM_HEAD))
TY_(Report)(doc, body, node, TAG_NOT_ALLOWED_IN);
if (node->tag->model & CM_HTML)
{
/* copy body attributes if current body was inferred */
if ( nodeIsBODY(node) && body->implicit
&& body->attributes == NULL )
{
body->attributes = node->attributes;
node->attributes = NULL;
}
TY_(FreeNode)( doc, node);
continue;
}
if (node->tag->model & CM_HEAD)
{
MoveToHead(doc, body, node);
continue;
}
if (node->tag->model & CM_LIST)
{
TY_(UngetToken)( doc );
node = TY_(InferredTag)(doc, TidyTag_UL);
AddClassNoIndent(doc, node);
lexer->excludeBlocks = yes;
}
else if (node->tag->model & CM_DEFLIST)
{
TY_(UngetToken)( doc );
node = TY_(InferredTag)(doc, TidyTag_DL);
lexer->excludeBlocks = yes;
}
else if (node->tag->model & (CM_TABLE | CM_ROWGRP | CM_ROW))
{
/* http://tidy.sf.net/issue/2855621 */
if (node->type != EndTag) {
TY_(UngetToken)( doc );
node = TY_(InferredTag)(doc, TidyTag_TABLE);
}
lexer->excludeBlocks = yes;
}
else if ( nodeIsINPUT(node) )
{
TY_(UngetToken)( doc );
node = TY_(InferredTag)(doc, TidyTag_FORM);
lexer->excludeBlocks = yes;
}
else
{
if ( !TY_(nodeHasCM)(node, CM_ROW | CM_FIELD) )
{
TY_(UngetToken)( doc );
DEBUG_LOG_EXIT;
return NULL;
}
/* ignore