HTML Tidy 5.8.0
The HTACG Tidy HTML Project
Loading...
Searching...
No Matches
parser.h
Go to the documentation of this file.
1#ifndef __PARSER_H__
2#define __PARSER_H__
3
4/**************************************************************************//**
5 * @file
6 * HTML and XML Parsers.
7 *
8 * Tidy's HTML parser corrects many conditions and enforces certain user
9 * preferences during the parsing process. The XML parser produces a tree
10 * of nodes useful to Tidy but also suitable for use in other XML processing
11 * applications.
12 *
13 * @author HTACG, et al (consult git log)
14 *
15 * @copyright
16 * Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
17 * Institute of Technology, European Research Consortium for Informatics
18 * and Mathematics, Keio University) and HTACG.
19 * @par
20 * All Rights Reserved.
21 * @par
22 * See `tidy.h` for the complete license.
23 *
24 * @date Additional updates: consult git log
25 *
26 ******************************************************************************/
27
28#include "forward.h"
29
30/** @addtogroup internal_api */
31/** @{ */
32
33
34/***************************************************************************//**
35 ** @defgroup parser_h HTML and XML Parsing
36 **
37 ** These functions and structures form the internal API for document
38 ** parsing.
39 **
40 ** @{
41 ******************************************************************************/
42
43
44/**
45 * Performs a recursive node integrity check after parsing
46 * an HTML or XML document.
47 * @note Actual performance of this check can be disabled by defining the
48 * macro NO_NODE_INTEGRITY_CHECK.
49 * @param node The root node for the integrity check.
50 * @returns Returns yes or no indicating integrity of the node structure.
51 */
52TY_PRIVATE Bool TY_(CheckNodeIntegrity)(Node *node);
53
54
55/**
56 * Indicates whether or not a text node ends with a space or newline.
57 * @note Implementation of this method is found in `pprint.c` for
58 * some reason.
59 * @param lexer A reference to the lexer used to lex the document.
60 * @param node The node to check.
61 * @returns The result of the check.
62 */
63TY_PRIVATE Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
64
65
66/**
67 * Used to check if a node uses CM_NEW, which determines how attributes
68 * without values should be printed. This was introduced to deal with
69 * user-defined tags e.g. ColdFusion.
70 * @param node The node to check.
71 * @returns The result of the check.
72 */
73TY_PRIVATE Bool TY_(IsNewNode)(Node *node);
74
75
76/**
77 * Transforms a given node to another element, for example, from a `p`
78 * to a `br`.
79 * @param doc The document which the node belongs to.
80 * @param node The node to coerce.
81 * @param tid The tag type to coerce the node into.
82 * @param obsolete If the old node was obsolete, a report will be generated.
83 * @param expected If the old node was not expected to be found in this
84 * particular location, a report will be generated.
85 */
86TY_PRIVATE void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
87
88
89/**
90 * Extract a node and its children from a markup tree.
91 * @param node The node to remove.
92 * @returns Returns the removed node.
93 */
94TY_PRIVATE Node *TY_(RemoveNode)(Node *node);
95
96
97/**
98 * Remove node from markup tree and discard it.
99 * @param doc The Tidy document from which to discard the node.
100 * @param element The node to discard.
101 * @returns Returns the next node.
102 */
103TY_PRIVATE Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
104
105
106/**
107 * Insert node into markup tree as the first element of content of element.
108 * @param element The new destination node.
109 * @param node The node to insert.
110 */
111TY_PRIVATE void TY_(InsertNodeAtStart)(Node *element, Node *node);
112
113
114/**
115 * Insert node into markup tree as the last element of content of element.
116 * @param element The new destination node.
117 * @param node The node to insert.
118 */
119TY_PRIVATE void TY_(InsertNodeAtEnd)(Node *element, Node *node);
120
121
122/**
123 * Insert node into markup tree before element.
124 * @param element The node before which the node is inserted.
125 * @param node The node to insert.
126 */
127TY_PRIVATE void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
128
129
130/**
131 * Insert node into markup tree after element.
132 * @param element The node after which the node is inserted.
133 * @param node The node to insert.
134 */
135TY_PRIVATE void TY_(InsertNodeAfterElement)(Node *element, Node *node);
136
137
138/**
139 * Trims a single, empty element, returning the next node.
140 * @param doc The Tidy document.
141 * @param element The element to trim.
142 * @returns Returns the next node.
143 */
144TY_PRIVATE Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
145
146
147/**
148 * Trims a tree of empty elements recursively, returning the next node.
149 * @param doc The Tidy document.
150 * @param node The element to trim.
151 * @returns Returns the next node.
152 */
153TY_PRIVATE Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
154
155
156/**
157 * Indicates whether or not a text node is blank, meaning that it consists
158 * of nothing, or a single space.
159 * @param lexer The lexer used to lex the document.
160 * @param node The node to test.
161 * @returns Returns the result of the test.
162 */
163TY_PRIVATE Bool TY_(IsBlank)(Lexer *lexer, Node *node);
164
165
166/**
167 * Indicates whether or not a node is declared as containing javascript
168 * code.
169 * @param node The node to test.
170 * @returns Returns the result of the test.
171 */
172TY_PRIVATE Bool TY_(IsJavaScript)(Node *node);
173
174
175/**
176 * Parses a document after lexing using the HTML parser. It begins by properly
177 * configuring the overall HTML structure, and subsequently processes all
178 * remaining nodes. HTML is the root node.
179 * @param doc The Tidy document.
180 */
181TY_PRIVATE void TY_(ParseDocument)( TidyDocImpl* doc );
182
183
184/**
185 * Indicates whether or not whitespace is to be preserved in XHTML/XML
186 * documents.
187 * @param doc The Tidy document.
188 * @param element The node to test.
189 * @returns Returns the result of the test.
190 */
191TY_PRIVATE Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
192
193
194/**
195 * Parses a document after lexing using the XML parser.
196 * @param doc The Tidy document.
197 */
198TY_PRIVATE void TY_(ParseXMLDocument)( TidyDocImpl* doc );
199
200
201/** @} end parser_h group */
202/** @} end internal_api group */
203
204#endif /* __PARSER_H__ */
205
#define TY_PRIVATE
Definition forward.h:29
#define TY_(str)
Definition forward.h:23
TidyTagId
Known HTML element types.
Definition tidyenum.h:845
Bool
Definition tidyplatform.h:647