Add w3c html tidy

This commit is contained in:
Justine Tunney 2022-06-09 06:33:31 -07:00
parent ecc8962555
commit 3c7ae0fc72
63 changed files with 56239 additions and 0 deletions

View file

@ -138,6 +138,7 @@ include net/http/http.mk # │
include third_party/mbedtls/mbedtls.mk # │ include third_party/mbedtls/mbedtls.mk # │
include net/https/https.mk # │ include net/https/https.mk # │
include third_party/regex/regex.mk #─┘ include third_party/regex/regex.mk #─┘
include third_party/tidy/tidy.mk
include third_party/third_party.mk include third_party/third_party.mk
include libc/testlib/testlib.mk include libc/testlib/testlib.mk
include tool/viz/lib/vizlib.mk include tool/viz/lib/vizlib.mk

View file

@ -7,4 +7,12 @@
#include "libc/sysv/consts/s.h" #include "libc/sysv/consts/s.h"
#include "libc/sysv/consts/utime.h" #include "libc/sysv/consts/utime.h"
#include "libc/time/time.h" #include "libc/time/time.h"
#define st_atime st_atim.tv_sec
#define st_atime_nsec st_atim.tv_nsec
#define st_mtime st_mtim.tv_sec
#define st_mtime_nsec st_mtim.tv_nsec
#define st_ctime st_ctim.tv_sec
#define st_ctime_nsec st_ctim.tv_nsec
#endif #endif

View file

@ -24,6 +24,7 @@ o/$(MODE)/third_party: \
o/$(MODE)/third_party/smallz4 \ o/$(MODE)/third_party/smallz4 \
o/$(MODE)/third_party/sqlite3 \ o/$(MODE)/third_party/sqlite3 \
o/$(MODE)/third_party/stb \ o/$(MODE)/third_party/stb \
o/$(MODE)/third_party/tidy \
o/$(MODE)/third_party/xed \ o/$(MODE)/third_party/xed \
o/$(MODE)/third_party/zip \ o/$(MODE)/third_party/zip \
o/$(MODE)/third_party/zlib o/$(MODE)/third_party/zlib

101
third_party/tidy/.tidyrc vendored Normal file
View file

@ -0,0 +1,101 @@
# http://tidy.sourceforge.net/docs/quickref.html#clean
accessibility-check: 0
add-meta-charset: yes
add-xml-decl: no
add-xml-space: no
alt-text:
anchor-as-name: yes
ascii-chars: no
assume-xml-procins: no
bare: no
break-before-br: no
char-encoding: utf8
clean: yes
coerce-endtags: yes
css-prefix: c
custom-tags: no
decorate-inferred-ul: no
doctype: auto
drop-empty-elements: yes
drop-empty-paras: yes
drop-proprietary-attributes: no
enclose-block-text: yes
enclose-text: yes
error-file:
escape-cdata: no
escape-scripts: yes
fix-backslash: yes
fix-bad-comments: auto
fix-style-tags: no
fix-uri: yes
force-output: no
gdoc: no
gnu-emacs: yes
hide-comments: no
indent-attributes: no
indent-cdata: no
indent-spaces: 2
indent-with-tabs: no
indent: no
input-encoding: utf8
input-xml: no
join-classes: no
join-styles: yes
keep-tabs: no
keep-time: no
literal-attributes: no
logical-emphasis: no
lower-literals: yes
markup: yes
merge-divs: auto
merge-emphasis: yes
merge-spans: auto
mute-id: no
mute:
ncr: yes
new-blocklevel-tags:
new-empty-tags:
new-inline-tags:
new-pre-tags:
newline: LF
numeric-entities: no
omit-optional-tags: yes
output-bom: auto
output-encoding: utf8
output-file:
output-html: no
output-xhtml: no
output-xml: no
preserve-entities: no
priority-attributes:
punctuation-wrap: no
quiet: no
quote-ampersand: yes
quote-marks: no
quote-nbsp: yes
repeated-attributes: keep-last
replace-color: no
show-body-only: no
show-errors: 10
show-filename: no
show-info: yes
show-meta-change: no
show-warnings: yes
skip-nested: yes
sort-attributes: none
strict-tags-attributes: no
tab-size: 8
tidy-mark: no
uppercase-attributes: no
uppercase-tags: no
vertical-space: yes
warn-proprietary-attributes: no
word-2000: no
wrap-asp: yes
wrap-attributes: no
wrap-jste: yes
wrap-php: no
wrap-script-literals: no
wrap-sections: yes
wrap: 68
write-back: no

50
third_party/tidy/LICENSE.md vendored Normal file
View file

@ -0,0 +1,50 @@
# HTML Tidy
## HTML parser and pretty printer
Copyright (c) 1998-2016 World Wide Web Consortium
(Massachusetts Institute of Technology, European Research
Consortium for Informatics and Mathematics, Keio University).
All Rights Reserved.
Additional contributions (c) 2001-2016 University of Toronto, Terry Teague,
@geoffmcl, HTACG, and others.
### Contributing Author(s):
Dave Raggett <dsr@w3.org>
The contributing author(s) would like to thank all those who
helped with testing, bug fixes and suggestions for improvements.
This wouldn't have been possible without your help.
## COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be held
liable for any direct, indirect, special or consequential damages
arising out of any use of the software or documentation, even if
advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute
this source code, or portions hereof, documentation and executables,
for any purpose, without fee, subject to the following restrictions:
1. The origin of this source code must not be misrepresented.
2. Altered versions must be plainly marked as such and must
not be misrepresented as being the original source.
3. This Copyright notice may not be removed or altered from any
source or altered source distribution.
The copyright holders and contributing author(s) specifically
permit, without fee, and encourage the use of this source code
as a component for supporting the Hypertext Markup Language in
commercial products. If you use this source code in a product,
acknowledgement is not required but would be appreciated.

31
third_party/tidy/README.cosmo vendored Normal file
View file

@ -0,0 +1,31 @@
DESCRIPTION
HTML Tidy is a tool for spotting errors in HTML code. It's also able
to reformat source code in a configurable manner. It has outstanding
support for HTML5.
PROVENANCE
https://github.com/htacg/tidy-html5/
Commit: d08ddc2860aa95ba8e301343a30837f157977cba
Author: Jim Derry <balthisar@gmail.com>
Date: Tue Jan 25 10:17:15 2022 -0500
LICENSE
W3C License
This is a permissive license that only requires notice preservation in
sources. https://en.wikipedia.org/wiki/W3C_Software_Notice_and_License
Similar to the Apache 2.0 license, any local changes must be documented.
LOCAL CHANGES
No changes to tidy program. Only platform normalization.
- Obtain .tidyrc from /zip/.tidyrc rather than /etc/tidrc
- Configure tidyplatform.h
- Rename a few .h files to .inc
- Normalize header includes for cosmopolitan repo
- Delete ugly no-op macros, e.g. TIDY_CALL, TIDY_EXPORT, etc.
- Add clang-format off directives
- Delete trailing whitespace

3542
third_party/tidy/access.c vendored Normal file

File diff suppressed because it is too large Load diff

91
third_party/tidy/access.h vendored Normal file
View file

@ -0,0 +1,91 @@
#ifndef __ACCESS_H__
#define __ACCESS_H__
/* clang-format off */
/*********************************************************************
* carry out accessibility checks
*
* This module carries out processes for all accessibility checks. It
* traverses through all the content within the tree and evaluates the
* tags for accessibility.
*
* To perform the following checks, 'AccessibilityChecks' must be
* called AFTER the tree structure has been formed.
*
* If, in the command prompt or configuration file, there is no
* specification of which accessibility priorities to check, then no
* accessibility checks will be performed.
*
* The accessibility checks to perform depending on user's desire:
* 1. priority 1
* 2. priority 1 & 2
* 3. priority 1, 2, & 3
*
* Reference document: https://www.w3.org/TR/WAI-WEBCONTENT/
*
* Copyright University of Toronto
* Portions (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
* See `tidy.h` for the copyright notice.
* Programmed by: Mike Lam and Chris Ridpath
* Modifications by: Terry Teague (TRT)
* Further modifications: consult git log.
*********************************************************************/
#include "third_party/tidy/forward.h"
/* Element count of the fixed-size tmbchar buffers `textNode` and `text`
   declared in struct _TidyAccessImpl. */
enum {
TEXTBUF_SIZE=128u
};
/* Working state for the accessibility checks described in the header
   comment of this file.
   NOTE(review): presumably one instance lives inside each TidyDocImpl;
   the owner is not visible from this header - confirm. */
struct _TidyAccessImpl;
typedef struct _TidyAccessImpl TidyAccessImpl;
struct _TidyAccessImpl
{
/* gets set from Tidy variable AccessibilityCheckLevel; the file header
   lists the selectable levels as priority 1, 1 & 2, and 1, 2, & 3 */
int PRIORITYCHK;
/* Number of characters that are found within the concatenated text */
int counter;
/* list of characters in the text nodes found within a container element
   (fixed capacity: TEXTBUF_SIZE) */
tmbchar textNode[ TEXTBUF_SIZE ];
/* The list of characters found within one text node
   (fixed capacity: TEXTBUF_SIZE) */
tmbchar text[ TEXTBUF_SIZE ];
/* Number of frame elements found within a frameset */
int numFrames;
/* Number of 'longdesc' attributes found within a frameset.
   NOTE(review): the Has* name reads like a flag while the comment
   describes a count - confirm against access.c. */
int HasCheckedLongDesc;
/* NOTE(review): the next three counters carry no description upstream;
   their meaning must be taken from access.c. */
int CheckedHeaders;
int ListElements;
int OtherListElements;
/* For 'USEMAP' identifier */
Bool HasUseMap;
Bool HasName;
Bool HasMap;
/* For tracking nodes that are deleted from the original parse tree - TRT */
/* Node *access_tree; */
/* NOTE(review): the remaining flags are undocumented upstream; by name
   they look related to table-header and label/id validation - confirm
   in access.c before relying on that. */
Bool HasTH;
Bool HasValidFor;
Bool HasValidId;
Bool HasValidRowHeaders;
Bool HasValidColumnHeaders;
Bool HasInvalidRowHeader;
Bool HasInvalidColumnHeader;
int ForID;
};
/* Entry point of the accessibility checks for `doc`. As stated in the
   header comment of this file, it must be called AFTER the tree
   structure has been formed. */
void TY_(AccessibilityChecks)( TidyDocImpl* doc );
#endif /* __ACCESS_H__ */

130
third_party/tidy/alloc.c vendored Normal file
View file

@ -0,0 +1,130 @@
/* clang-format off */
/* alloc.c -- Default memory allocation routines.
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
/* #define DEBUG_MEMORY very NOISY extra DEBUG of memory allocation, reallocation and free */
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/forward.h"
#include "libc/stdio/stdio.h"
#include "libc/assert.h"
#include "third_party/tidy/sprtf.h"
/* Optional replacement hooks, installed through the tidySet*Call()
   functions in this file. While a hook is NULL the default* functions
   below fall back to malloc/realloc/free, and defaultPanic() prints to
   stderr and exits. */
static TidyMalloc g_malloc = NULL;
static TidyRealloc g_realloc = NULL;
static TidyFree g_free = NULL;
static TidyPanic g_panic = NULL;
/* Call counters used only by the SPRTF trace lines, which are compiled in
   when both ENABLE_DEBUG_LOG and DEBUG_MEMORY are defined. */
#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
static int alloccnt = 0;
static int realloccnt = 0;
static int freecnt = 0;
#endif
/* Installs `fmalloc` as the allocation hook consulted by defaultAlloc().
   Passing NULL restores the malloc() fallback. Always returns yes. */
Bool tidySetMallocCall( TidyMalloc fmalloc )
{
    g_malloc = fmalloc;

    return yes;
}
/* Installs `frealloc` as the reallocation hook consulted by
   defaultRealloc(). Passing NULL restores the realloc() fallback.
   Always returns yes. */
Bool tidySetReallocCall( TidyRealloc frealloc )
{
    g_realloc = frealloc;

    return yes;
}
/* Installs `ffree` as the release hook consulted by defaultFree().
   Passing NULL restores the free() fallback. Always returns yes. */
Bool tidySetFreeCall( TidyFree ffree )
{
    g_free = ffree;

    return yes;
}
/* Installs `fpanic` as the fatal-error hook consulted by defaultPanic().
   Passing NULL restores the built-in "print and exit" behaviour.
   Always returns yes. */
Bool tidySetPanicCall( TidyPanic fpanic )
{
    g_panic = fpanic;

    return yes;
}
/* Fatal-error handler of the default allocator.
 *
 * If a panic hook was installed with tidySetPanicCall(), the message is
 * handed to it and nothing else happens here. Otherwise the message is
 * written to stderr and the process exits with status 2 (after tripping
 * an assert in _DEBUG builds).
 *
 * allocator: unused.
 * msg:       text describing the failure.
 */
static void defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbstr msg )
{
    if ( g_panic != NULL )
    {
        /* A user-supplied hook takes over completely. */
        g_panic( msg );
        return;
    }

    fprintf( stderr, "Fatal error: %s\n", msg );
#ifdef _DEBUG
    assert(0);
#endif
    /* 2 signifies a serious error */
    exit(2);
}
/* Allocation entry of the default allocator.
 *
 * Uses the hook installed with tidySetMallocCall() when there is one and
 * malloc() otherwise. A NULL result is reported through defaultPanic();
 * if that call returns, the NULL pointer is returned to the caller.
 *
 * allocator: forwarded to defaultPanic() on failure.
 * size:      number of bytes requested.
 */
static void* defaultAlloc( TidyAllocator* allocator, size_t size )
{
    void *block;

    if ( g_malloc )
        block = g_malloc( size );
    else
        block = malloc( size );

    if ( block == NULL )
        defaultPanic( allocator,"Out of memory!");

#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    alloccnt++;
    SPRTF("%d: alloc MEM %p, size %d\n", alloccnt, block, (int)size );
    if (size == 0) {
        SPRTF("NOTE: An allocation of ZERO bytes!!!!!!\n");
    }
#endif
    return block;
}
/* Reallocation entry of the default allocator.
 *
 * A NULL `mem` is treated as a fresh allocation and delegated to
 * defaultAlloc(). Otherwise the hook installed with tidySetReallocCall()
 * is used when there is one, and realloc() when there is not. A NULL
 * result is reported through defaultPanic(); if that call returns, the
 * NULL pointer is returned to the caller.
 *
 * allocator: forwarded to defaultAlloc()/defaultPanic().
 * mem:       block to resize, or NULL.
 * newsize:   requested size in bytes.
 */
static void* defaultRealloc( TidyAllocator* allocator, void* mem, size_t newsize )
{
    void *resized;

    if ( !mem )
        return defaultAlloc( allocator, newsize );

    if ( g_realloc )
        resized = g_realloc( mem, newsize );
    else
        resized = realloc( mem, newsize );

    if ( resized == NULL )
        defaultPanic( allocator, "Out of memory!");

#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    realloccnt++;
    SPRTF("%d: realloc MEM %p, size %d\n", realloccnt, resized, (int)newsize );
#endif
    return resized;
}
/* Release entry of the default allocator.
 *
 * NULL is ignored outright, so neither the hook nor the debug trace ever
 * sees it. Any other pointer goes to the hook installed with
 * tidySetFreeCall() when there is one, and to free() otherwise.
 *
 * allocator: unused.
 * mem:       block to release, or NULL.
 */
static void defaultFree( TidyAllocator* ARG_UNUSED(allocator), void* mem )
{
    if ( mem == NULL )
        return;

#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    freecnt++;
    SPRTF("%d: free MEM %p\n", freecnt, mem );
#endif

    if ( g_free != NULL )
    {
        g_free( mem );
        return;
    }

    free( mem );
}
/* Function table of the default allocator, initialized positionally with
   the four static functions of this file: alloc, realloc, free, panic. */
static const TidyAllocatorVtbl defaultVtbl = {
defaultAlloc,
defaultRealloc,
defaultFree,
defaultPanic
};
/* The default allocator object. It has external linkage and its only
   initializer is the address of defaultVtbl. */
TidyAllocator TY_(g_default_allocator) = {
&defaultVtbl
};
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

3670
third_party/tidy/attrdict.c vendored Normal file

File diff suppressed because it is too large Load diff

160
third_party/tidy/attrdict.h vendored Normal file
View file

@ -0,0 +1,160 @@
#ifndef __ATTRDICT_H__
#define __ATTRDICT_H__
/* clang-format off */
/* attrdict.h -- extended attribute information
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/access.h"
#include "third_party/tidy/tidy.h"
/* One entry of a per-element attribute table: an attribute id paired
   with a `versions` value.
   NOTE(review): `versions` is presumably a bitmask of the VERS_*
   constants (attrs.h falls back to VERS_PROPRIETARY) - confirm. */
typedef struct _AttrVersion
{
TidyAttrId attribute;
uint versions;
} AttrVersion;
/* Per-element attribute tables, one array of AttrVersion per tag name.
   They are only declared here (unsized, extern).
   NOTE(review): definitions presumably live in attrdict.c, and the
   array terminator convention is not visible from this header - confirm
   before iterating. */
extern const AttrVersion TY_(W3CAttrsFor_A)[];
extern const AttrVersion TY_(W3CAttrsFor_ABBR)[];
extern const AttrVersion TY_(W3CAttrsFor_ACRONYM)[];
extern const AttrVersion TY_(W3CAttrsFor_ADDRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_APPLET)[];
extern const AttrVersion TY_(W3CAttrsFor_AREA)[];
extern const AttrVersion TY_(W3CAttrsFor_B)[];
extern const AttrVersion TY_(W3CAttrsFor_BASE)[];
extern const AttrVersion TY_(W3CAttrsFor_BASEFONT)[];
extern const AttrVersion TY_(W3CAttrsFor_BDO)[];
extern const AttrVersion TY_(W3CAttrsFor_BIG)[];
extern const AttrVersion TY_(W3CAttrsFor_BLOCKQUOTE)[];
extern const AttrVersion TY_(W3CAttrsFor_BODY)[];
extern const AttrVersion TY_(W3CAttrsFor_BR)[];
extern const AttrVersion TY_(W3CAttrsFor_BUTTON)[];
extern const AttrVersion TY_(W3CAttrsFor_CAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_CENTER)[];
extern const AttrVersion TY_(W3CAttrsFor_CITE)[];
extern const AttrVersion TY_(W3CAttrsFor_CODE)[];
extern const AttrVersion TY_(W3CAttrsFor_COL)[];
extern const AttrVersion TY_(W3CAttrsFor_COLGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_DD)[];
extern const AttrVersion TY_(W3CAttrsFor_DEL)[];
extern const AttrVersion TY_(W3CAttrsFor_DFN)[];
extern const AttrVersion TY_(W3CAttrsFor_DIR)[];
extern const AttrVersion TY_(W3CAttrsFor_DIV)[];
extern const AttrVersion TY_(W3CAttrsFor_DL)[];
extern const AttrVersion TY_(W3CAttrsFor_DT)[];
extern const AttrVersion TY_(W3CAttrsFor_EM)[];
extern const AttrVersion TY_(W3CAttrsFor_FIELDSET)[];
extern const AttrVersion TY_(W3CAttrsFor_FONT)[];
extern const AttrVersion TY_(W3CAttrsFor_FORM)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAMESET)[];
extern const AttrVersion TY_(W3CAttrsFor_H1)[];
extern const AttrVersion TY_(W3CAttrsFor_H2)[];
extern const AttrVersion TY_(W3CAttrsFor_H3)[];
extern const AttrVersion TY_(W3CAttrsFor_H4)[];
extern const AttrVersion TY_(W3CAttrsFor_H5)[];
extern const AttrVersion TY_(W3CAttrsFor_H6)[];
extern const AttrVersion TY_(W3CAttrsFor_HEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_HR)[];
extern const AttrVersion TY_(W3CAttrsFor_HTML)[];
extern const AttrVersion TY_(W3CAttrsFor_I)[];
extern const AttrVersion TY_(W3CAttrsFor_IFRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_IMG)[];
extern const AttrVersion TY_(W3CAttrsFor_INPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_INS)[];
extern const AttrVersion TY_(W3CAttrsFor_ISINDEX)[];
extern const AttrVersion TY_(W3CAttrsFor_KBD)[];
extern const AttrVersion TY_(W3CAttrsFor_LABEL)[];
extern const AttrVersion TY_(W3CAttrsFor_LEGEND)[];
extern const AttrVersion TY_(W3CAttrsFor_LI)[];
extern const AttrVersion TY_(W3CAttrsFor_LINK)[];
extern const AttrVersion TY_(W3CAttrsFor_LISTING)[];
extern const AttrVersion TY_(W3CAttrsFor_MAP)[];
extern const AttrVersion TY_(W3CAttrsFor_MATHML)[]; /* [i_a]2 */
extern const AttrVersion TY_(W3CAttrsFor_MENU)[];
extern const AttrVersion TY_(W3CAttrsFor_META)[];
extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[];
extern const AttrVersion TY_(W3CAttrsFor_NOFRAMES)[];
extern const AttrVersion TY_(W3CAttrsFor_NOSCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_OBJECT)[];
extern const AttrVersion TY_(W3CAttrsFor_OL)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_P)[];
extern const AttrVersion TY_(W3CAttrsFor_PARAM)[];
extern const AttrVersion TY_(W3CAttrsFor_PICTURE)[]; /* Issue #151 - html5 */
extern const AttrVersion TY_(W3CAttrsFor_PLAINTEXT)[];
extern const AttrVersion TY_(W3CAttrsFor_PRE)[];
extern const AttrVersion TY_(W3CAttrsFor_Q)[];
extern const AttrVersion TY_(W3CAttrsFor_RB)[];
extern const AttrVersion TY_(W3CAttrsFor_RBC)[];
extern const AttrVersion TY_(W3CAttrsFor_RP)[];
extern const AttrVersion TY_(W3CAttrsFor_RT)[];
extern const AttrVersion TY_(W3CAttrsFor_RTC)[];
extern const AttrVersion TY_(W3CAttrsFor_RUBY)[];
extern const AttrVersion TY_(W3CAttrsFor_S)[];
extern const AttrVersion TY_(W3CAttrsFor_SAMP)[];
extern const AttrVersion TY_(W3CAttrsFor_SCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_SELECT)[];
extern const AttrVersion TY_(W3CAttrsFor_SMALL)[];
extern const AttrVersion TY_(W3CAttrsFor_SPAN)[];
extern const AttrVersion TY_(W3CAttrsFor_STRIKE)[];
extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
extern const AttrVersion TY_(W3CAttrsFor_SVG)[];
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
extern const AttrVersion TY_(W3CAttrsFor_TD)[];
extern const AttrVersion TY_(W3CAttrsFor_TEXTAREA)[];
extern const AttrVersion TY_(W3CAttrsFor_TFOOT)[];
extern const AttrVersion TY_(W3CAttrsFor_TH)[];
extern const AttrVersion TY_(W3CAttrsFor_THEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_TITLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TR)[];
extern const AttrVersion TY_(W3CAttrsFor_TT)[];
extern const AttrVersion TY_(W3CAttrsFor_U)[];
extern const AttrVersion TY_(W3CAttrsFor_UL)[];
extern const AttrVersion TY_(W3CAttrsFor_VAR)[];
extern const AttrVersion TY_(W3CAttrsFor_XMP)[];
extern const AttrVersion TY_(W3CAttrsFor_TRACK)[];
extern const AttrVersion TY_(W3CAttrsFor_SUMMARY)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGCAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_SLOT)[];
extern const AttrVersion TY_(W3CAttrsFor_TEMPLATE)[];
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
extern const AttrVersion TY_(W3CAttrsFor_DATA)[];
extern const AttrVersion TY_(W3CAttrsFor_DATALIST)[];
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
#endif /* __ATTRDICT_H__ */

2780
third_party/tidy/attrs.c vendored Normal file

File diff suppressed because it is too large Load diff

487
third_party/tidy/attrs.h vendored Normal file
View file

@ -0,0 +1,487 @@
#ifndef __ATTRS_H__
#define __ATTRS_H__
/* clang-format off */
/* attrs.h -- recognize HTML attributes
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/forward.h"
/* declaration for methods that check attribute values */
typedef void (AttrCheck)(TidyDocImpl* doc, Node *node, AttVal *attval);
/* Definition record for one attribute: its id, its name, an optional
   value-checking callback and a link to another record. */
struct _Attribute
{
TidyAttrId id;
tmbstr name;
/* value checker of type AttrCheck (typedef above).
   NOTE(review): presumably NULL when no check applies - confirm. */
AttrCheck* attrchk;
/* next record in a singly linked chain */
struct _Attribute* next;
};
/*
Anchor/Node linked list
*/
/* One entry of the anchor/node linked list: an anchor name together with
   the node it belongs to. */
struct _Anchor
{
/* next entry in the list */
struct _Anchor *next;
Node *node;
/* NOTE(review): ownership of this string is not visible here; see
   TY_(RemoveAnchorByNode)/TY_(FreeAnchors) in attrs.c. */
char *name;
};
typedef struct _Anchor Anchor;
/* Number of slots in the `hashtab` array of struct _TidyAttribImpl. */
enum
{
ATTRIBUTE_HASH_SIZE=178u
};
/* Node of an attribute hash chain: points at a read-only Attribute and
   at the next node of the same chain. */
struct _AttrHash
{
Attribute const* attr;
struct _AttrHash* next;
};
typedef struct _AttrHash AttrHash;
/* Number of slots in the `anchor_hash` array of struct _TidyAttribImpl. */
enum
{
ANCHOR_HASH_SIZE=1021u
};
/* Keeps a list of attributes that are sorted ahead of the others. */
typedef struct _priorityAttribs {
/* array of attribute-name strings */
tmbstr* list;
/* NOTE(review): presumably `count` is the number of entries in use and
   `capacity` the number of allocated slots - confirm in attrs.c. */
uint count;
uint capacity;
} PriorityAttribs;
/* Attribute-related state: the anchor lookup table, the declared
   attribute list, the priority attribute list and the attribute hash
   table.
   NOTE(review): presumably embedded once per TidyDocImpl - confirm. */
struct _TidyAttribImpl
{
/* anchor/node lookup (ANCHOR_HASH_SIZE buckets of Anchor chains) */
Anchor* anchor_hash[ANCHOR_HASH_SIZE];
/* Declared literal attributes */
Attribute* declared_attr_list;
/* Prioritized list of attributes to write */
PriorityAttribs priorityAttribs;
/* attribute lookup (ATTRIBUTE_HASH_SIZE buckets of AttrHash chains) */
AttrHash* hashtab[ATTRIBUTE_HASH_SIZE];
};
typedef struct _TidyAttribImpl TidyAttribImpl;
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
AttrCheck TY_(CheckUrl);
/* public method for finding attribute definition by name */
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval );
const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value );
AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value);
/* Add an item to the list of priority attributes to write first. */
void TY_(DefinePriorityAttribute)(TidyDocImpl* doc, ctmbstr name);
/* Start an iterator for priority attributes. */
TidyIterator TY_(getPriorityAttrList)( TidyDocImpl* doc );
/* Get the next priority attribute. */
ctmbstr TY_(getNextPriorityAttr)( TidyDocImpl* doc, TidyIterator* iter );
Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname );
/* Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ); */
Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname );
/* may id or name serve as anchor? */
Bool TY_(IsAnchorElement)( TidyDocImpl* doc, Node* node );
/*
In CSS1, selectors can contain only the characters A-Z, 0-9, and
Unicode characters 161-255, plus dash (-); they cannot start with
a dash or a digit; they can also contain escaped characters and any
Unicode character as a numeric code (see next item).
The backslash followed by at most four hexadecimal digits (0..9A..F)
stands for the Unicode character with that number.
Any character except a hexadecimal digit can be escaped to remove its
special meaning, by putting a backslash in front.
#508936 - CSS class naming for -clean option
*/
Bool TY_(IsCSS1Selector)( ctmbstr buf );
Bool TY_(IsValidHTMLID)(ctmbstr id);
Bool TY_(IsValidXMLID)(ctmbstr id);
/* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node );
/* free all anchors */
void TY_(FreeAnchors)( TidyDocImpl* doc );
/* public methods for initializing/freeing attribute dictionary */
void TY_(InitAttrs)( TidyDocImpl* doc );
void TY_(FreeAttrTable)( TidyDocImpl* doc );
void TY_(FreeAttrPriorityList)( TidyDocImpl* doc );
void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname );
/*
the same attribute name can't be used
more than once in each element
*/
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node* node, Bool isXml );
void TY_(SortAttributes)(TidyDocImpl* doc, Node* node, TidyAttrSortStrategy strat);
Bool TY_(IsBoolAttribute)( AttVal* attval );
Bool TY_(attrIsEvent)( AttVal* attval );
AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
/* 0 == TidyAttr_UNKNOWN */
/* Null-safe accessors for AttVal-like pointers (anything with `dict` and
   `value` members).

   Every macro parameter is parenthesized in the expansion, so arguments
   may be arbitrary expressions (`p + 1`, `cond ? x : y`, `&obj`).
   Arguments are still evaluated more than once: do not pass expressions
   with side effects.

   AttrId          - id of the attribute, TidyAttr_UNKNOWN if av or its dict is NULL
   AttrIsId        - non-zero when av has a dict whose id equals atid
   AttrHasValue    - non-zero when attr and attr->value are both non-NULL
   AttrValueIs     - AttrHasValue and TY_(tmbstrcasecmp) against val returns 0
   AttrContains    - AttrHasValue and TY_(tmbsubstr) for val returns non-NULL
   AttrVersions    - dict->versions, VERS_PROPRIETARY if attr or its dict is NULL
   AttrsHaveSameId - both have a dict with a non-zero id and the ids match */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == (atid)))
#define AttrHasValue(attr) ((attr) && (attr)->value)
#define AttrValueIs(attr, val) (AttrHasValue(attr) && \
    TY_(tmbstrcasecmp)((attr)->value, (val)) == 0)
#define AttrContains(attr, val) (AttrHasValue(attr) && \
    TY_(tmbsubstr)((attr)->value, (val)) != NULL)
#define AttrVersions(attr) ((attr) && (attr)->dict ? (attr)->dict->versions : VERS_PROPRIETARY)
#define AttrsHaveSameId(a, b) ((a) && (b) && (a)->dict && (b)->dict && (a)->dict->id && \
    (b)->dict->id && (a)->dict->id == (b)->dict->id)
#define attrIsABBR(av) AttrIsId( av, TidyAttr_ABBR )
#define attrIsACCEPT(av) AttrIsId( av, TidyAttr_ACCEPT )
#define attrIsACCEPT_CHARSET(av) AttrIsId( av, TidyAttr_ACCEPT_CHARSET )
#define attrIsACCESSKEY(av) AttrIsId( av, TidyAttr_ACCESSKEY )
#define attrIsACTION(av) AttrIsId( av, TidyAttr_ACTION )
#define attrIsADD_DATE(av) AttrIsId( av, TidyAttr_ADD_DATE )
#define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN )
#define attrIsALINK(av) AttrIsId( av, TidyAttr_ALINK )
#define attrIsALT(av) AttrIsId( av, TidyAttr_ALT )
#define attrIsARCHIVE(av) AttrIsId( av, TidyAttr_ARCHIVE )
#define attrIsAXIS(av) AttrIsId( av, TidyAttr_AXIS )
#define attrIsBACKGROUND(av) AttrIsId( av, TidyAttr_BACKGROUND )
#define attrIsBGCOLOR(av) AttrIsId( av, TidyAttr_BGCOLOR )
#define attrIsBGPROPERTIES(av) AttrIsId( av, TidyAttr_BGPROPERTIES )
#define attrIsBORDER(av) AttrIsId( av, TidyAttr_BORDER )
#define attrIsBORDERCOLOR(av) AttrIsId( av, TidyAttr_BORDERCOLOR )
#define attrIsBOTTOMMARGIN(av) AttrIsId( av, TidyAttr_BOTTOMMARGIN )
#define attrIsCELLPADDING(av) AttrIsId( av, TidyAttr_CELLPADDING )
#define attrIsCELLSPACING(av) AttrIsId( av, TidyAttr_CELLSPACING )
/* attrIsCHARSET used to be defined twice in this run; the single copy is
   kept in its alphabetical position. */
#define attrIsCHAR(av) AttrIsId( av, TidyAttr_CHAR )
#define attrIsCHAROFF(av) AttrIsId( av, TidyAttr_CHAROFF )
#define attrIsCHARSET(av) AttrIsId( av, TidyAttr_CHARSET )
#define attrIsCHECKED(av) AttrIsId( av, TidyAttr_CHECKED )
#define attrIsCITE(av) AttrIsId( av, TidyAttr_CITE )
#define attrIsCLASS(av) AttrIsId( av, TidyAttr_CLASS )
#define attrIsCLASSID(av) AttrIsId( av, TidyAttr_CLASSID )
#define attrIsCLEAR(av) AttrIsId( av, TidyAttr_CLEAR )
#define attrIsCODE(av) AttrIsId( av, TidyAttr_CODE )
#define attrIsCODEBASE(av) AttrIsId( av, TidyAttr_CODEBASE )
#define attrIsCODETYPE(av) AttrIsId( av, TidyAttr_CODETYPE )
#define attrIsCOLOR(av) AttrIsId( av, TidyAttr_COLOR )
#define attrIsCOLS(av) AttrIsId( av, TidyAttr_COLS )
#define attrIsCOLSPAN(av) AttrIsId( av, TidyAttr_COLSPAN )
#define attrIsCOMPACT(av) AttrIsId( av, TidyAttr_COMPACT )
#define attrIsCONTENT(av) AttrIsId( av, TidyAttr_CONTENT )
#define attrIsCOORDS(av) AttrIsId( av, TidyAttr_COORDS )
#define attrIsDATA(av) AttrIsId( av, TidyAttr_DATA )
#define attrIsDATAFLD(av) AttrIsId( av, TidyAttr_DATAFLD )
#define attrIsDATAFORMATAS(av) AttrIsId( av, TidyAttr_DATAFORMATAS )
#define attrIsDATAPAGESIZE(av) AttrIsId( av, TidyAttr_DATAPAGESIZE )
#define attrIsDATASRC(av) AttrIsId( av, TidyAttr_DATASRC )
#define attrIsDATETIME(av) AttrIsId( av, TidyAttr_DATETIME )
#define attrIsDECLARE(av) AttrIsId( av, TidyAttr_DECLARE )
#define attrIsDEFER(av) AttrIsId( av, TidyAttr_DEFER )
#define attrIsDIR(av) AttrIsId( av, TidyAttr_DIR )
#define attrIsDISABLED(av) AttrIsId( av, TidyAttr_DISABLED )
#define attrIsENCODING(av) AttrIsId( av, TidyAttr_ENCODING )
#define attrIsENCTYPE(av) AttrIsId( av, TidyAttr_ENCTYPE )
#define attrIsFACE(av) AttrIsId( av, TidyAttr_FACE )
#define attrIsFOR(av) AttrIsId( av, TidyAttr_FOR )
#define attrIsFRAME(av) AttrIsId( av, TidyAttr_FRAME )
#define attrIsFRAMEBORDER(av) AttrIsId( av, TidyAttr_FRAMEBORDER )
#define attrIsFRAMESPACING(av) AttrIsId( av, TidyAttr_FRAMESPACING )
#define attrIsGRIDX(av) AttrIsId( av, TidyAttr_GRIDX )
#define attrIsGRIDY(av) AttrIsId( av, TidyAttr_GRIDY )
#define attrIsHEADERS(av) AttrIsId( av, TidyAttr_HEADERS )
#define attrIsHEIGHT(av) AttrIsId( av, TidyAttr_HEIGHT )
#define attrIsHREF(av) AttrIsId( av, TidyAttr_HREF )
#define attrIsHREFLANG(av) AttrIsId( av, TidyAttr_HREFLANG )
#define attrIsHSPACE(av) AttrIsId( av, TidyAttr_HSPACE )
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
#define attrIsID(av) AttrIsId( av, TidyAttr_ID )
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
#define attrIsITEMID(av) AttrIsId( av, TidyAttr_ITEMID )
#define attrIsITEMPROP(av) AttrIsId( av, TidyAttr_ITEMPROP )
#define attrIsITEMREF(av) AttrIsId( av, TidyAttr_ITEMREF )
#define attrIsITEMSCOPE(av) AttrIsId( av, TidyAttr_ITEMSCOPE )
#define attrIsITEMTYPE(av) AttrIsId( av, TidyAttr_ITEMTYPE )
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )
#define attrIsLAST_MODIFIED(av) AttrIsId( av, TidyAttr_LAST_MODIFIED )
#define attrIsLAST_VISIT(av) AttrIsId( av, TidyAttr_LAST_VISIT )
#define attrIsLEFTMARGIN(av) AttrIsId( av, TidyAttr_LEFTMARGIN )
#define attrIsLINK(av) AttrIsId( av, TidyAttr_LINK )
#define attrIsLONGDESC(av) AttrIsId( av, TidyAttr_LONGDESC )
#define attrIsLOWSRC(av) AttrIsId( av, TidyAttr_LOWSRC )
#define attrIsMARGINHEIGHT(av) AttrIsId( av, TidyAttr_MARGINHEIGHT )
#define attrIsMARGINWIDTH(av) AttrIsId( av, TidyAttr_MARGINWIDTH )
#define attrIsMAXLENGTH(av) AttrIsId( av, TidyAttr_MAXLENGTH )
#define attrIsMEDIA(av) AttrIsId( av, TidyAttr_MEDIA )
#define attrIsMETHOD(av) AttrIsId( av, TidyAttr_METHOD )
#define attrIsMULTIPLE(av) AttrIsId( av, TidyAttr_MULTIPLE )
#define attrIsNAME(av) AttrIsId( av, TidyAttr_NAME )
#define attrIsNOHREF(av) AttrIsId( av, TidyAttr_NOHREF )
#define attrIsNORESIZE(av) AttrIsId( av, TidyAttr_NORESIZE )
#define attrIsNOSHADE(av) AttrIsId( av, TidyAttr_NOSHADE )
#define attrIsNOWRAP(av) AttrIsId( av, TidyAttr_NOWRAP )
#define attrIsOBJECT(av) AttrIsId( av, TidyAttr_OBJECT )
#define attrIsOnAFTERUPDATE(av) AttrIsId( av, TidyAttr_OnAFTERUPDATE )
#define attrIsOnBEFOREUNLOAD(av) AttrIsId( av, TidyAttr_OnBEFOREUNLOAD )
#define attrIsOnBEFOREUPDATE(av) AttrIsId( av, TidyAttr_OnBEFOREUPDATE )
#define attrIsOnBLUR(av) AttrIsId( av, TidyAttr_OnBLUR )
#define attrIsOnCHANGE(av) AttrIsId( av, TidyAttr_OnCHANGE )
#define attrIsOnCLICK(av) AttrIsId( av, TidyAttr_OnCLICK )
#define attrIsOnDATAAVAILABLE(av) AttrIsId( av, TidyAttr_OnDATAAVAILABLE )
#define attrIsOnDATASETCHANGED(av) AttrIsId( av, TidyAttr_OnDATASETCHANGED )
#define attrIsOnDATASETCOMPLETE(av) AttrIsId( av, TidyAttr_OnDATASETCOMPLETE )
#define attrIsOnDBLCLICK(av) AttrIsId( av, TidyAttr_OnDBLCLICK )
#define attrIsOnERRORUPDATE(av) AttrIsId( av, TidyAttr_OnERRORUPDATE )
#define attrIsOnFOCUS(av) AttrIsId( av, TidyAttr_OnFOCUS )
#define attrIsOnKEYDOWN(av) AttrIsId( av, TidyAttr_OnKEYDOWN )
#define attrIsOnKEYPRESS(av) AttrIsId( av, TidyAttr_OnKEYPRESS )
#define attrIsOnKEYUP(av) AttrIsId( av, TidyAttr_OnKEYUP )
#define attrIsOnLOAD(av) AttrIsId( av, TidyAttr_OnLOAD )
#define attrIsOnMOUSEDOWN(av) AttrIsId( av, TidyAttr_OnMOUSEDOWN )
#define attrIsOnMOUSEMOVE(av) AttrIsId( av, TidyAttr_OnMOUSEMOVE )
#define attrIsOnMOUSEOUT(av) AttrIsId( av, TidyAttr_OnMOUSEOUT )
#define attrIsOnMOUSEOVER(av) AttrIsId( av, TidyAttr_OnMOUSEOVER )
#define attrIsOnMOUSEUP(av) AttrIsId( av, TidyAttr_OnMOUSEUP )
#define attrIsOnRESET(av) AttrIsId( av, TidyAttr_OnRESET )
#define attrIsOnROWENTER(av) AttrIsId( av, TidyAttr_OnROWENTER )
#define attrIsOnROWEXIT(av) AttrIsId( av, TidyAttr_OnROWEXIT )
#define attrIsOnSELECT(av) AttrIsId( av, TidyAttr_OnSELECT )
#define attrIsOnSUBMIT(av) AttrIsId( av, TidyAttr_OnSUBMIT )
#define attrIsOnUNLOAD(av) AttrIsId( av, TidyAttr_OnUNLOAD )
#define attrIsPROFILE(av) AttrIsId( av, TidyAttr_PROFILE )
#define attrIsPROMPT(av) AttrIsId( av, TidyAttr_PROMPT )
#define attrIsRBSPAN(av) AttrIsId( av, TidyAttr_RBSPAN )
#define attrIsREADONLY(av) AttrIsId( av, TidyAttr_READONLY )
#define attrIsREL(av) AttrIsId( av, TidyAttr_REL )
#define attrIsREV(av) AttrIsId( av, TidyAttr_REV )
#define attrIsRIGHTMARGIN(av) AttrIsId( av, TidyAttr_RIGHTMARGIN )
#define attrIsROLE(av) AttrIsId( av, TidyAttr_ROLE )
#define attrIsROWS(av) AttrIsId( av, TidyAttr_ROWS )
#define attrIsROWSPAN(av) AttrIsId( av, TidyAttr_ROWSPAN )
#define attrIsRULES(av) AttrIsId( av, TidyAttr_RULES )
#define attrIsSCHEME(av) AttrIsId( av, TidyAttr_SCHEME )
#define attrIsSCOPE(av) AttrIsId( av, TidyAttr_SCOPE )
#define attrIsSCROLLING(av) AttrIsId( av, TidyAttr_SCROLLING )
#define attrIsSELECTED(av) AttrIsId( av, TidyAttr_SELECTED )
#define attrIsSHAPE(av) AttrIsId( av, TidyAttr_SHAPE )
#define attrIsSHOWGRID(av) AttrIsId( av, TidyAttr_SHOWGRID )
#define attrIsSHOWGRIDX(av) AttrIsId( av, TidyAttr_SHOWGRIDX )
#define attrIsSHOWGRIDY(av) AttrIsId( av, TidyAttr_SHOWGRIDY )
#define attrIsSIZE(av) AttrIsId( av, TidyAttr_SIZE )
#define attrIsSLOT(av) AttrIsId( av, TidyAttr_SLOT )
#define attrIsSPAN(av) AttrIsId( av, TidyAttr_SPAN )
#define attrIsSRC(av) AttrIsId( av, TidyAttr_SRC )
#define attrIsSTANDBY(av) AttrIsId( av, TidyAttr_STANDBY )
#define attrIsSTART(av) AttrIsId( av, TidyAttr_START )
#define attrIsSTYLE(av) AttrIsId( av, TidyAttr_STYLE )
#define attrIsSUMMARY(av) AttrIsId( av, TidyAttr_SUMMARY )
#define attrIsTABINDEX(av) AttrIsId( av, TidyAttr_TABINDEX )
#define attrIsTARGET(av) AttrIsId( av, TidyAttr_TARGET )
#define attrIsTEXT(av) AttrIsId( av, TidyAttr_TEXT )
#define attrIsTITLE(av) AttrIsId( av, TidyAttr_TITLE )
#define attrIsTOPMARGIN(av) AttrIsId( av, TidyAttr_TOPMARGIN )
#define attrIsTYPE(av) AttrIsId( av, TidyAttr_TYPE )
#define attrIsUSEMAP(av) AttrIsId( av, TidyAttr_USEMAP )
#define attrIsVALIGN(av) AttrIsId( av, TidyAttr_VALIGN )
#define attrIsVALUE(av) AttrIsId( av, TidyAttr_VALUE )
#define attrIsVALUETYPE(av) AttrIsId( av, TidyAttr_VALUETYPE )
#define attrIsVERSION(av) AttrIsId( av, TidyAttr_VERSION )
#define attrIsVLINK(av) AttrIsId( av, TidyAttr_VLINK )
#define attrIsVSPACE(av) AttrIsId( av, TidyAttr_VSPACE )
#define attrIsWIDTH(av) AttrIsId( av, TidyAttr_WIDTH )
#define attrIsWRAP(av) AttrIsId( av, TidyAttr_WRAP )
#define attrIsXMLNS(av) AttrIsId( av, TidyAttr_XMLNS )
#define attrIsXML_LANG(av) AttrIsId( av, TidyAttr_XML_LANG )
#define attrIsXML_SPACE(av) AttrIsId( av, TidyAttr_XML_SPACE )
#define attrIsARIA_ACTIVEDESCENDANT(av) AttrIsId( av, TidyAttr_ARIA_ACTIVEDESCENDANT )
#define attrIsARIA_ATOMIC(av) AttrIsId( av, TidyAttr_ARIA_ATOMIC )
#define attrIsARIA_AUTOCOMPLETE(av) AttrIsId( av, TidyAttr_ARIA_AUTOCOMPLETE )
#define attrIsARIA_BUSY(av) AttrIsId( av, TidyAttr_ARIA_BUSY )
#define attrIsARIA_CHECKED(av) AttrIsId( av, TidyAttr_ARIA_CHECKED )
#define attrIsARIA_CONTROLS(av) AttrIsId( av, TidyAttr_ARIA_CONTROLS )
#define attrIsARIA_DESCRIBEDBY(av) AttrIsId( av, TidyAttr_ARIA_DESCRIBEDBY )
#define attrIsARIA_DISABLED(av) AttrIsId( av, TidyAttr_ARIA_DISABLED )
#define attrIsARIA_DROPEFFECT(av) AttrIsId( av, TidyAttr_ARIA_DROPEFFECT )
#define attrIsARIA_EXPANDED(av) AttrIsId( av, TidyAttr_ARIA_EXPANDED )
#define attrIsARIA_FLOWTO(av) AttrIsId( av, TidyAttr_ARIA_FLOWTO )
#define attrIsARIA_GRABBED(av) AttrIsId( av, TidyAttr_ARIA_GRABBED )
#define attrIsARIA_HASPOPUP(av) AttrIsId( av, TidyAttr_ARIA_HASPOPUP )
#define attrIsARIA_HIDDEN(av) AttrIsId( av, TidyAttr_ARIA_HIDDEN )
#define attrIsARIA_INVALID(av) AttrIsId( av, TidyAttr_ARIA_INVALID )
#define attrIsARIA_LABEL(av) AttrIsId( av, TidyAttr_ARIA_LABEL )
#define attrIsARIA_LABELLEDBY(av) AttrIsId( av, TidyAttr_ARIA_LABELLEDBY )
#define attrIsARIA_LEVEL(av) AttrIsId( av, TidyAttr_ARIA_LEVEL )
#define attrIsARIA_LIVE(av) AttrIsId( av, TidyAttr_ARIA_LIVE )
#define attrIsARIA_MULTILINE(av) AttrIsId( av, TidyAttr_ARIA_MULTILINE )
#define attrIsARIA_MULTISELECTABLE(av) AttrIsId( av, TidyAttr_ARIA_MULTISELECTABLE )
#define attrIsARIA_ORIENTATION(av) AttrIsId( av, TidyAttr_ARIA_ORIENTATION )
#define attrIsARIA_OWNS(av) AttrIsId( av, TidyAttr_ARIA_OWNS )
#define attrIsARIA_POSINSET(av) AttrIsId( av, TidyAttr_ARIA_POSINSET )
#define attrIsARIA_PRESSED(av) AttrIsId( av, TidyAttr_ARIA_PRESSED )
#define attrIsARIA_READONLY(av) AttrIsId( av, TidyAttr_ARIA_READONLY )
#define attrIsARIA_RELEVANT(av) AttrIsId( av, TidyAttr_ARIA_RELEVANT )
#define attrIsARIA_REQUIRED(av) AttrIsId( av, TidyAttr_ARIA_REQUIRED )
#define attrIsARIA_SELECTED(av) AttrIsId( av, TidyAttr_ARIA_SELECTED )
#define attrIsARIA_SETSIZE(av) AttrIsId( av, TidyAttr_ARIA_SETSIZE )
#define attrIsARIA_SORT(av) AttrIsId( av, TidyAttr_ARIA_SORT )
#define attrIsARIA_VALUEMAX(av) AttrIsId( av, TidyAttr_ARIA_VALUEMAX )
#define attrIsARIA_VALUEMIN(av) AttrIsId( av, TidyAttr_ARIA_VALUEMIN )
#define attrIsARIA_VALUENOW(av) AttrIsId( av, TidyAttr_ARIA_VALUENOW )
#define attrIsARIA_VALUETEXT(av) AttrIsId( av, TidyAttr_ARIA_VALUETEXT )
#define attrIsSVG_FILL(av) AttrIsId( av, TidyAttr_FILL )
#define attrIsSVG_FILLRULE(av) AttrIsId( av, TidyAttr_FILLRULE )
#define attrIsSVG_STROKE(av) AttrIsId( av, TidyAttr_STROKE )
#define attrIsSVG_STROKEDASHARRAY(av) AttrIsId( av, TidyAttr_STROKEDASHARRAY )
#define attrIsSVG_STROKEDASHOFFSET(av) AttrIsId( av, TidyAttr_STROKEDASHOFFSET )
#define attrIsSVG_STROKELINECAP(av) AttrIsId( av, TidyAttr_STROKELINECAP )
#define attrIsSVG_STROKELINEJOIN(av) AttrIsId( av, TidyAttr_STROKELINEJOIN )
#define attrIsSVG_STROKEMITERLIMIT(av) AttrIsId( av, TidyAttr_STROKEMITERLIMIT )
#define attrIsSVG_STROKEWIDTH(av) AttrIsId( av, TidyAttr_STROKEWIDTH )
#define attrIsSVG_COLORINTERPOLATION(a) AttrIsId( a, TidyAttr_COLORINTERPOLATION )
#define attrIsSVG_COLORRENDERING(av) AttrIsId( av, TidyAttr_COLORRENDERING )
#define attrIsSVG_OPACITY(av) AttrIsId( av, TidyAttr_OPACITY )
#define attrIsSVG_STROKEOPACITY(av) AttrIsId( av, TidyAttr_STROKEOPACITY )
#define attrIsSVG_FILLOPACITY(av) AttrIsId( av, TidyAttr_FILLOPACITY )
/* Attribute Retrieval macros
*/
#define attrGetHREF( nod ) TY_(AttrGetById)( nod, TidyAttr_HREF )
#define attrGetSRC( nod ) TY_(AttrGetById)( nod, TidyAttr_SRC )
#define attrGetID( nod ) TY_(AttrGetById)( nod, TidyAttr_ID )
#define attrGetNAME( nod ) TY_(AttrGetById)( nod, TidyAttr_NAME )
#define attrGetSUMMARY( nod ) TY_(AttrGetById)( nod, TidyAttr_SUMMARY )
#define attrGetALT( nod ) TY_(AttrGetById)( nod, TidyAttr_ALT )
#define attrGetLONGDESC( nod ) TY_(AttrGetById)( nod, TidyAttr_LONGDESC )
#define attrGetUSEMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_USEMAP )
#define attrGetISMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_ISMAP )
#define attrGetLANGUAGE( nod ) TY_(AttrGetById)( nod, TidyAttr_LANGUAGE )
#define attrGetTYPE( nod ) TY_(AttrGetById)( nod, TidyAttr_TYPE )
#define attrGetVALUE( nod ) TY_(AttrGetById)( nod, TidyAttr_VALUE )
#define attrGetCONTENT( nod ) TY_(AttrGetById)( nod, TidyAttr_CONTENT )
#define attrGetTITLE( nod ) TY_(AttrGetById)( nod, TidyAttr_TITLE )
#define attrGetXMLNS( nod ) TY_(AttrGetById)( nod, TidyAttr_XMLNS )
#define attrGetDATAFLD( nod ) TY_(AttrGetById)( nod, TidyAttr_DATAFLD )
#define attrGetWIDTH( nod ) TY_(AttrGetById)( nod, TidyAttr_WIDTH )
#define attrGetHEIGHT( nod ) TY_(AttrGetById)( nod, TidyAttr_HEIGHT )
#define attrGetFOR( nod ) TY_(AttrGetById)( nod, TidyAttr_FOR )
#define attrGetSELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_SELECTED )
#define attrGetCHARSET( nod ) TY_(AttrGetById)( nod, TidyAttr_CHARSET )
#define attrGetCHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_CHECKED )
#define attrGetLANG( nod ) TY_(AttrGetById)( nod, TidyAttr_LANG )
#define attrGetTARGET( nod ) TY_(AttrGetById)( nod, TidyAttr_TARGET )
#define attrGetHTTP_EQUIV( nod ) TY_(AttrGetById)( nod, TidyAttr_HTTP_EQUIV )
#define attrGetREL( nod ) TY_(AttrGetById)( nod, TidyAttr_REL )
#define attrGetOnMOUSEMOVE( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEMOVE )
#define attrGetOnMOUSEDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEDOWN )
#define attrGetOnMOUSEUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEUP )
#define attrGetOnCLICK( nod ) TY_(AttrGetById)( nod, TidyAttr_OnCLICK )
#define attrGetOnMOUSEOVER( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOVER )
#define attrGetOnMOUSEOUT( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOUT )
#define attrGetOnKEYDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYDOWN )
#define attrGetOnKEYUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYUP )
#define attrGetOnKEYPRESS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYPRESS )
#define attrGetOnFOCUS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnFOCUS )
#define attrGetOnBLUR( nod ) TY_(AttrGetById)( nod, TidyAttr_OnBLUR )
#define attrGetBGCOLOR( nod ) TY_(AttrGetById)( nod, TidyAttr_BGCOLOR )
#define attrGetLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_LINK )
#define attrGetALINK( nod ) TY_(AttrGetById)( nod, TidyAttr_ALINK )
#define attrGetVLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_VLINK )
#define attrGetTEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_TEXT )
#define attrGetSTYLE( nod ) TY_(AttrGetById)( nod, TidyAttr_STYLE )
#define attrGetABBR( nod ) TY_(AttrGetById)( nod, TidyAttr_ABBR )
#define attrGetCOLSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_COLSPAN )
#define attrGetFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_FONT )
#define attrGetBASEFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_BASEFONT )
#define attrGetROWSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_ROWSPAN )
#define attrGetROLE( nod ) TY_(AttrGetById)( nod, TidyAttr_ROLE )
#define attrGetARIA_ACTIVEDESCENDANT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ACTIVEDESCENDANT )
#define attrGetARIA_ATOMIC( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ATOMIC )
#define attrGetARIA_AUTOCOMPLETE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_AUTOCOMPLETE )
#define attrGetARIA_BUSY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_BUSY )
#define attrGetARIA_CHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_CHECKED )
#define attrGetARIA_CONTROLS( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_CONTROLS )
#define attrGetARIA_DESCRIBEDBY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DESCRIBEDBY )
#define attrGetARIA_DISABLED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DISABLED )
#define attrGetARIA_DROPEFFECT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DROPEFFECT )
#define attrGetARIA_EXPANDED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_EXPANDED )
#define attrGetARIA_FLOWTO( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_FLOWTO )
#define attrGetARIA_GRABBED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_GRABBED )
#define attrGetARIA_HASPOPUP( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_HASPOPUP )
#define attrGetARIA_HIDDEN( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_HIDDEN )
#define attrGetARIA_INVALID( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_INVALID )
#define attrGetARIA_LABEL( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LABEL )
#define attrGetARIA_LABELLEDBY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LABELLEDBY )
#define attrGetARIA_LEVEL( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LEVEL )
#define attrGetARIA_LIVE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LIVE )
#define attrGetARIA_MULTILINE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_MULTILINE )
#define attrGetARIA_MULTISELECTABLE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_MULTISELECTABLE )
#define attrGetARIA_ORIENTATION( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ORIENTATION )
#define attrGetARIA_OWNS( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_OWNS )
#define attrGetARIA_POSINSET( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_POSINSET )
#define attrGetARIA_PRESSED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_PRESSED )
#define attrGetARIA_READONLY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_READONLY )
#define attrGetARIA_RELEVANT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_RELEVANT )
#define attrGetARIA_REQUIRED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_REQUIRED )
#define attrGetARIA_SELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SELECTED )
#define attrGetARIA_SETSIZE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SETSIZE )
#define attrGetARIA_SORT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SORT )
#define attrGetARIA_VALUEMAX( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUEMAX )
#define attrGetARIA_VALUEMIN( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUEMIN )
#define attrGetARIA_VALUENOW( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUENOW )
#define attrGetARIA_VALUETEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUETEXT )
#endif /* __ATTRS_H__ */

230
third_party/tidy/buffio.c vendored Normal file
View file

@ -0,0 +1,230 @@
/* clang-format off */
/* buffio.c -- Treat buffer as an I/O stream.
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Requires buffer to automatically grow as bytes are added.
Must keep track of current read and write points.
*/
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/tidybuffio.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/assert.h"
#include "third_party/tidy/forward.h"
/**************
TIDY
**************/
/* Input-source callback: appData is the TidyBuffer registered by
** tidyInitInputBuffer(); hand back whatever tidyBufGetByte() returns for it.
*/
static int insrc_getByte( void* appData )
{
    return tidyBufGetByte( (TidyBuffer*) appData );
}
/* Input-source callback: report end-of-input for the TidyBuffer passed as
** appData by delegating to tidyBufEndOfInput().
*/
static Bool insrc_eof( void* appData )
{
    return tidyBufEndOfInput( (TidyBuffer*) appData );
}
/* Input-source callback: push byte bv back onto the TidyBuffer passed as
** appData by delegating to tidyBufUngetByte().
*/
static void insrc_ungetByte( void* appData, byte bv )
{
    tidyBufUngetByte( (TidyBuffer*) appData, bv );
}
/* Wire a TidyInputSource up to a TidyBuffer: the buffer becomes the source's
** opaque data pointer and the three insrc_* adapters become its callbacks.
*/
void tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf )
{
    inp->sourceData = buf;
    inp->getByte    = insrc_getByte;
    inp->ungetByte  = insrc_ungetByte;
    inp->eof        = insrc_eof;
}
/* Output-sink callback: append byte bv to the TidyBuffer passed as appData
** by delegating to tidyBufPutByte().
*/
static void outsink_putByte( void* appData, byte bv )
{
    tidyBufPutByte( (TidyBuffer*) appData, bv );
}
/* Wire a TidyOutputSink up to a TidyBuffer: the buffer becomes the sink's
** opaque data pointer and outsink_putByte becomes its write callback.
*/
void tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf )
{
    outp->sinkData = buf;
    outp->putByte  = outsink_putByte;
}
/* Initialize buf as an empty buffer that uses the default allocator.
** Passing NULL to tidyBufInitWithAllocator() selects the default allocator.
*/
void tidyBufInit( TidyBuffer* buf )
{
assert( buf != NULL );
tidyBufInitWithAllocator( buf, NULL );
}
/* Initialize buf with the default allocator (NULL selects it) and request
** room for at least allocSize bytes. buf is reset by the initialization
** step, so memory it previously referenced is not freed here.
*/
void tidyBufAlloc( TidyBuffer* buf, uint allocSize )
{
tidyBufAllocWithAllocator( buf, NULL, allocSize );
}
/* Reset every field of buf to zero and record which allocator it should use.
** A NULL allocator selects the library's default allocator.
*/
void tidyBufInitWithAllocator( TidyBuffer* buf,
                               TidyAllocator *allocator )
{
    assert( buf != NULL );

    TidyClearMemory( buf, sizeof(*buf) );

    if ( allocator == NULL )
        allocator = &TY_(g_default_allocator);
    buf->allocator = allocator;
}
/* Initialize buf with the given allocator (NULL selects the default) and
** then request room for at least allocSize bytes using the default chunk
** size. The read position is left at the start of the buffer.
*/
void tidyBufAllocWithAllocator( TidyBuffer* buf,
                                TidyAllocator *allocator,
                                uint allocSize )
{
    tidyBufInitWithAllocator( buf, allocator );
    buf->next = 0;
    tidyBufCheckAlloc( buf, allocSize, 0 );
}
/* Release the memory owned by buf and return buf to its freshly
** initialized (empty) state, keeping the same allocator.
**
** Many users never call tidyBufInit()/tidyBufAlloc(), so a zero-filled
** TidyBuffer that was never written to still has a NULL allocator here.
** TidyFree() is handed the allocator and presumably dispatches through it
** (see tidy.h), so fall back to the default allocator first, exactly as
** tidyBufCheckAlloc() and tidyBufAttach() do.
*/
void tidyBufFree( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( !buf->allocator )
        buf->allocator = &TY_(g_default_allocator);

    TidyFree( buf->allocator, buf->bp );

    /* The allocator argument is read before the struct is cleared, so it
    ** survives the re-initialization. */
    tidyBufInitWithAllocator( buf, buf->allocator );
}
/* Empty the buffer without releasing its storage: the read position is
** rewound, and when storage exists it is zero-filled over its whole
** allocated length and the content size is set back to 0.
*/
void tidyBufClear( TidyBuffer* buf )
{
    assert( buf != NULL );

    buf->next = 0;
    if ( buf->bp == NULL )
        return;

    TidyClearMemory( buf->bp, buf->allocated );
    buf->size = 0;
}
/* Many users do not call tidyBufInit() or tidyBufAlloc() or their allocator
counterparts. So by default, set the default allocator.
This helper unconditionally points buf->allocator at the library's default
allocator; callers in this file invoke it only when buf->allocator is NULL.
*/
static void setDefaultAllocator( TidyBuffer* buf )
{
buf->allocator = &TY_(g_default_allocator);
}
/* Make sure the buffer can hold at least allocSize bytes plus one trailing
** null byte, growing it when it cannot.
**
** Avoid thrashing memory by doubling the buffer size until it is larger
** than the requested size. Growth starts from the current allocation, or
** from chunkSize (256 when chunkSize is 0) if nothing is allocated yet.
** Newly obtained bytes are zero-filled.
**
** Nothing is reported to the caller. If allocSize + 1 is not representable
** in a uint, or the allocator returns NULL, the buffer is left unchanged;
** callers can detect that by inspecting buf->allocated afterwards.
*/
void tidyBufCheckAlloc( TidyBuffer* buf, uint allocSize, uint chunkSize )
{
    uint needed;

    assert( buf != NULL );

    if ( !buf->allocator )
        setDefaultAllocator( buf );

    if ( 0 == chunkSize )
        chunkSize = 256;

    /* allocSize + 1 wraps to 0 for the largest uint; such a request can
    ** never be satisfied, so leave the buffer untouched. */
    needed = allocSize + 1;
    if ( needed == 0 )
        return;

    if ( needed > buf->allocated )
    {
        byte* bp;
        uint allocAmt = chunkSize;
        if ( buf->allocated > 0 )
            allocAmt = buf->allocated;

        while ( allocAmt < needed )
        {
            uint doubled = allocAmt * 2;

            /* Unsigned doubling wraps once allocAmt passes half the uint
            ** range; without this check allocAmt would become 0 and the
            ** loop would never terminate. Fall back to the exact size. */
            if ( doubled <= allocAmt )
            {
                allocAmt = needed;
                break;
            }
            allocAmt = doubled;
        }

        bp = (byte*)TidyRealloc( buf->allocator, buf->bp, allocAmt );
        if ( bp != NULL )
        {
            /* Zero only the newly added tail; existing content is kept. */
            TidyClearMemory( bp + buf->allocated, allocAmt - buf->allocated );
            buf->bp = bp;
            buf->allocated = allocAmt;
        }
    }
}
/* Point buf at caller-supplied memory without allocating anything.
** The whole region of `size` bytes is treated as existing content (both
** the content size and the allocated size are set to `size`), the read
** position is rewound, and a missing allocator is replaced by the default.
*/
void tidyBufAttach( TidyBuffer* buf, byte* bp, uint size )
{
    assert( buf != NULL );

    if ( buf->allocator == NULL )
        setDefaultAllocator( buf );

    buf->bp        = bp;
    buf->allocated = size;
    buf->size      = size;
    buf->next      = 0;
}
/* Clear pointer to memory w/out deallocation.
** The buffer is re-initialized (all fields zeroed) while keeping its current
** allocator; the memory it pointed to is not freed and remains the caller's
** responsibility.
*/
void tidyBufDetach( TidyBuffer* buf )
{
tidyBufInitWithAllocator( buf, buf->allocator );
}
/**************
OUTPUT
**************/
/* Append `size` bytes starting at vp to the end of the buffer content,
** growing the buffer as needed. A NULL vp or a size of 0 is a no-op.
**
** tidyBufCheckAlloc() returns nothing, so the available space is verified
** before copying: if the new length would not fit in a uint, or the buffer
** could not be grown, nothing is appended rather than writing past the end
** of the allocation.
*/
void tidyBufAppend( TidyBuffer* buf, void* vp, uint size )
{
    assert( buf != NULL );

    if ( vp == NULL || size == 0 )
        return;

    /* buf->size + size must not wrap around. */
    if ( size > (uint)-1 - buf->size )
        return;

    tidyBufCheckAlloc( buf, buf->size + size, 0 );

    /* Growth may have failed silently; never copy past the allocation. */
    if ( buf->bp == NULL || buf->allocated < buf->size + size )
        return;

    memcpy( buf->bp + buf->size, vp, size );
    buf->size += size;
}
/* Append the single byte bv to the end of the buffer content, growing the
** buffer as needed.
**
** tidyBufCheckAlloc() returns nothing, so the write is guarded: if the
** buffer has no storage or no free slot after the growth attempt, the byte
** is dropped rather than written out of bounds.
*/
void tidyBufPutByte( TidyBuffer* buf, byte bv )
{
    assert( buf != NULL );

    tidyBufCheckAlloc( buf, buf->size + 1, 0 );

    /* Growth may have failed silently; never store past the allocation. */
    if ( buf->bp == NULL || buf->size >= buf->allocated )
        return;

    buf->bp[ buf->size++ ] = bv;
}
/* Remove and return the last byte of the buffer content.
** Returns EOF when the buffer holds no content.
*/
int tidyBufPopByte( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( buf->size == 0 )
        return EOF;

    buf->size -= 1;
    return buf->bp[ buf->size ];
}
/**************
INPUT
**************/
/* Read the byte at the current read position and advance past it.
** Returns EOF once tidyBufEndOfInput() reports that input is exhausted.
*/
int tidyBufGetByte( TidyBuffer* buf )
{
    if ( tidyBufEndOfInput(buf) )
        return EOF;

    return buf->bp[ buf->next++ ];
}
/* True when the read position has reached (or passed) the end of the
** buffer content, i.e. there is nothing left for tidyBufGetByte() to read.
*/
Bool tidyBufEndOfInput( TidyBuffer* buf )
{
    return !( buf->next < buf->size );
}
/* Step the read position back by one byte; a no-op at the start of the
** buffer. Nothing is stored: bv is only checked (via assert) against the
** byte already in the buffer at the new read position.
*/
void tidyBufUngetByte( TidyBuffer* buf, byte bv )
{
    if ( buf->next == 0 )
        return;

    buf->next -= 1;
    assert( bv == buf->bp[ buf->next ] );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1032
third_party/tidy/charsets.c vendored Normal file

File diff suppressed because it is too large Load diff

22
third_party/tidy/charsets.h vendored Normal file
View file

@ -0,0 +1,22 @@
#ifndef __CHARSETS_H__
#define __CHARSETS_H__
#include "third_party/tidy/access.h"
#include "third_party/tidy/tidyplatform.h"
/* clang-format off */
/* charsets.h -- character set information and mappings
(c) 1998-2021 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
uint TY_(GetEncodingIdFromName)(ctmbstr name);
uint TY_(GetEncodingIdFromCodePage)(uint cp);
uint TY_(GetEncodingCodePageFromName)(ctmbstr name);
uint TY_(GetEncodingCodePageFromId)(uint id);
ctmbstr TY_(GetEncodingNameFromId)(uint id);
ctmbstr TY_(GetEncodingNameFromCodePage)(uint cp);
#endif /* __CHARSETS_H__ */

2861
third_party/tidy/clean.c vendored Normal file

File diff suppressed because it is too large Load diff

83
third_party/tidy/clean.h vendored Normal file
View file

@ -0,0 +1,83 @@
#ifndef __CLEAN_H__
#define __CLEAN_H__
#include "third_party/tidy/forward.h"
/* clang-format off */
/* clean.h -- clean up misuse of presentation markup
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
void TY_(FixNodeLinks)(Node *node);
void TY_(FreeStyles)( TidyDocImpl* doc );
/* Add class="foo" to node
*/
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue );
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property );
void TY_(CleanDocument)( TidyDocImpl* doc );
/* simplifies <b><b> ... </b> ...</b> etc. */
void TY_(NestedEmphasis)( TidyDocImpl* doc, Node* node );
/* replace i by em and b by strong */
void TY_(EmFromI)( TidyDocImpl* doc, Node* node );
/*
Some people use dir or ul without an li
to indent the content. The pattern to
look for is a list with a single implicit
li. This is recursively replaced by an
implicit blockquote.
*/
void TY_(List2BQ)( TidyDocImpl* doc, Node* node );
/*
Replace implicit blockquote by div with an indent
taking care to reduce nested blockquotes to a single
div with the indent set to match the nesting depth
*/
void TY_(BQ2Div)( TidyDocImpl* doc, Node* node );
void TY_(DropSections)( TidyDocImpl* doc, Node* node );
/*
This is a major clean up to strip out all the extra stuff you get
when you save as web page from Word 2000. It doesn't yet know what
to do with VML tags, but these will appear as errors unless you
declare them as new tags, such as o:p which needs to be declared
as inline.
*/
void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
Bool TY_(IsWord2000)( TidyDocImpl* doc );
/* where appropriate move object elements from head to body */
void TY_(BumpObject)( TidyDocImpl* doc, Node *html );
Bool TY_(TidyMetaCharset)(TidyDocImpl* doc);
void TY_(DropComments)(TidyDocImpl* doc, Node* node);
void TY_(DropFontElements)(TidyDocImpl* doc, Node* node, Node **pnode);
void TY_(WbrToSpace)(TidyDocImpl* doc, Node* node);
void TY_(DowngradeTypography)(TidyDocImpl* doc, Node* node);
void TY_(ReplacePreformattedSpaces)(TidyDocImpl* doc, Node* node);
void TY_(NormalizeSpaces)(Lexer *lexer, Node *node);
void TY_(ConvertCDATANodes)(TidyDocImpl* doc, Node* node);
void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId);
void TY_(FixXhtmlNamespace)(TidyDocImpl* doc, Bool wantXmlns);
void TY_(FixLanguageInformation)(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang);
/* Issue #567 - move style elements from body to head */
void TY_(CleanStyle)(TidyDocImpl* doc, Node *html);
/* Issue #692 - discard multiple titles */
void TY_(CleanHead)(TidyDocImpl* doc);
#endif /* __CLEAN_H__ */

2009
third_party/tidy/config.c vendored Normal file

File diff suppressed because it is too large Load diff

434
third_party/tidy/config.h vendored Normal file
View file

@ -0,0 +1,434 @@
#ifndef __CONFIG_H__
#define __CONFIG_H__
/* clang-format off */
/**************************************************************************//**
* @file
* Read configuration files and manage configuration properties.
*
* Config files associate a property name with a value.
*
* // comments can start at the beginning of a line
* # comments can start at the beginning of a line
* name: short values fit onto one line
* name: a really long value that
* continues on the next line
*
* Property names are case insensitive and should be less than 60 characters
* in length, and must start at the beginning of the line, as whitespace at
* the start of a line signifies a line continuation.
*
* @author HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University) and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/streamio.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup configuration_options Configuration Options
**
** This module organizes all of Tidy's configuration options, including
** picklist management, option setting and retrieval, option file utilities,
** and so on.
**
** @{
******************************************************************************/
/** Determines the maximum number of items in an option's picklist. PickLists
** may have up to 16 items. For some reason, this limit has been hard-coded
** into Tidy for some time. Feel free to increase this as needed.
*/
#define TIDY_PL_SIZE 16
/** Structs of this type contain information needed in order to present
** picklists, relate picklist entries to public enum values, and parse
** strings that are accepted in order to assign the value.
*/
typedef struct PickListItem {
ctmbstr label; /**< PickList label for this item. */
const int value; /**< The option value represented by this label. */
ctmbstr inputs[10]; /**< String values that can select this value. */
} PickListItem;
/** An array of PickListItems, fixed in size for in-code declarations.
** Arrays must be populated in 0 to 10 order, as the option value is assigned
** based on this index and *not* on the structures' value field. It remains
** a best practice, however, to assign a public enum value with the proper
** index value.
*/
typedef const PickListItem PickListItems[TIDY_PL_SIZE];
struct _tidy_option; /* forward */
/** The TidyOptionImpl type implements the `_tidy_option` structure.
*/
typedef struct _tidy_option TidyOptionImpl;
/** This typedef describes a function that is used for parsing the input
** given for a particular Tidy option.
*/
typedef Bool (ParseProperty)( TidyDocImpl* doc, const TidyOptionImpl* opt );
/** This structure defines the internal representation of a Tidy option.
*/
struct _tidy_option
{
TidyOptionId id; /**< The unique identifier for this option. */
TidyConfigCategory category; /**< The category of the option. */
ctmbstr name; /**< The name of the option. */
TidyOptionType type; /**< The data type for the option. */
ulong dflt; /**< Default value for TidyInteger and TidyBoolean */
ParseProperty* parser; /**< Function to parse input; read-only if NULL. */
PickListItems* pickList; /**< The picklist of possible values for this option. */
ctmbstr pdflt; /**< Default value for TidyString. */
};
/** Stored option values can be one of two internal types.
*/
typedef union
{
ulong v; /**< Value for TidyInteger and TidyBoolean */
char *p; /**< Value for TidyString */
} TidyOptionValue;
/** This type is used to define a structure for keeping track of the values
** for each option.
*/
typedef struct _tidy_config
{
TidyOptionValue value[ N_TIDY_OPTIONS + 1 ]; /**< Current config values. */
TidyOptionValue snapshot[ N_TIDY_OPTIONS + 1 ]; /**< Snapshot of values to be restored later. */
uint defined_tags; /**< Tracks user-defined tags. */
uint c; /**< Current char in input stream for reading options. */
StreamIn* cfgIn; /**< Current input source for reading options.*/
} TidyConfigImpl;
/** Used to build a table of documentation cross-references.
*/
typedef struct {
TidyOptionId opt; /**< Identifier. */
TidyOptionId const *links; /**< Cross references. Last element must be 'TidyUnknownOption'. */
} TidyOptionDoc;
/** Given an option name, return an instance of an option.
** @param optnam The option name to retrieve.
** @returns The instance of the requested option.
*/
const TidyOptionImpl* TY_(lookupOption)( ctmbstr optnam );
/** Given an option ID, return an instance of an option.
** @param optId The option ID to retrieve.
** @returns The instance of the requested option.
*/
const TidyOptionImpl* TY_(getOption)( TidyOptionId optId );
/** Given an option ID, indicates whether or not the option is a list.
** @param optId The option ID to check.
** @returns Returns yes if the option value is a list.
*/
const Bool TY_(getOptionIsList)( TidyOptionId optId );
/** Initiates an iterator to cycle through all of the available options.
** @param doc The Tidy document to get options.
** @returns An iterator token to be used with TY_(getNextOption)().
*/
TidyIterator TY_(getOptionList)( TidyDocImpl* doc );
/** Gets the next option provided by the iterator.
** @param doc The Tidy document to get options.
** @param iter The iterator token initialized by TY_(getOptionList)().
** @returns The instance of the next option.
*/
const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* doc, TidyIterator* iter );
/** Initiates an iterator to cycle through all of the available picklist
** possibilities.
** @param option An instance of an option for which to iterate a picklist.
** @returns An iterator token to be used with TY_(getNextOptionPick)().
*/
TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option );
/** Gets the next picklist possibility provided by the iterator.
** @param option The instance of the option for which to iterate a picklist.
** @param iter The iterator token initialized by TY_(getOptionPickList)().
** @returns The next picklist entry.
*/
ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option, TidyIterator* iter );
#if SUPPORT_CONSOLE_APP
/** Returns the cross-reference information structure for optID, which is
** used for generating documentation.
** @param optId The option ID to get cross-reference information for.
** @returns Cross reference information.
*/
const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId );
#endif /* SUPPORT_CONSOLE_APP */
/** Initialize the configuration for the given Tidy document.
** @param doc The Tidy document.
*/
void TY_(InitConfig)( TidyDocImpl* doc );
/** Frees the configuration memory for the given Tidy document.
** @param doc The Tidy document.
*/
void TY_(FreeConfig)( TidyDocImpl* doc );
/** Gets the picklist label for a given value.
** @param optId the option id having a picklist to check.
** @param pick the picklist item to retrieve.
** @returns The label for the pick.
*/
ctmbstr TY_(GetPickListLabelForPick)( TidyOptionId optId, uint pick );
/** Sets the integer value for the given option Id.
** @param doc The Tidy document.
** @param optId The option ID to set.
** @param val The value to set.
** @returns Success or failure.
*/
Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val );
/** Sets the bool value for the given option Id.
** @param doc The Tidy document.
** @param optId The option ID to set.
** @param val The value to set.
** @returns Success or failure.
*/
Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val );
/** Resets the given option to its default value.
** @param doc The Tidy document.
** @param optId The option ID to set.
** @returns Success or failure.
*/
Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId );
/** Resets all options in the document to their default values.
** @param doc The Tidy document.
*/
void TY_(ResetConfigToDefault)( TidyDocImpl* doc );
/** Stores a snapshot of all of the configuration values that can be
** restored later.
** @param doc The Tidy document.
*/
void TY_(TakeConfigSnapshot)( TidyDocImpl* doc );
/** Restores all of the configuration values to their snapshotted values.
** @param doc The Tidy document.
*/
void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc );
/** Copies the configuration from one document to another.
** @param docTo The destination Tidy document.
** @param docFrom The source Tidy document.
*/
void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom );
/** Attempts to parse the given config file into the document.
** @param doc The Tidy document.
** @param cfgfil The file to load.
** @returns a file system error code.
*/
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
/** Attempts to parse the given config file into the document, using
** the provided encoding.
** @param doc The Tidy document.
** @param cfgfil The file to load.
** @param charenc The name of the encoding to use for reading the file.
** @returns a file system error code.
*/
int TY_(ParseConfigFileEnc)( TidyDocImpl* doc,
ctmbstr cfgfil, ctmbstr charenc );
/** Saves the current configuration for options not having default values
** into the specified file.
** @param doc The Tidy document.
** @param cfgfil The file to save.
** @returns a file system error code.
*/
int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
/** Writes the current configuration for options not having default values
** into the specified sink.
** @param doc The Tidy document.
** @param sink The sink to save into.
** @returns a file system error code.
*/
int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink );
/** Attempts to parse the provided value for the given option name. Returns
** false if unknown option, missing parameter, or the option doesn't
** use the parameter.
** @param doc The Tidy document.
** @param optnam The name of the option to be set.
** @param optVal The string value to attempt to parse.
** @returns Success or failure.
*/
Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optVal );
/** Attempts to parse the provided value for the given option id. Returns
** false if unknown option, missing parameter, or the option doesn't
** use the parameter.
** @param doc The Tidy document.
** @param optId The ID of the option to be set.
** @param optVal The string value to attempt to parse.
** @returns Success or failure.
*/
Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optVal );
/** Ensure that char encodings are self consistent.
** @param doc The Tidy document to adjust.
** @param encoding The encoding being applied.
** @returns A bool indicating success or failure.
*/
Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding );
/** Ensure that the configuration options are self consistent.
** THIS PROCESS IS DESTRUCTIVE TO THE USER STATE. It examines
** certain user-specified options and changes other options
** as a result. This means that documented API functions such
** as tidyOptGetValue() won't return the user-set values after
** this is used. As a result, *don't* just use this function
** at every opportunity, but only where needed, which is ONLY
** prior to parsing a stream, and again prior to saving a
** stream (because we reset after parsing.)
** @param doc The Tidy document to adjust.
*/
void TY_(AdjustConfig)( TidyDocImpl* doc );
/** Indicates whether or not the current configuration is completely default.
** @param doc The Tidy document.
** @returns The result.
*/
Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc );
/** Indicates whether or not the current configuration is different from the
** stored snapshot.
** @param doc The Tidy document.
** @returns The result.
*/
Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc );
/** Returns the character encoding ID for the given character encoding
** string.
** @param doc The Tidy document.
** @param charenc The name of the character encoding.
** @returns The Id of the character encoding.
*/
int TY_(CharEncodingId)( TidyDocImpl* doc, ctmbstr charenc );
/** Returns the full name of the encoding for the given ID.
** @param encoding The Id of the encoding.
** @returns The name of the character encoding.
*/
ctmbstr TY_(CharEncodingName)( int encoding );
/** Returns the Tidy command line option name of the encoding for the given ID.
** @param encoding The Id of the encoding.
** @returns The Tidy command line option representing the encoding.
*/
ctmbstr TY_(CharEncodingOptName)( int encoding );
/** Coordinates Config update and list data.
** @param doc The Tidy document.
** @param opt The option the list item is intended for.
** @param name The name of the new list item.
*/
void TY_(DeclareListItem)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
/* Option accessors: cfg(), cfgBool(), cfgAutoBool() and cfgStr() read the
** value stored for option `id` in document `doc`. Debug builds route the
** lookup through out-of-line functions; release builds expand to direct
** reads of doc->config.value[id].
*/
#ifdef _DEBUG
/* Debug lookup functions will be type-safe and assert option type match */
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId );
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId );
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId );
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId );
#define cfg(doc, id) TY_(_cfgGet)( (doc), (id) )
#define cfgBool(doc, id) TY_(_cfgGetBool)( (doc), (id) )
#define cfgAutoBool(doc, id) TY_(_cfgGetAutoBool)( (doc), (id) )
#define cfgStr(doc, id) TY_(_cfgGetString)( (doc), (id) )
#else
/* Release build macros for speed */
/** Access the raw, non-string uint value of the given option ID. */
#define cfg(doc, id) ((doc)->config.value[ (id) ].v)
/** Access the Bool value of the given option ID. */
#define cfgBool(doc, id) ((Bool) cfg(doc, id))
/** Access the TidyTriState value of the given option ID. */
#define cfgAutoBool(doc, id) ((TidyTriState) cfg(doc, id))
/** Access the string value of the given option ID. */
#define cfgStr(doc, id) ((ctmbstr) (doc)->config.value[ (id) ].p)
#endif /* _DEBUG */
/** @} configuration_options group */
/** @} internal_api addtogroup */
#endif /* __CONFIG_H__ */

2197
third_party/tidy/entities.c vendored Normal file

File diff suppressed because it is too large Load diff

19
third_party/tidy/entities.h vendored Normal file
View file

@ -0,0 +1,19 @@
#ifndef __ENTITIES_H__
#define __ENTITIES_H__
/* clang-format off */
/* entities.h -- recognize character entities
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/forward.h"
/* entity starting with "&" returns zero on error */
/* uint EntityCode( ctmbstr name, uint versions ); */
ctmbstr TY_(EntityName)( uint charCode, uint versions );
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions );
#endif /* __ENTITIES_H__ */

115
third_party/tidy/fileio.c vendored Normal file
View file

@ -0,0 +1,115 @@
/* clang-format off */
/* fileio.c -- does standard I/O
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Default implementations of Tidy input sources
and output sinks based on standard C FILE*.
*/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/fileio.h"
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/sprtf.h"
/* Private state of a FILE*-backed Tidy input source. */
typedef struct _fp_input_source
{
FILE* fp; /* stream read by filesrc_getByte() when nothing is pushed back */
TidyBuffer unget; /* bytes pushed back by filesrc_ungetByte(); served before fp */
} FileSource;
/* TidyInputSource getByte callback: serve pushed-back bytes first,
** otherwise read the next byte from the underlying FILE*.
*/
static int filesrc_getByte( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size > 0 )
        return tidyBufPopByte( &src->unget );

    return fgetc( src->fp );
}
static Bool filesrc_eof( void* sourceData )
{
FileSource* fin = (FileSource*) sourceData;
Bool isEOF = ( fin->unget.size == 0 );
if ( isEOF )
isEOF = feof( fin->fp ) != 0;
return isEOF;
}
/* TidyInputSource ungetByte callback: stash `bv` in the push-back buffer
** so that filesrc_getByte() hands it out before reading from the FILE*.
*/
static void filesrc_ungetByte( void* sourceData, byte bv )
{
FileSource* fin = (FileSource*) sourceData;
tidyBufPutByte( &fin->unget, bv );
}
#if SUPPORT_POSIX_MAPPED_FILES
# define initFileSource initStdIOFileSource
# define freeFileSource freeStdIOFileSource
#endif
/* Allocate a FileSource for `fp` using `allocator` and wire it into `inp`.
** Returns 0 on success, or -1 when the allocation fails (in which case
** `inp` is left untouched).
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* state = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );

    if ( state == NULL )
        return -1;

    /* start from an all-zero state, i.e. an empty push-back buffer */
    TidyClearMemory( state, sizeof(FileSource) );
    state->fp = fp;
    state->unget.allocator = allocator;

    inp->sourceData = state;
    inp->getByte    = filesrc_getByte;
    inp->ungetByte  = filesrc_ungetByte;
    inp->eof        = filesrc_eof;
    return 0;
}
/* Release the FileSource attached to `inp`.
**
** inp      input source previously set up by initFileSource.
** closeIt  when yes, the underlying FILE* is also closed.
**
** A source whose sourceData is NULL is ignored. The previous code already
** tested for NULL before freeing, but only after dereferencing the pointer
** in tidyBufFree(); the check now comes first.
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    /* free the push-back storage first; the struct itself is released
       through the allocator recorded in that buffer */
    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
/* TidyOutputSink putByte callback: write one byte to the FILE* passed as
** sinkData. When ENABLE_DEBUG_LOG is defined the byte is also echoed to
** the debug log, unless the sink is file descriptor 2.
*/
void TY_(filesink_putByte)( void* sinkData, byte bv )
{
    FILE* out = (FILE*) sinkData;

    fputc( bv, out );
#if defined(ENABLE_DEBUG_LOG)
    /*\
     * avoid duplicate newline - SPRTF will translate an 0x0d to CRLF,
     * and do the same with the following 0x0a, so 0x0d is not echoed
    \*/
    if ( fileno(out) != 2 && bv != 0x0d )
    {
        SPRTF("%c",bv);
    }
#endif
}
/* Point the output sink `outp` at `fp`; bytes are written through
** filesink_putByte. Nothing is allocated.
*/
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

44
third_party/tidy/fileio.h vendored Normal file
View file

@ -0,0 +1,44 @@
#ifndef __FILEIO_H__
#define __FILEIO_H__
/* clang-format off */
/** @file fileio.h - does standard C I/O
Implementation of a FILE* based TidyInputSource and
TidyOutputSink.
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/access.h"
#include "third_party/tidy/tidybuffio.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Allocate and initialize file input source */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
/** Free file input source */
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt );
#if SUPPORT_POSIX_MAPPED_FILES
/** Allocate and initialize file input source using Standard C I/O */
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
/** Free file input source using Standard C I/O */
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt );
#endif
/** Initialize file output sink */
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp );
/* Needed for internal declarations */
void TY_(filesink_putByte)( void* sinkData, byte bv );
#ifdef __cplusplus
}
#endif
#endif /* __FILEIO_H__ */

74
third_party/tidy/forward.h vendored Normal file
View file

@ -0,0 +1,74 @@
#ifndef __FORWARD_H__
#define __FORWARD_H__
/* clang-format off */
/* forward.h -- Forward declarations for major Tidy structures
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Avoids many include file circular dependencies.
Try to keep this file down to the minimum to avoid
cross-talk between modules.
Header files include this file. C files include tidy-int.h.
*/
#include "third_party/tidy/tidyplatform.h"
#include "third_party/tidy/tidy.h"
/* Internal symbols are prefixed to avoid clashes with other libraries */
#define TYDYAPPEND(str1,str2) str1##str2
#define TY_(str) TYDYAPPEND(prvTidy,str)
/* Internal symbols are prefixed with 'hidden' attr, to avoid exporting */
#if defined(_WIN32) || defined(__CYGWIN__)
#define TY_PRIVATE
#else
#define TY_PRIVATE __attribute__((__visibility__("hidden")))
#endif
struct _StreamIn;
typedef struct _StreamIn StreamIn;
struct _StreamOut;
typedef struct _StreamOut StreamOut;
struct _TidyDocImpl;
typedef struct _TidyDocImpl TidyDocImpl;
struct _TidyMessageImpl;
typedef struct _TidyMessageImpl TidyMessageImpl;
/* @todo: this name isn't very instructive! */
struct _Dict;
typedef struct _Dict Dict;
struct _Attribute;
typedef struct _Attribute Attribute;
struct _AttVal;
typedef struct _AttVal AttVal;
struct _Node;
typedef struct _Node Node;
struct _IStack;
typedef struct _IStack IStack;
struct _Lexer;
typedef struct _Lexer Lexer;
extern TidyAllocator TY_(g_default_allocator);
/** Wrappers for easy memory allocation using an allocator */
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size)))
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size)))
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block)))
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg)))
#define TidyClearMemory(block, size) memset((block), 0, (size))
#endif /* __FORWARD_H__ */

182
third_party/tidy/gdoc.c vendored Normal file
View file

@ -0,0 +1,182 @@
/* clang-format off */
/*
clean.c -- clean up misuse of presentation markup
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Filters from other formats such as Microsoft Word
often make excessive use of presentation markup such
as font tags, B, I, and the align attribute. By applying
a set of production rules, it is straight forward to
transform this to use CSS.
Some rules replace some of the children of an element by
style properties on the element, e.g.
<p><b>...</b></p> -> <p style="font-weight: bold">...</p>
Such rules are applied to the element's content and then
to the element itself until none of the rules more apply.
Having applied all the rules to an element, it will have
a style attribute with one or more properties.
Other rules strip the element they apply to, replacing
it by style properties on the contents, e.g.
<dir><li><p>...</li></dir> -> <p style="margin-left 1em">...
These rules are applied to an element before processing
its content and replace the current element by the first
element in the exposed content.
After applying both sets of rules, you can replace the
style attribute by a class value and style rule in the
document head. To support this, an association of styles
and class names is built.
A naive approach is to rely on string matching to test
when two property lists are the same. A better approach
would be to first sort the properties before matching.
*/
#include "third_party/tidy/tidy-int.h"
#include "third_party/tidy/gdoc.h"
#include "third_party/tidy/lexer.h"
#include "third_party/tidy/parser.h"
#include "third_party/tidy/tags.h"
#include "third_party/tidy/attrs.h"
#include "third_party/tidy/message.h"
#include "third_party/tidy/tmbstr.h"
#include "third_party/tidy/utf8.h"
/*
  Remove "element" from the tree while keeping its children: the
  children are spliced into the parent's child list at the position
  "element" occupied, then "element" itself is freed. On return *pnode
  is the first spliced child or, when "element" had no children,
  whatever DiscardElement() returned.
*/
static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode)
{
    Node *parent, *first, *last, *walk;

    if (!element->content)
    {
        *pnode = TY_(DiscardElement)(doc, element);
        return;
    }

    parent = element->parent;
    first  = element->content;
    last   = element->last;

    /* hook the tail of the child list up to whatever followed element */
    last->next = element->next;
    if (element->next)
        element->next->prev = last;
    else
        parent->last = last;

    /* hook the head of the child list up to whatever preceded element */
    if (element->prev)
    {
        first->prev = element->prev;
        element->prev->next = first;
    }
    else
        parent->content = first;

    for (walk = first; walk; walk = walk->next)
        walk->parent = parent;

    *pnode = first;

    /* detach before freeing so the spliced nodes stay alive */
    element->next = element->content = NULL;
    TY_(FreeNode)(doc, element);
}
/* Walk every descendant of "node" (iteratively, using an explicit stack
** of pending siblings) and apply the Google Docs clean-up rules:
**   - STYLE elements are discarded
**   - empty P elements are discarded
**   - SPAN elements are replaced by their content
**   - empty A elements are discarded after copying their name/id value
**     to the "id" attribute of the parent
**   - every other element loses its "class" attribute and is descended
**     into
**
** Fixes relative to the previous version:
**   - a discarded STYLE element is no longer examined afterwards (the
**     old code fell through into the next `if` and went on to read, and
**     descend into, the node it had just freed)
**   - an element without children no longer ends the whole walk
**     (`child = child->content` yielded NULL and left the loop with
**     siblings still pending)
**   - the work stack is only created when there is something to walk,
**     so it is always released
*/
static void CleanNode( TidyDocImpl* doc, Node *node )
{
    Stack *stack;
    Node *child, *next;

    child = node->content;
    if ( !child )
        return;

    stack = TY_(newStack)(doc, 16);

    while (child)
    {
        next = child->next;

        if (TY_(nodeIsElement)(child))
        {
            if (nodeIsSTYLE(child))
                TY_(DiscardElement)(doc, child);
            else if (nodeIsP(child) && !child->content)
                TY_(DiscardElement)(doc, child);
            else if (nodeIsSPAN(child))
                DiscardContainer( doc, child, &next);
            else if (nodeIsA(child) && !child->content)
            {
                AttVal *id = TY_(GetAttrByName)( child, "name" );
                /* Recent Google Docs is using "id" instead of "name" in
                ** the exported html.
                */
                if (!id)
                    id = TY_(GetAttrByName)( child, "id" );

                if (id)
                    TY_(RepairAttrValue)( doc, child->parent, "id", id->value );

                TY_(DiscardElement)(doc, child);
            }
            else
            {
                if (child->attributes)
                    TY_(DropAttrByName)( doc, child, "class" );

                if (child->content)
                {
                    /* remember where to resume, then go one level down */
                    if (next)
                        TY_(push)(stack,next);
                    child = child->content;
                    continue;
                }
            }
        }

        /* next sibling, or resume a level that was suspended earlier */
        child = next ? next : TY_(pop)(stack);
    }

    TY_(freeStack)(stack);
}
/* Prepend <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
** to the document head so that browsers treat the output as UTF-8.
** Does nothing when the document has no head.
*/
static void SetUTF8( TidyDocImpl* doc )
{
    Node *head = TY_(FindHEAD)( doc );
    Node *meta;

    if (!head)
        return;

    meta = TY_(InferredTag)(doc, TidyTag_META);
    TY_(AddAttribute)( doc, meta, "http-equiv", "Content-Type" );
    TY_(AddAttribute)( doc, meta, "content", "text/html; charset=UTF-8" );
    TY_(InsertNodeAtStart)( head, meta );
}
/* clean html exported by Google Docs
- strip the style element, as the style sheet is a mess
- strip class attributes
- strip span elements, leaving their content in place
- replace <a name=...></a> by id on parent element
- strip empty <p> elements
- insert a meta element declaring the content as UTF-8 into the head
*/
void TY_(CleanGoogleDocument)( TidyDocImpl* doc )
{
/* placeholder. CleanTree()/CleanNode() will not
** zap root element
*/
CleanNode( doc, &doc->root );
SetUTF8( doc );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

21
third_party/tidy/gdoc.h vendored Normal file
View file

@ -0,0 +1,21 @@
#ifndef __GDOC_H__
#define __GDOC_H__
#include "third_party/tidy/forward.h"
/* clang-format off */
/* gdoc.h -- clean up html exported by Google Docs
(c) 2012 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
- strip the style element, as the style sheet is a mess
- strip class attributes
- strip span elements, leaving their content in place
- replace <a name=...></a> by id on parent element
- strip empty <p> elements
*/
void TY_(CleanGoogleDocument)( TidyDocImpl* doc );
#endif /* __GDOC_H__ */

378
third_party/tidy/istack.c vendored Normal file
View file

@ -0,0 +1,378 @@
/* clang-format off */
/* istack.c -- inline stack for compatibility with Mosaic
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/tidy-int.h"
#include "third_party/tidy/lexer.h"
#include "third_party/tidy/attrs.h"
#include "third_party/tidy/streamio.h"
#include "third_party/tidy/tmbstr.h"
#include "third_party/tidy/sprtf.h"
/* Duplicate an attribute list.
**
** Returns a newly allocated deep copy of the list starting at "attrs"
** (NULL for an empty list). For every entry the name and value strings
** are duplicated, the dictionary entry is looked up again and any
** attached asp/php nodes are cloned.
**
** The copy is built iteratively: the former recursive version used one
** stack frame per attribute, so its stack depth was dictated by the
** number of attributes in the input document.
*/
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs)
{
    AttVal *head = NULL;
    AttVal **link = &head;   /* where the next copy gets attached */
    AttVal *src;

    for (src = attrs; src != NULL; src = src->next)
    {
        AttVal *copy = TY_(NewAttribute)(doc);

        *copy = *src;        /* plain fields; owned pointers are redone below */
        copy->next = NULL;
        copy->attribute = TY_(tmbstrdup)(doc->allocator, src->attribute);
        copy->value = TY_(tmbstrdup)(doc->allocator, src->value);
        copy->dict = TY_(FindAttribute)(doc, copy);
        copy->asp = src->asp ? TY_(CloneNode)(doc, src->asp) : NULL;
        copy->php = src->php ? TY_(CloneNode)(doc, src->php) : NULL;

        *link = copy;
        link = &copy->next;
    }

    return head;
}
/* Tell whether "node" may be placed on the inline stack: it must have a
** known tag whose content model is inline and not object, and it must
** not be INS or DEL.
*/
static Bool IsNodePushable( Node *node )
{
    if (node->tag == NULL
        || !(node->tag->model & CM_INLINE)
        || (node->tag->model & CM_OBJECT))
    {
        return no;
    }

    /*\ Issue #92: OLD problem of ins and del which are marked as both
     *  inline and block, thus should NOT ever be 'inserted'
    \*/
    return (nodeIsINS(node) || nodeIsDEL(node)) ? no : yes;
}
/*
  Push a copy of an inline node onto the inline stack. Nothing is
  pushed for implicit nodes (those generated from the istack itself)
  or for nodes rejected by IsNodePushable().

  A tag that is already on the stack is not pushed a second time
  (FONT excepted), so that

    <p><em>text
    <p><em>more text

  is not mapped to

    <p><em>text</em></p>
    <p><em><em>more text</em></em>
*/
void TY_(PushInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    IStack *slot;

    if (node->implicit || !IsNodePushable(node))
        return;

    if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) )
        return;

    /* grow the stack when the next entry would not fit */
    if (lexer->istacksize + 1 > lexer->istacklength)
    {
        if (lexer->istacklength == 0)
            lexer->istacklength = 6;  /* this is perhaps excessive */

        lexer->istacklength *= 2;
        lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack,
                            sizeof(IStack)*(lexer->istacklength));
    }

    slot = lexer->istack + lexer->istacksize;
    slot->tag = node->tag;
    slot->element = TY_(tmbstrdup)(doc->allocator, node->element);
    slot->attributes = TY_(DupAttrs)( doc, node->attributes );
    lexer->istacksize++;
}
/* Drop the top entry of the inline stack, freeing its attribute list
** and element name. The caller must make sure the stack is not empty.
*/
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;
    AttVal *attr;

    lexer->istacksize--;
    top = lexer->istack + lexer->istacksize;

    while ((attr = top->attributes) != NULL)
    {
        top->attributes = attr->next;
        TY_(FreeAttribute)( doc, attr );
    }

    TidyDocFree(doc, top->element);
    top->element = NULL; /* remove the freed element */
}
/* Pop inline stack entries until one with tag id "tid" has been popped
** or the stack is empty.
*/
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;
    IStack *popped;

    while (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* the slot just vacated still carries its tag pointer */
        popped = lexer->istack + lexer->istacksize;
        if ( popped->tag->id == tid )
            break;
    }
}
/* Pop the inline stack for the end tag "node" ("node" may be NULL).
** A node that is not pushable is ignored. For </a> everything up to and
** including the matching <a> is popped; otherwise just the top entry.
*/
void TY_(PopInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;

    if (node && !IsNodePushable(node))
        return;

    /* if node is </a> then pop until we find an <a> */
    if (node && nodeIsA(node))
    {
        PopIStackUntil( doc, TidyTag_A );
        return;
    }

    if (!(lexer->istacksize > 0))
        return;

    PopIStack( doc );

    /* #427822 - fix by Randy Waki 7 Aug 00 */
    if (lexer->insert >= lexer->istack + lexer->istacksize)
        lexer->insert = NULL;
}
/* Returns yes when some entry of the inline stack has the same tag as
** "node". The stack is searched from the top down.
*/
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize;

    while (--idx >= 0)
    {
        if (lexer->istack[idx].tag == node->tag)
            return yes;
    }
    return no;
}
/*
  Test whether the top entry of the inline stack has the same tag as
  "node". Always answers no when "element" is given but is not a
  pushable node.
*/
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node )
{
    Lexer* lexer = doc->lexer;

    if ( element && !IsNodePushable(element) )
        return no;

    if ( lexer->istacksize > 0
         && lexer->istack[lexer->istacksize - 1].tag == node->tag )
    {
        return yes;
    }
    return no;
}
/*
  This has the effect of inserting "missing" inline
  elements around the contents of blocklevel elements
  such as P, TD, TH, DIV, PRE etc. This procedure is
  called at the start of ParseBlock, when the inline
  stack is not empty, as will be the case in:

    <i><h1>italic heading</h1></i>

  which is then treated as equivalent to

    <h1><i>italic heading</i></h1>

  This is implemented by setting the lexer into a mode
  where it gets tokens from the inline stack rather than
  from the input stream.

  Returns the number of stack entries above the stack base.
*/
int TY_(InlineDup)( TidyDocImpl* doc, Node* node )
{
    Lexer* lexer = doc->lexer;
    int pending = lexer->istacksize - lexer->istackbase;

    if (pending > 0)
    {
        lexer->insert = &(lexer->istack[lexer->istackbase]);
        lexer->inode = node;
    }
    return pending;
}
/*
  Defer duplicates when entering a table or other
  element where the inlines shouldn't be duplicated:
  the lexer's pending insert position and node are cleared.
*/
void TY_(DeferDup)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;

    lexer->insert = NULL;
    lexer->inode = NULL;
}
/* Produce the next token while the lexer is replaying the inline stack.
**
** When no stack entry is pending (lexer->insert == NULL) the deferred
** node lexer->inode is handed back and cleared. Otherwise a new implicit
** start tag is built from the entry lexer->insert points at, and
** lexer->insert is advanced to the next entry, or set to NULL once the
** end of the stack has been reached.
**
** Fix: the debug diagnostic was guarded by `#if definedENABLE_DEBUG_LOG`,
** which tests one undefined identifier and is therefore always false;
** it now uses `defined(ENABLE_DEBUG_LOG)` as intended.
*/
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *node;
    IStack *istack;
    uint n;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        node = lexer->inode;
        lexer->inode = NULL;
        return node;
    }

    /*
      If this is the "latest" node then update
      the position, otherwise use current values
    */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    node = TY_(NewNode)(doc->allocator, lexer);
    node->type = StartTag;
    node->implicit = yes;
    node->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    node->end = lexer->txtend; /* was : lexer->txtstart; */
    istack = lexer->insert;

#if defined(ENABLE_DEBUG_LOG)
    if ( lexer->istacksize == 0 )
    {
        SPRTF( "WARNING: ZERO sized istack!\n" );
    }
#endif

    node->element = TY_(tmbstrdup)(doc->allocator, istack->element);
    node->tag = istack->tag;
    node->attributes = TY_(DupAttrs)( doc, istack->attributes );

    /* advance lexer to next item on the stack */
    n = (uint)(lexer->insert - &(lexer->istack[0]));

    /* and recover state if we have reached the end */
    if (++n < lexer->istacksize)
        lexer->insert = &(lexer->istack[n]);
    else
        lexer->insert = NULL;

    return node;
}
/*
  We have two CM_INLINE elements pushed ... the first is closing,
  but, like the browser, the second should be retained ...
  Like <b>bold <i>bold and italics</b> italics only</i>

  This function swaps the stack entry found for "element" with an
  entry for "node" found below it, returning 'yes' if both were
  found in that order and 'no' otherwise.
*/
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node )
{
    Lexer* lexer = doc->lexer;
    IStack *upper, *lower, saved;
    int idx;

    if ( !lexer || !element || !element->tag || !node || !node->tag )
        return no;
    if ( !TY_(IsPushed)( doc, element ) || !TY_(IsPushed)( doc, node ) )
        return no;
    if ( !((lexer->istacksize - lexer->istackbase) >= 2) )
        return no;

    /* we have a chance of succeeding ... */
    for (idx = (lexer->istacksize - lexer->istackbase - 1); idx >= 0; --idx)
    {
        if (lexer->istack[idx].tag != element->tag)
            continue;

        /* found the element tag - now look further down for node's tag */
        upper = &lexer->istack[idx];
        lower = NULL;
        for (--idx; idx >= 0; --idx)
        {
            if (lexer->istack[idx].tag == node->tag)
            {
                lower = &lexer->istack[idx];
                break;
            }
        }

        if ( lower )
        {
            /* perform the swap */
            saved = *lower;
            *lower = *upper;
            *upper = saved;
            return yes;
        }
    }
    return no;
}
/*
  We want to push a specific element on the stack,
  but it may not be the last element, which InlineDup()
  would handle. Return yes, if found and inserted.
*/
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element )
{
    Lexer* lexer = doc->lexer;
    int depth, idx;

    if ( !element || element->tag == NULL )
        return no;

    depth = lexer->istacksize - lexer->istackbase;
    if ( depth <= 0 )
        return no;

    for ( idx = depth - 1; idx >= 0; --idx )
    {
        if (lexer->istack[idx].tag != element->tag)
            continue;

        /* found our element tag - insert it */
        lexer->insert = &(lexer->istack[idx]);
        lexer->inode = node;
        return yes;
    }
    return no;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

666
third_party/tidy/language.c vendored Normal file
View file

@ -0,0 +1,666 @@
/* clang-format off */
/* language.c -- localization support for HTML Tidy.
Copyright 2015 HTACG
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/language.h"
#include "third_party/tidy/tmbstr.h"
#include "libc/assert.h"
#include "third_party/tidy/language_en.inc"
/**
* This structure type provides universal access to all of Tidy's strings.
* `languages` has 9 slots: up to 8 installed languages plus the NULL
* entry that terminates the list. Grow it as more languages are added.
*/
typedef struct {
Bool manually_set; /* yes once tidySetLanguageSetByUser() has been called */
languageDefinition *currentLanguage; /* tried first when looking up a string */
languageDefinition *fallbackLanguage; /* tried second; may be NULL */
languageDefinition *languages[9]; /* installed languages, NULL terminated */
} tidyLanguagesType;
/**
* This single structure contains all localizations. Note that we preset
* `.currentLanguage` to language_en, which is Tidy's default language.
* The additional languages are only listed when SUPPORT_LOCALIZATIONS
* is enabled.
*/
static tidyLanguagesType tidyLanguages = {
no, /* library language was NOT manually set */
&language_en, /* current language */
&language_en, /* first fallback language */
{
/* Required localization! */
&language_en,
#if SUPPORT_LOCALIZATIONS
/* These additional languages are installed. */
&language_en_gb,
&language_es,
&language_es_mx,
&language_pt_br,
&language_zh_cn,
&language_fr,
&language_de,
#endif
NULL /* This array MUST be null terminated. */
}
};
/**
 *  This structure maps old-fashioned Windows strings
 *  to proper POSIX names (modern Windows already uses
 *  POSIX names).
 *
 *  The table is scanned front to back and the first entry whose Windows
 *  name matches wins. Hyphenated POSIX names such as "pt-br" are fine:
 *  the normalization step always writes an underscore as the third
 *  character of a five character code.
 *
 *  Fix: "eng" and "english-uk" used to map to the non-existent code
 *  "eb_gb"; they now map to "en_gb".
 */
static const tidyLocaleMapItemImpl localeMappings[] = {
    { "america",                "en_us" },
    { "american english",       "en_us" },
    { "american-english",       "en_us" },
    { "american",               "en_us" },
    { "aus",                    "en_au" },
    { "australia",              "en_au" },
    { "australian",             "en_au" },
    { "austria",                "de_at" },
    { "aut",                    "de_at" },
    { "bel",                    "nl_be" },
    { "belgian",                "nl_be" },
    { "belgium",                "nl_be" },
    { "bra",                    "pt-br" },
    { "brazil",                 "pt-br" },
    { "britain",                "en_gb" },
    { "can",                    "en_ca" },
    { "canada",                 "en_ca" },
    { "canadian",               "en_ca" },
    { "che",                    "de_ch" },
    { "china",                  "zh_cn" },
    { "chinese-simplified",     "zh"    },
    { "chinese-traditional",    "zh_tw" },
    { "chinese",                "zh"    },
    { "chn",                    "zh_cn" },
    { "chs",                    "zh"    },
    { "cht",                    "zh_tw" },
    { "csy",                    "cs"    },
    { "cze",                    "cs_cz" },
    { "czech",                  "cs_cz" },
    { "dan",                    "da"    },
    { "danish",                 "da"    },
    { "dea",                    "de_at" },
    { "denmark",                "da_dk" },
    { "des",                    "de_ch" },
    { "deu",                    "de"    },
    { "dnk",                    "da_dk" },
    { "dutch-belgian",          "nl_be" },
    { "dutch",                  "nl"    },
    { "ell",                    "el"    },
    { "ena",                    "en_au" },
    { "enc",                    "en_ca" },
    { "eng",                    "en_gb" },
    { "england",                "en_gb" },
    { "english-american",       "en_us" },
    { "english-aus",            "en_au" },
    { "english-can",            "en_ca" },
    { "english-nz",             "en_nz" },
    { "english-uk",             "en_gb" },
    { "english-us",             "en_us" },
    { "english-usa",            "en_us" },
    { "english",                "en"    },
    { "enu",                    "en_us" },
    { "enz",                    "en_nz" },
    { "esm",                    "es-mx" },
    { "esn",                    "es"    },
    { "esp",                    "es"    },
    { "fin",                    "fi"    },
    { "finland",                "fi_fi" },
    { "finnish",                "fi"    },
    { "fra",                    "fr"    },
    { "france",                 "fr_fr" },
    { "frb",                    "fr_be" },
    { "frc",                    "fr_ca" },
    { "french-belgian",         "fr_be" },
    { "french-canadian",        "fr_ca" },
    { "french-swiss",           "fr_ch" },
    { "french",                 "fr"    },
    { "frs",                    "fr_ch" },
    { "gbr",                    "en_gb" },
    { "german-austrian",        "de_at" },
    { "german-swiss",           "de_ch" },
    { "german",                 "de"    },
    { "germany",                "de_de" },
    { "grc",                    "el_gr" },
    { "great britain",          "en_gb" },
    { "greece",                 "el_gr" },
    { "greek",                  "el"    },
    { "hkg",                    "zh_hk" },
    { "holland",                "nl_nl" },
    { "hong kong",              "zh_hk" },
    { "hong-kong",              "zh_hk" },
    { "hun",                    "hu"    },
    { "hungarian",              "hu"    },
    { "hungary",                "hu_hu" },
    { "iceland",                "is_is" },
    { "icelandic",              "is"    },
    { "ireland",                "en_ie" },
    { "irl",                    "en_ie" },
    { "isl",                    "is"    },
    { "ita",                    "it"    },
    { "ita",                    "it_it" },  /* never reached: the "ita" entry above matches first */
    { "italian-swiss",          "it_ch" },
    { "italian",                "it"    },
    { "italy",                  "it_it" },
    { "its",                    "it_ch" },
    { "japan",                  "ja_jp" },
    { "japanese",               "ja"    },
    { "jpn",                    "ja"    },
    { "kor",                    "ko"    },
    { "korea",                  "ko_kr" },
    { "korean",                 "ko"    },
    { "mex",                    "es-mx" },
    { "mexico",                 "es-mx" },
    { "netherlands",            "nl_nl" },
    { "new zealand",            "en_nz" },
    { "new-zealand",            "en_nz" },
    { "nlb",                    "nl_be" },
    { "nld",                    "nl"    },
    { "non",                    "nn"    },
    { "nor",                    "nb"    },
    { "norway",                 "no"    },
    { "norwegian-bokmal",       "nb"    },
    { "norwegian-nynorsk",      "nn"    },
    { "norwegian",              "no"    },
    { "nz",                     "en_nz" },
    { "nzl",                    "en_nz" },
    { "plk",                    "pl"    },
    { "pol",                    "pl-pl" },
    { "poland",                 "pl-pl" },
    { "polish",                 "pl"    },
    { "portugal",               "pt-pt" },
    { "portuguese-brazil",      "pt-br" },
    { "portuguese",             "pt"    },
    { "pr china",               "zh_cn" },
    { "pr-china",               "zh_cn" },
    { "prt",                    "pt-pt" },
    { "ptb",                    "pt-br" },
    { "ptg",                    "pt"    },
    { "rus",                    "ru"    },
    { "russia",                 "ru-ru" },
    { "russian",                "ru"    },
    { "sgp",                    "zh_sg" },
    { "singapore",              "zh_sg" },
    { "sky",                    "sk"    },
    { "slovak",                 "sk"    },
    { "spain",                  "es-es" },
    { "spanish-mexican",        "es-mx" },
    { "spanish-modern",         "es"    },
    { "spanish",                "es"    },
    { "sve",                    "sv"    },
    { "svk",                    "sk-sk" },
    { "swe",                    "sv-se" },
    { "sweden",                 "sv-se" },
    { "swedish",                "sv"    },
    { "swiss",                  "de_ch" },
    { "switzerland",            "de_ch" },
    { "taiwan",                 "zh_tw" },
    { "trk",                    "tr"    },
    { "tur",                    "tr-tr" },
    { "turkey",                 "tr-tr" },
    { "turkish",                "tr"    },
    { "twn",                    "zh_tw" },
    { "uk",                     "en_gb" },
    { "united kingdom",         "en_gb" },
    { "united states",          "en_us" },
    { "united-kingdom",         "en_gb" },
    { "united-states",          "en_us" },
    { "us",                     "en_us" },
    { "usa",                    "en_us" },
    /* MUST be last. */
    { NULL,                     NULL    }
};
/**
 *  The real string lookup function: scans the messages of `definition`
 *  for the entry whose key is `messageType` and whose plural form is the
 *  one the language selects for the quantity `plural`. Returns NULL when
 *  the language has no such entry.
 */
static ctmbstr tidyLocalizedStringImpl( uint messageType, languageDefinition *definition, uint plural )
{
    languageDictionary *entries = &definition->messages;
    uint wantedForm = definition->whichPluralForm(plural);
    int idx = 0;

    /* the dictionary ends at the first entry without a value */
    while ( (*entries)[idx].value )
    {
        if ( (*entries)[idx].key == messageType
             && (*entries)[idx].pluralForm == wantedForm )
        {
            return (*entries)[idx].value;
        }
        ++idx;
    }
    return NULL;
}
/**
 *  Provides a string given `messageType` in the current
 *  localization, returning the correct plural form given
 *  `quantity`.
 *
 *  Lookup order: current language, fallback language (if any), built-in
 *  `en` for `quantity`, and finally built-in `en` in its singular form.
 *
 *  This isn't currently highly optimized; rewriting some
 *  of infrastructure to use hash lookups is a preferred
 *  future optimization.
 */
ctmbstr TY_(tidyLocalizedStringN)( uint messageType, uint quantity )
{
    ctmbstr text = tidyLocalizedStringImpl( messageType, tidyLanguages.currentLanguage, quantity);

    if (text)
        return text;

    if (tidyLanguages.fallbackLanguage)
    {
        text = tidyLocalizedStringImpl( messageType, tidyLanguages.fallbackLanguage, quantity);
        if (text)
            return text;
    }

    /* Fallback to en which is built in. */
    text = tidyLocalizedStringImpl( messageType, &language_en, quantity);
    if (text)
        return text;

    /* Last resort: Fallback to en singular which is built in. */
    return tidyLocalizedStringImpl( messageType, &language_en, 1);
}
/**
* Provides a string given `messageType` in the current
* localization, in the non-plural form. This is
* tidyLocalizedStringN() with a quantity of 1.
*
* This isn't currently highly optimized; rewriting some
* of infrastructure to use hash lookups is a preferred
* future optimization.
*/
ctmbstr TY_(tidyLocalizedString)( uint messageType )
{
return TY_(tidyLocalizedStringN)( messageType, 1 );
}
/**
 *  Retrieves the POSIX name for a string. Result is a static char so please
 *  don't try to free it. If the name looks like a cc_ll identifier, we will
 *  return it if there's no other match.
 *
 *  The result is always NUL-terminated and is one of:
 *    - "ll_cc" when the (mapped) name has at least five characters;
 *    - the first two characters when it has two to four characters;
 *    - the name itself when it is shorter than two characters.
 *  Returns NULL when `locale` could not be duplicated (e.g. it is NULL).
 *
 *  Fix: names of length 0, 1 or 2 used to leave the tail of the static
 *  buffer untouched, so the result carried stale characters from the
 *  initializer or from an earlier call (asking for "es" right after
 *  "es_mx" yielded "es_mx").
 *
 *  @note this routine uses default allocator, see tidySetMallocCall.
 */
static tmbstr TY_(tidyNormalizedLocaleName)( ctmbstr locale )
{
    uint i;
    uint len;
    uint keep;
    static char result[6] = "xx_yy";
    TidyAllocator * allocator = &TY_(g_default_allocator);
    tmbstr search = TY_(tmbstrdup)( allocator, locale );

    if ( !search )
        return NULL;

    search = TY_(tmbstrtolower)(search);

    /* See if our string matches a Windows name. */
    for (i = 0; localeMappings[i].winName; ++i)
    {
        if ( strcmp( localeMappings[i].winName, search ) == 0 )
        {
            TidyFree( allocator, search );
            search = TY_(tmbstrdup)( allocator, localeMappings[i].POSIXName );
            break;
        }
    }

    /* We're going to be stupid about this and trust the user, and
       return just the first two characters if they exist and the
       4th and 5th if they exist. The worst that can happen is a
       junk language that doesn't exist and won't be set. */
    len = strlen( search );

    if ( len >= 5 )
    {
        /* language, forced underscore, region */
        result[0] = tolower( (unsigned char) search[0] );
        result[1] = tolower( (unsigned char) search[1] );
        result[2] = '_';
        result[3] = tolower( (unsigned char) search[3] );
        result[4] = tolower( (unsigned char) search[4] );
        result[5] = '\0';
    }
    else
    {
        /* language only */
        keep = ( len < 2 ? len : 2 );
        for ( i = 0; i < keep; i++ )
            result[i] = tolower( (unsigned char) search[i] );
        result[keep] = '\0';
    }

    TidyFree( allocator, search );
    return result;
}
/**
 *  Returns the languageDefinition if the languageCode is installed in Tidy,
 *  otherwise return NULL. A language matches when the value of the first
 *  entry of its message dictionary equals `languageCode`.
 */
static languageDefinition *TY_(tidyTestLanguage)( ctmbstr languageCode )
{
    languageDefinition **slot;

    for (slot = tidyLanguages.languages; *slot; ++slot)
    {
        ctmbstr installedCode = (*slot)->messages[0].value;

        if ( strcmp(installedCode, languageCode) == 0 )
            return *slot;
    }
    return NULL;
}
/**
 *  Tells Tidy to use a different language for output.
 *  @param  languageCode A Windows or POSIX language code, and must match
 *          a TIDY_LANGUAGE for an installed language.
 *  @result Indicates that a setting was applied, but not necessarily the
 *          specific request, i.e., true indicates a language and/or region
 *          was applied. If es_mx is requested but not installed, and es is
 *          installed, then es will be selected and this function will return
 *          true. However the opposite is not true; if es is requested but
 *          not present, Tidy will not try to select from the es_XX variants.
 */
Bool TY_(tidySetLanguage)( ctmbstr languageCode )
{
    languageDefinition *exact = NULL;   /* definition for the full code */
    languageDefinition *base = NULL;    /* definition for the two letter language */
    tmbstr wantCode = NULL;
    char lang[3] = "";

    if ( !languageCode )
        return no;

    wantCode = TY_(tidyNormalizedLocaleName)( languageCode );
    if ( !wantCode )
        return no;

    /* We want to use the specified language as the currentLanguage, and set
       fallback language as necessary. We have either a two or five character
       code, either or both of which might be installed. Let's test both of
       them:
     */
    exact = TY_(tidyTestLanguage)( wantCode );  /* WANTED language */

    if ( strlen( wantCode ) > 2 )
    {
        lang[0] = wantCode[0];
        lang[1] = wantCode[1];
        lang[2] = '\0';
        base = TY_(tidyTestLanguage)( lang );   /* BACKUP language? */
    }

    if ( exact )
    {
        /* the base language, when installed, becomes the fallback */
        tidyLanguages.currentLanguage = exact;
        tidyLanguages.fallbackLanguage = base;
    }
    else if ( base )
    {
        tidyLanguages.currentLanguage = base;
        tidyLanguages.fallbackLanguage = NULL;
    }
    /* neither installed: no change */

    return exact || base;
}
/**
 *  Gets the current language used by Tidy.
 *  @returns The `value` of the first record in the `messages` dictionary
 *           of `tidyLanguages.currentLanguage`, which is the record
 *           tidyTestLanguage() matches language codes against.
 */
ctmbstr TY_(tidyGetLanguage)(void)
{
    languageDefinition *langDef = tidyLanguages.currentLanguage;
    languageDictionary *langDict = &langDef->messages;
    return (*langDict)[0].value;
}
/**
 *  Indicates whether or not the current language was set by a
 *  LibTidy user (yes) or internally by the library (no).
 *  @returns The current value of `tidyLanguages.manually_set`.
 */
Bool TY_(tidyGetLanguageSetByUser)(void)
{
    return tidyLanguages.manually_set;
}
/**
* Specifies to LibTidy that the user (rather than the library)
* selected the current language. Sets `tidyLanguages.manually_set` to
* `yes`, the flag that tidyGetLanguageSetByUser() reports. Nothing in
* this function ever resets it.
*/
void TY_(tidySetLanguageSetByUser)( void )
{
tidyLanguages.manually_set = yes;
}
/**
 *  Looks up `messageType` in the built-in `language_en` localization,
 *  regardless of the currently selected language.
 *  @param messageType Key of the string to fetch.
 *  @param quantity Count forwarded to the lookup for plural selection.
 *  @returns Whatever tidyLocalizedStringImpl() yields for that key.
 */
ctmbstr TY_(tidyDefaultStringN)( uint messageType, uint quantity )
{
    ctmbstr text = tidyLocalizedStringImpl( messageType, &language_en, quantity );
    return text;
}
/**
 *  Looks up `messageType` in the built-in `language_en` localization
 *  using a fixed quantity of 1.
 *  @param messageType Key of the string to fetch.
 *  @returns Whatever tidyLocalizedStringImpl() yields for that key.
 */
ctmbstr TY_(tidyDefaultString)( uint messageType )
{
    const uint singleQuantity = 1;
    ctmbstr text = tidyLocalizedStringImpl( messageType, &language_en, singleQuantity );
    return text;
}
/**
 *  Determines the true size of the `language_en` array indicating the
 *  number of items in the array, _not_ the highest index.
 *  The count is the number of leading records whose `value` is non-NULL.
 *  It is computed on the first call and cached in a function-local
 *  static; a count of zero is never cached and is recomputed each call.
 *  @returns Number of records before the first NULL `value`.
 */
static const uint tidyStringKeyListSize(void)
{
    static uint array_size = 0;

    if ( array_size == 0 )
    {
        while ( language_en.messages[array_size].value != NULL ) {
            array_size++;
        }
    }
    return array_size;
}
/*
 *  Initializes the TidyIterator to point to the first item
 *  in Tidy's list of localization string keys. Note that
 *  these are provided for documentation generation purposes
 *  and probably aren't useful for LibTidy implementors.
 *  The iterator is a 1-based index stored in the pointer value;
 *  getNextStringKey() writes 0 into it once the list is exhausted.
 */
TidyIterator TY_(getStringKeyList)(void)
{
    return (TidyIterator)(size_t)1;
}
/*
 *  Provides the next key value in Tidy's list of localized
 *  strings. Note that these are provided for documentation
 *  generation purposes and probably aren't useful to
 *  libtidy implementors.
 *  `*iter` holds a 1-based position. On a valid position the key of
 *  that record in `language_en.messages` is returned and the position
 *  advances; once it moves past the last record `*iter` is set to 0.
 *  Returns 0 when `*iter` is not a valid position.
 */
uint TY_(getNextStringKey)( TidyIterator* iter )
{
    size_t position;
    uint key = 0;

    assert( iter != NULL );
    position = (size_t)*iter;

    if ( position >= 1 && position <= tidyStringKeyListSize() )
    {
        key = language_en.messages[ position - 1 ].key;
        position += 1;
    }

    /* Past the end: park the iterator on 0. */
    if ( position > tidyStringKeyListSize() )
        position = 0;

    *iter = (TidyIterator)position;
    return key;
}
/**
 *  Determines the true size of the `localeMappings` array indicating the
 *  number of items in the array, _not_ the highest index.
 *  The count is the number of leading records whose `winName` is
 *  non-NULL. It is computed on the first call and cached in a
 *  function-local static; a count of zero is never cached.
 *  @returns Number of records before the first NULL `winName`.
 */
static const uint tidyLanguageListSize(void)
{
    static uint array_size = 0;

    if ( array_size == 0 )
    {
        while ( localeMappings[array_size].winName ) {
            array_size++;
        }
    }
    return array_size;
}
/**
 *  Initializes the TidyIterator to point to the first item
 *  in Tidy's structure of Windows<->POSIX locale mapping.
 *  Items can be retrieved with getNextWindowsLanguage();
 *  The iterator is a 1-based index stored in the pointer value.
 */
TidyIterator TY_(getWindowsLanguageList)(void)
{
    return (TidyIterator)(size_t)1;
}
/**
 *  Returns the next record of type `tidyLocaleMapItemImpl` in
 *  Tidy's structure of Windows<->POSIX locale mapping.
 *  `*iter` holds a 1-based position into `localeMappings`. On a valid
 *  position a pointer to that record is returned and the position
 *  advances; once it moves past the last record `*iter` is set to 0.
 *  Returns NULL when `*iter` is not a valid position.
 */
const tidyLocaleMapItemImpl *TY_(getNextWindowsLanguage)( TidyIterator *iter )
{
    size_t position;
    const tidyLocaleMapItemImpl *record = NULL;

    assert( iter != NULL );
    position = (size_t)*iter;

    if ( position >= 1 && position <= tidyLanguageListSize() )
    {
        record = &localeMappings[ position - 1 ];
        position += 1;
    }

    /* Past the end: park the iterator on 0. */
    if ( position > tidyLanguageListSize() )
        position = 0;

    *iter = (TidyIterator)position;
    return record;
}
/**
 *  Given a `tidyLocaleMapItemImpl`, return the Windows name.
 *  @param item Record to read; it is dereferenced without a NULL check.
 *  @returns The record's `winName` field.
 */
ctmbstr TY_(TidyLangWindowsName)( const tidyLocaleMapItemImpl *item )
{
    ctmbstr windowsName = item->winName;
    return windowsName;
}
/**
 *  Given a `tidyLocaleMapItemImpl`, return the POSIX name.
 *  @param item Record to read; it is dereferenced without a NULL check.
 *  @returns The record's `POSIXName` field.
 */
ctmbstr TY_(TidyLangPosixName)( const tidyLocaleMapItemImpl *item )
{
    ctmbstr posixName = item->POSIXName;
    return posixName;
}
/**
 *  Determines the number of languages installed in Tidy.
 *  The count is the number of leading non-NULL entries of
 *  `tidyLanguages.languages`. It is computed on the first call and
 *  cached in a function-local static; a count of zero is never cached.
 *  @returns Number of entries before the first NULL.
 */
static const uint tidyInstalledLanguageListSize(void)
{
    static uint array_size = 0;

    if ( array_size == 0 )
    {
        while ( tidyLanguages.languages[array_size] ) {
            array_size++;
        }
    }
    return array_size;
}
/**
 *  Initializes the TidyIterator to point to the first item
 *  in Tidy's list of installed language codes.
 *  Items can be retrieved with getNextInstalledLanguage();
 *  The iterator is a 1-based index stored in the pointer value.
 */
TidyIterator TY_(getInstalledLanguageList)(void)
{
    return (TidyIterator)(size_t)1;
}
/**
 *  Returns the next installed language.
 *  `*iter` holds a 1-based position into `tidyLanguages.languages`. On a
 *  valid position the `value` of the first `messages` record of that
 *  language is returned and the position advances; once it moves past
 *  the last language `*iter` is set to 0.
 *  Returns NULL when `*iter` is not a valid position.
 */
ctmbstr TY_(getNextInstalledLanguage)( TidyIterator* iter )
{
    size_t position;
    ctmbstr code = NULL;

    assert( iter != NULL );
    position = (size_t)*iter;

    if ( position >= 1 && position <= tidyInstalledLanguageListSize() )
    {
        code = tidyLanguages.languages[position - 1]->messages[0].value;
        position += 1;
    }

    /* Past the end: park the iterator on 0. */
    if ( position > tidyInstalledLanguageListSize() )
        position = 0;

    *iter = (TidyIterator)position;
    return code;
}
/*
* end:
*/

223
third_party/tidy/language.h vendored Normal file
View file

@ -0,0 +1,223 @@
#ifndef language_h
#define language_h
/* clang-format off */
/*********************************************************************
* Localization support for HTML Tidy.
*
* This header provides the public (within libtidy) interface to
* basic localization support. To add your own localization, create
* a new `language_xx.h` file and add it to the struct in
* `language.c`.
*
* (c) 2015 HTACG
* See `tidy.h` for the copyright notice.
*********************************************************************/
#include "third_party/tidy/forward.h"
/** @name Exposed Data Structures */
/** @{ */
/**
* These enumerations are used within instances of `languageDefinition`
* structures to provide additional metadata, and are localizable
* therein. They serve as `key` values of dictionary entries.
*/
typedef enum {
/* Specifies the language code for a particular language. */
TIDY_LANGUAGE = 400,
/* Marker for the last key in the structure. Has no explicit value, so
it is one greater than TIDY_LANGUAGE (401). */
TIDY_MESSAGE_TYPE_LAST
} tidyLanguage;
/**
* Describes a record for a localization string.
* - key must correspond with one of Tidy's enums (see `tidyMessageTypes`
* below)
* - pluralForm corresponds to gettext plural forms case (not singularity).
* Most entries should be case 0, representing the single case:
* https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
* - value is the localized text itself.
*/
typedef struct languageDictionaryEntry {
uint key; /* message identifier */
uint pluralForm; /* gettext plural-form case this text applies to */
ctmbstr value; /* the localized string */
} languageDictionaryEntry;
/**
* For now we'll just use an array to hold all of the dictionary
* entries. In the future we can convert this to a hash structure
* which will make looking up strings faster.
* The array has a fixed capacity of 600 entries and its elements are
* const.
*/
typedef languageDictionaryEntry const languageDictionary[600];
/**
* Finally, a complete language definition. The item `whichPluralForm`
* is a function pointer that will provide the correct plural
* form given the value `n`. The actual function is present in
* each language header and is language dependent.
* The item `messages` is the dictionary of localized strings.
*/
typedef struct languageDefinition {
uint (*whichPluralForm)(uint n); /* maps a quantity to a plural-form case */
languageDictionary messages; /* the language's dictionary entries */
} languageDefinition;
/**
* The function getNextWindowsLanguage() returns pointers to this type;
* it gives LibTidy implementors the ability to determine how Windows
* locale names are mapped to POSIX language codes.
* Read the fields through TidyLangWindowsName() and TidyLangPosixName().
*/
typedef struct tidyLocaleMapItemImpl {
ctmbstr winName; /* the Windows locale name */
ctmbstr POSIXName; /* the POSIX language code it maps to */
} tidyLocaleMapItemImpl;
/** @} */
/** @name Localization Related Functions */
/** @{ */
/**
** Determines the current locale without affecting the C locale.
** Tidy has always used the default C locale, and at this point
** in its development we're not going to tamper with that.
** @param result The buffer to use to return the string.
** @return The same buffer for convenience, or NULL on failure.
*/
tmbstr TY_(tidySystemLocale)(tmbstr result);
/**
* Tells Tidy to use a different language for output.
* @param languageCode A Windows or POSIX language code, and must match
* a TIDY_LANGUAGE for an installed language.
* @result Indicates that a setting was applied, but not necessarily the
* specific request, i.e., true indicates a language and/or region
* was applied. If es_mx is requested but not installed, and es is
* installed, then es will be selected and this function will return
* true. However the opposite is not true; if es is requested but
* not present, Tidy will not try to select from the es_XX variants.
*/
Bool TY_(tidySetLanguage)( ctmbstr languageCode );
/**
* Gets the current language used by Tidy.
*/
ctmbstr TY_(tidyGetLanguage)(void);
/**
* Indicates whether or not the current language was set by a
* LibTidy user or internally by the library. This flag prevents
* subsequently created instances of TidyDocument from changing the
* user's language.
* @returns Returns yes to indicate that the current language was
* specified by an API user.
*/
Bool TY_(tidyGetLanguageSetByUser)(void);
/**
* Specifies to LibTidy that the user (rather than the library)
* selected the current language. This flag prevents subsequently
* created instances of TidyDocument from changing the user's language.
*/
void TY_(tidySetLanguageSetByUser)( void );
/**
* Provides a string given `messageType` in the current
* localization for `quantity`.
*/
ctmbstr TY_(tidyLocalizedStringN)( uint messageType, uint quantity );
/**
* Provides a string given `messageType` in the current
* localization for the single case.
*/
ctmbstr TY_(tidyLocalizedString)( uint messageType );
/** @} */
/** @name Documentation Generation */
/** @{ */
/**
* Provides a string given `messageType` in the default
* localization (which is `en`), for the given quantity.
*/
ctmbstr TY_(tidyDefaultStringN)( uint messageType, uint quantity );
/**
* Provides a string given `messageType` in the default
* localization (which is `en`).
*/
ctmbstr TY_(tidyDefaultString)( uint messageType );
/*
* Initializes the TidyIterator to point to the first item
* in Tidy's list of localization string keys. Note that
* these are provided for documentation generation purposes
* and probably aren't useful for LibTidy implementors.
*/
TidyIterator TY_(getStringKeyList)(void);
/*
* Provides the next key value in Tidy's list of localized
* strings. Note that these are provided for documentation
* generation purposes and probably aren't useful to
* libtidy implementors.
*/
uint TY_(getNextStringKey)( TidyIterator* iter );
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's structure of Windows<->POSIX locale mapping.
* Items can be retrieved with getNextWindowsLanguage();
*/
TidyIterator TY_(getWindowsLanguageList)(void);
/**
* Returns the next record of type `tidyLocaleMapItemImpl` in
* Tidy's structure of Windows<->POSIX locale mapping.
*/
const tidyLocaleMapItemImpl *TY_(getNextWindowsLanguage)( TidyIterator* iter );
/**
* Given a `tidyLocaleMapItemImpl`, return the Windows name.
*/
ctmbstr TY_(TidyLangWindowsName)( const tidyLocaleMapItemImpl *item );
/**
* Given a `tidyLocaleMapItemImpl`, return the POSIX name.
*/
ctmbstr TY_(TidyLangPosixName)( const tidyLocaleMapItemImpl *item );
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's list of installed language codes.
* Items can be retrieved with getNextInstalledLanguage();
*/
TidyIterator TY_(getInstalledLanguageList)(void);
/**
* Returns the next installed language.
*/
ctmbstr TY_(getNextInstalledLanguage)( TidyIterator* iter );
/** @} */
#endif /* language_h */

2507
third_party/tidy/language_en.inc vendored Normal file

File diff suppressed because it is too large Load diff

4551
third_party/tidy/lexer.c vendored Normal file

File diff suppressed because it is too large Load diff

750
third_party/tidy/lexer.h vendored Normal file
View file

@ -0,0 +1,750 @@
#ifndef __LEXER_H__
#define __LEXER_H__
/* clang-format off */
/**************************************************************************//**
* @file
* Lexer for HTML and XML Parsers.
*
* Given an input source, it returns a sequence of tokens.
*
* GetToken(source) gets the next token
* UngetToken(source) provides one level undo
*
* The tags include an attribute list:
*
* - linked list of attribute/value nodes
* - each node has 2 NULL-terminated strings.
* - entities are replaced in attribute values
*
* white space is compacted if not in preformatted mode
* If not in preformatted mode then leading white space
* is discarded and subsequent white space sequences
* compacted to single space characters.
*
* If XmlTags is no then Tag names are folded to upper
* case and attribute names to lower case.
*
* Not yet done:
* - Doctype subset and marked sections
*
* @author HTACG, et al (consult git log)
*
* @copyright
* (c) 1998-2021 (W3C) MIT, ERCIM, Keio University, and HTACG.
* See tidy.h for the copyright notice.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
#include "third_party/tidy/forward.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup lexer_h HTML and XML Lexing
**
** These functions and structures form the internal API for document
** lexing.
**
** @{
******************************************************************************/
/**
* Lexer character types.
* Each constant is a distinct single-bit unsigned value (1, 2, 4, ... 128).
*/
#define digit 1u
#define letter 2u
#define namechar 4u
#define white 8u
#define newline 16u
#define lowercase 32u
#define uppercase 64u
#define digithex 128u
/**
* node->type is one of these values
* (this enum is the type of the `type` member of `struct _Node`).
* No explicit values are given, so the enumerators number consecutively
* from RootNode == 0.
*/
typedef enum
{
RootNode,
DocTypeTag,
CommentTag,
ProcInsTag,
TextNode,
StartTag,
EndTag,
StartEndTag,
CDATATag,
SectionTag,
AspTag,
JsteTag,
PhpTag,
XmlDecl
} NodeType;
/**
* Lexer GetToken() states.
* This enum is the type of the `state` member of `struct _Lexer`.
* No explicit values are given, so the enumerators number consecutively
* from LEX_CONTENT == 0.
*/
typedef enum
{
LEX_CONTENT,
LEX_GT,
LEX_ENDTAG,
LEX_STARTTAG,
LEX_COMMENT,
LEX_DOCTYPE,
LEX_PROCINSTR,
LEX_CDATA,
LEX_SECTION,
LEX_ASP,
LEX_JSTE,
LEX_PHP,
LEX_XMLDECL
} LexerState;
/**
* ParseDocTypeDecl state constants.
* No explicit values are given, so the enumerators number consecutively
* from DT_INTERMEDIATE == 0.
*/
typedef enum
{
DT_INTERMEDIATE,
DT_DOCTYPENAME,
DT_PUBLICSYSTEM,
DT_QUOTEDSTRING,
DT_INTSUBSET
} ParseDocTypeDeclState;
/**
* Content model shortcut encoding.
* Descriptions are tentative.
* Apart from CM_UNKNOWN (0), every constant is a distinct single bit.
*/
#define CM_UNKNOWN 0
#define CM_EMPTY (1 << 0) /**< Elements with no content. Map to HTML specification. */
#define CM_HTML (1 << 1) /**< Elements that appear outside of "BODY". */
#define CM_HEAD (1 << 2) /**< Elements that can appear within HEAD. */
#define CM_BLOCK (1 << 3) /**< HTML "block" elements. */
#define CM_INLINE (1 << 4) /**< HTML "inline" elements. */
#define CM_LIST (1 << 5) /**< Elements that mark list item ("LI"). */
#define CM_DEFLIST (1 << 6) /**< Elements that mark definition list item ("DL", "DT"). */
#define CM_TABLE (1 << 7) /**< Elements that can appear inside TABLE. */
#define CM_ROWGRP (1 << 8) /**< Used for "THEAD", "TFOOT" or "TBODY". */
#define CM_ROW (1 << 9) /**< Used for "TD", "TH" */
#define CM_FIELD (1 << 10) /**< Elements whose content must be protected against white space movement. Includes some elements that can be found in forms. */
#define CM_OBJECT (1 << 11) /**< Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET. */
#define CM_PARAM (1 << 12) /**< Elements that allow "PARAM". */
#define CM_FRAMES (1 << 13) /**< "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
#define CM_HEADING (1 << 14) /**< Heading elements (h1, h2, ...). */
#define CM_OPT (1 << 15) /**< Elements with an optional end tag. */
#define CM_IMG (1 << 16) /**< Elements that use "align" attribute for vertical position. */
#define CM_MIXED (1 << 17) /**< Elements with inline and block model. Used to avoid calling InlineDup. */
#define CM_NO_INDENT (1 << 18) /**< Elements whose content needs to be indented only if containing one CM_BLOCK element. */
#define CM_OBSOLETE (1 << 19) /**< Elements that are obsolete (such as "dir", "menu"). */
#define CM_NEW (1 << 20) /**< User defined elements. Used to determine how attributes without value should be printed. */
#define CM_OMITST (1 << 21) /**< Elements that cannot be omitted. */
#define CM_VOID (1 << 22) /**< Elements that are void per https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements. */
/**
* If the document uses just HTML 2.0 tags and attributes described
* it is HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
* If there are proprietary tags and attributes then describe it as
* HTML Proprietary. If it includes the xml-lang or xmlns attributes
* but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
* flavors of Voyager (strict, loose or frameset).
*
* Every primitive constant below (HT20 ... XH50) is a distinct single
* bit; the compatibility and meta symbols are bitwise-OR combinations
* of those primitives.
*/
/* unknown */
#define xxxx 0u
/* W3C defined HTML/XHTML family document types */
#define HT20 1u
#define HT32 2u
#define H40S 4u
#define H40T 8u
#define H40F 16u
#define H41S 32u
#define H41T 64u
#define H41F 128u
#define X10S 256u
#define X10T 512u
#define X10F 1024u
#define XH11 2048u
#define XB10 4096u
/* proprietary stuff */
#define VERS_SUN 8192u
#define VERS_NETSCAPE 16384u
#define VERS_MICROSOFT 32768u
/* special flag */
#define VERS_XML 65536u
/* HTML5 */
#define HT50 131072u
#define XH50 262144u
/* compatibility symbols */
#define VERS_UNKNOWN (xxxx)
#define VERS_HTML20 (HT20)
#define VERS_HTML32 (HT32)
#define VERS_HTML40_STRICT (H40S|H41S|X10S)
#define VERS_HTML40_LOOSE (H40T|H41T|X10T)
#define VERS_FRAMESET (H40F|H41F|X10F)
#define VERS_XHTML11 (XH11)
#define VERS_BASIC (XB10)
/* HTML5 */
#define VERS_HTML5 (HT50|XH50)
/* meta symbols */
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET)
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME)
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11)
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40|HT50)
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC|VERS_HTML5)
#define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50)
/* strict */
#define VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT)
/* all W3C defined document types */
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
/* all proprietary types */
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
/**
* Linked list of class names and styles
* (the lexer keeps the list head in its `styles` member).
*/
struct _Style;
typedef struct _Style TagStyle;
struct _Style
{
tmbstr tag; /* NOTE(review): presumably the element name the style applies to; confirm */
tmbstr tag_class; /* NOTE(review): presumably the class name; confirm */
tmbstr properties; /* NOTE(review): presumably the style property text; confirm */
TagStyle *next; /* next entry in the list */
};
/**
* Linked list of style properties
* (one name/value pair per entry).
*/
struct _StyleProp;
typedef struct _StyleProp StyleProp;
struct _StyleProp
{
tmbstr name; /* property name */
tmbstr value; /* property value */
StyleProp *next; /* next entry in the list */
};
/**
* Attribute/Value linked list node
* (a node's list hangs off its `attributes` member).
*/
struct _AttVal
{
AttVal* next; /* next attribute in the list */
const Attribute* dict; /* NOTE(review): presumably the attribute's dictionary definition; confirm */
Node* asp; /* NOTE(review): presumably an attached ASP node; confirm */
Node* php; /* NOTE(review): presumably an attached PHP node; confirm */
int delim; /* NOTE(review): presumably the quote character delimiting the value; confirm */
tmbstr attribute; /* attribute name string */
tmbstr value; /* attribute value string */
};
/**
* Mosaic handles inlines via a separate stack from other elements
* We duplicate this to recover from inline markup errors such as:
* ~~~
* <i>italic text
* <p>more italic text</b> normal text
* ~~~
* which for compatibility with Mosaic is mapped to:
* ~~~
* <i>italic text</i>
* <p><i>more italic text</i> normal text
* ~~~
* Note that any inline end tag pops the effect of the current
* inline start tag, so that `</b>` pops `<i>` in the above example.
*/
struct _IStack
{
IStack* next; /* NOTE(review): link to another stack entry; direction not visible here */
const Dict* tag; /**< tag's dictionary definition */
tmbstr element; /**< name (NULL for text nodes) */
AttVal* attributes; /* attribute list saved with the entry */
};
/**
* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
* `start` and `end` are offsets into the lexer's `lexbuf`.
*/
struct _Node
{
Node* parent; /**< tree structure */
Node* prev; /* NOTE(review): presumably the previous peer node; confirm */
Node* next; /* next peer node (FreeNode() iterates peers through this link) */
Node* content; /* child content; with `parent` it allows traversal of the tree */
Node* last; /* NOTE(review): presumably the last child; confirm */
AttVal* attributes; /* linked list of attribute/value pairs */
const Dict* was; /**< old tag when it was changed */
const Dict* tag; /**< tag's dictionary definition */
tmbstr element; /**< name (NULL for text nodes) */
uint start; /**< start of span onto text array */
uint end; /**< end of span onto text array */
NodeType type; /**< TextNode, StartTag, EndTag etc. */
uint line; /**< current line of document */
uint column; /**< current column of document */
int idx; /**< general purpose register */
Bool closed; /**< true if closed by explicit end tag */
Bool implicit; /**< true if inferred */
Bool linebreak; /**< true if followed by a line break */
};
/**
* The following are private to the lexer.
* Use `NewLexer()` to create a lexer, and `FreeLexer()` to free it.
*/
struct _Lexer
{
uint lines; /**< lines seen */
uint columns; /**< at start of current token */
Bool waswhite; /**< used to collapse contiguous white space */
Bool pushed; /**< true after token has been pushed back */
Bool insertspace; /**< when space is moved after end tag */
Bool excludeBlocks; /**< Netscape compatibility */
Bool exiled; /**< true if moved out of table */
Bool isvoyager; /**< true if xmlns attribute on html element (i.e., "Voyager" was the W3C codename for XHTML). */
uint versions; /**< bit vector of HTML versions */
uint doctype; /**< version as given by doctype (if any) */
uint versionEmitted; /**< version of doctype emitted */
Bool bad_doctype; /**< e.g. if html or PUBLIC is missing */
uint txtstart; /**< start of current node */
uint txtend; /**< end of current node */
LexerState state; /**< state of lexer's finite state machine */
Node* token; /**< last token returned by GetToken() */
Node* itoken; /**< last duplicate inline returned by GetToken() */
Node* root; /**< remember root node of the document */
Node* parent; /**< remember parent node for CDATA elements */
Bool seenEndBody; /**< true if a `</body>` tag has been encountered */
Bool seenEndHtml; /**< true if a `</html>` tag has been encountered */
/*
Lexer character buffer
Parse tree nodes span onto this buffer
which contains the concatenated text
contents of all of the elements.
lexsize must be reset for each file.
*/
tmbstr lexbuf; /**< MB character buffer */
uint lexlength; /**< allocated */
uint lexsize; /**< used */
/* Inline stack for compatibility with Mosaic */
Node* inode; /**< for deferring text node */
IStack* insert; /**< for inferring inline tags */
IStack* istack; /* NOTE(review): presumably the inline stack storage that istacklength/istacksize describe; confirm */
uint istacklength; /**< allocated */
uint istacksize; /**< used */
uint istackbase; /**< start of frame */
TagStyle *styles; /**< used for cleaning up presentation markup */
TidyAllocator* allocator; /**< allocator */
};
/**
* modes for GetToken()
* (passed as its `mode` argument). No explicit values are given, so the
* enumerators number consecutively from IgnoreWhitespace == 0.
*/
typedef enum
{
IgnoreWhitespace, /**< */
MixedContent, /**< for elements which don't accept PCDATA */
Preformatted, /**< white space preserved as is */
IgnoreMarkup, /**< for CDATA elements such as script, style */
OtherNamespace, /**< */
CdataContent /**< */
} GetTokenMode;
/** @name Lexer Functions
* @{
*/
/**
* Choose what version to use for new doctype
*/
int TY_(HTMLVersion)( TidyDocImpl* doc );
/**
* Everything is allowed in proprietary version of HTML.
* This is handled here rather than in the tag/attr dicts
*/
void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
Bool TY_(IsWhite)(uint c);
Bool TY_(IsDigit)(uint c);
Bool TY_(IsLetter)(uint c);
Bool TY_(IsHTMLSpace)(uint c);
Bool TY_(IsNewline)(uint c);
Bool TY_(IsNamechar)(uint c);
Bool TY_(IsXMLLetter)(uint c);
Bool TY_(IsXMLNamechar)(uint c);
Bool TY_(IsUpper)(uint c);
uint TY_(ToLower)(uint c);
uint TY_(ToUpper)(uint c);
Lexer* TY_(NewLexer)( TidyDocImpl* doc );
void TY_(FreeLexer)( TidyDocImpl* doc );
/**
* Store character c as UTF-8 encoded byte stream
*/
void TY_(AddCharToLexer)( Lexer *lexer, uint c );
/**
* Used for elements and text nodes.
* - Element name is NULL for text nodes.
* - start and end are offsets into lexbuf,
* which contains the textual content of
* all elements in the parse tree.
* - parent and content allow traversal
* of the parse tree in any direction.
* - attributes are represented as a linked
* list of AttVal nodes which hold the
* strings for attribute/value pairs.
*/
Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
/**
* Used to clone heading nodes when split by an `<HR>`
*/
Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
/**
* Free node's attributes
*/
void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
/**
* Doesn't repair attribute list linkage
*/
void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
/**
* Detach attribute from node
*/
void TY_(DetachAttribute)( Node *node, AttVal *attr );
/**
* Detach attribute from node then free it.
*/
void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
/**
* Free document nodes by iterating through peers and recursing
* through children. Set `next` to `NULL` before calling `FreeNode()`
* to avoid freeing peer nodes. Doesn't patch up prev/next links.
*/
void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
Node* TY_(TextToken)( Lexer *lexer );
/**
* Used for creating preformatted text from Word2000.
*/
Node* TY_(NewLineNode)( Lexer *lexer );
/**
* Used for adding a &nbsp; for Word2000.
*/
Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
Node* TY_(FindDocType)( TidyDocImpl* doc );
Node* TY_(FindHTML)( TidyDocImpl* doc );
Node* TY_(FindHEAD)( TidyDocImpl* doc );
Node* TY_(FindTITLE)(TidyDocImpl* doc);
Node* TY_(FindBody)( TidyDocImpl* doc );
Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
/**
* Returns containing block element, if any
*/
Node* TY_(FindContainer)( Node* node );
/**
* Add meta element for Tidy.
*/
Bool TY_(AddGenerator)( TidyDocImpl* doc );
uint TY_(ApparentVersion)( TidyDocImpl* doc );
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml );
uint TY_(HTMLVersionNumberFromCode)( uint vers );
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
/**
* Fixup doctype if missing.
*/
Bool TY_(FixDocType)( TidyDocImpl* doc );
/**
* Ensure XML document starts with <?xml version="1.0"?>, and
* add encoding attribute if not using ASCII or UTF-8 output.
*/
Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
void TY_(UngetToken)( TidyDocImpl* doc );
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
void TY_(InitMap)(void);
/**
* Create a new attribute.
*/
AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
/**
* Create a new attribute with given name and value.
*/
AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
int delim );
/**
* Insert attribute at the end of attribute list of a node.
*/
void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
/**
* Insert attribute at the start of attribute list of a node.
*/
void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
/** @}
* @name Inline Stack Functions
* @{
*/
/**
* Duplicate attributes.
*/
AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
/**
* Push a copy of an inline node onto stack, but don't push if
* implicit or OBJECT or APPLET (implicit tags are ones generated
* from the istack).
*
* One issue arises with pushing inlines when the tag is already pushed.
* For instance:
* ~~~
* <p><em>text
* <p><em>more text
* ~~~
* Shouldn't be mapped to
* ~~~
* <p><em>text</em></p>
* <p><em><em>more text</em></em>
* ~~~
*/
void TY_(PushInline)( TidyDocImpl* doc, Node* node );
/**
* Pop inline stack.
*/
void TY_(PopInline)( TidyDocImpl* doc, Node* node );
Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
/**
* This has the effect of inserting "missing" inline elements around the
* contents of blocklevel elements such as P, TD, TH, DIV, PRE etc. This
* procedure is called at the start of `ParseBlock`, when the inline
* stack is not empty, as will be the case in:
* ~~~
* <i><h1>italic heading</h1></i>
* ~~~
* which is then treated as equivalent to
* ~~~
* <h1><i>italic heading</i></h1>
* ~~~
* This is implemented by setting the lexer into a mode where it gets
* tokens from the inline stack rather than from the input stream.
*/
int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
/**
* Defer duplicates when entering a table or other
* element where the inlines shouldn't be duplicated.
*/
void TY_(DeferDup)( TidyDocImpl* doc );
Node* TY_(InsertedToken)( TidyDocImpl* doc );
/**
* Stack manipulation for inline elements
*/
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
/** @}
* @name Generic stack of nodes.
* @{
*/
/**
* This typedef represents a stack of addresses to nodes. Tidy uses these to
* try to limit recursion by pushing nodes to a stack when possible instead
* of recursing.
*/
typedef struct _Stack {
int top; /**< Current top position; -1 when the stack is empty. */
unsigned capacity; /**< Current capacity. Can be expanded. */
Node **firstNode; /**< A pointer to the first pointer to a Node in an array of node addresses. */
TidyAllocator* allocator; /**< Tidy's allocator, used at instantiation and expanding. */
} Stack;
/**
* Create a new stack with a given starting capacity. If memory allocation
* fails, then the allocator will panic the program automatically.
*/
Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity);
/**
* Increase the stack size. This will be called automatically when the
* current stack is full. If memory allocation fails, then the allocator
* will panic the program automatically.
*/
void TY_(growStack)(Stack *stack);
/**
* Stack is full when top is equal to the last index.
*/
Bool TY_(stackFull)(Stack *stack);
/**
* Stack is empty when top is equal to -1
*/
Bool TY_(stackEmpty)(Stack *stack);
/**
* Push an item to the stack.
*/
void TY_(push)(Stack *stack, Node *node);
/**
* Pop an item from the stack.
*/
Node* TY_(pop)(Stack *stack);
/**
* Peek at the stack.
*/
Node* TY_(peek)(Stack *stack);
/**
* Frees the stack when done.
*/
void TY_(freeStack)(Stack *stack);
/** @}
*/
#ifdef __cplusplus
}
#endif
/** @} end lexer_h group */
/** @} end internal_api group */
#endif /* __LEXER_H__ */

333
third_party/tidy/mappedio.c vendored Normal file
View file

@ -0,0 +1,333 @@
/* clang-format off */
/* Interface to mmap style I/O
(c) 2006-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Originally contributed by Cory Nelson and Nuno Lopes
*/
/* keep these here to keep file non-empty */
#include "third_party/tidy/forward.h"
#include "libc/assert.h"
#include "libc/calls/struct/stat.h"
#include "libc/calls/struct/stat.h"
#include "libc/sysv/consts/prot.h"
#include "libc/calls/calls.h"
#include "libc/sysv/consts/map.h"
#include "third_party/tidy/mappedio.h"
#if SUPPORT_POSIX_MAPPED_FILES
#include "third_party/tidy/fileio.h"
/* State of an input source backed by a read-only mmap() of a whole file. */
typedef struct
{
TidyAllocator *allocator; /* allocator that owns this struct; used to free it */
const byte *base; /* start of the mapped file contents */
size_t pos, size; /* pos: offset of the next byte to read; size: mapped length (the file's st_size) */
} MappedFileSource;
static int mapped_getByte( void* sourceData )
{
MappedFileSource* fin = (MappedFileSource*) sourceData;
return fin->base[fin->pos++];
}
/* Reports end of input: true once the read offset has reached (or passed)
   the mapped size. */
static Bool mapped_eof( void* sourceData )
{
    MappedFileSource* source = (MappedFileSource*) sourceData;

    return ( source->size <= source->pos );
}
/* Pushes one byte back by moving the read offset back one position. The
   byte argument is ignored because the data is still in the mapping. */
static void mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* source = (MappedFileSource*) sourceData;

    source->pos -= 1;
}
/* Sets up `inp` to read the file behind `fp`.
   The whole file is mapped read-only. On success `fp` is closed (the
   mapping stays valid) and `inp` gets the mapped_* callbacks.
   If fstat() fails, the file is empty, or mmap() fails, this falls back
   to the standard I/O source and returns that function's result.
   Returns -1 if the state struct cannot be allocated, 0 on a successful
   mapping. */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    MappedFileSource* source;
    struct stat info;
    int descriptor;

    source = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !source )
        return -1;

    descriptor = fileno(fp);

    if ( fstat(descriptor, &info) != -1 && info.st_size != 0 )
    {
        source->size = info.st_size;
        source->base = mmap(0, source->size, PROT_READ, MAP_SHARED, descriptor, 0);

        if ( source->base != MAP_FAILED )
        {
            source->pos = 0;
            source->allocator = allocator;

            /* The mapping outlives the stream, so the stream can go. */
            fclose(fp);

            inp->getByte = mapped_getByte;
            inp->eof = mapped_eof;
            inp->ungetByte = mapped_ungetByte;
            inp->sourceData = source;
            return 0;
        }
    }

    TidyFree( allocator, source );
    /* Fallback on standard I/O */
    return TY_(initStdIOFileSource)( allocator, inp, fp );
}
/* Releases an input source created by TY_(initFileSource).
 * A source that is not mmap-backed (recognised by its getByte callback)
 * is handed to TY_(freeStdIOFileSource); otherwise the mapping is removed
 * and the source record is freed. `closeIt` only matters for the stdio
 * path, because the mapped path already closed its FILE at init time.
 */
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* src;

    if ( inp->getByte != mapped_getByte )
    {
        TY_(freeStdIOFileSource)( inp, closeIt );
        return;
    }

    src = (MappedFileSource*) inp->sourceData;
    munmap( (void*)src->base, src->size );
    TidyFree( src->allocator, src );
}
#endif /* SUPPORT_POSIX_MAPPED_FILES */
#if defined(_WIN32)
# if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
# pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
# endif
# include "streamio.h"
# include "tidy-int.h"
# include "message.h"
/* State of a memory-mapped input file (Windows variant). The file is
 * viewed one window of `gran` bytes at a time rather than all at once.
 */
typedef struct _fp_input_mapped_source
{
TidyAllocator *allocator; /* allocator that owns this record */
LONGLONG size, pos; /* size: file size in bytes; pos: file offset of the current view */
HANDLE file, map; /* file handle and its file-mapping object */
byte *view, *iter, *end; /* current view: start, next byte to read, one past the last byte */
unsigned int gran; /* window size, taken from dwAllocationGranularity */
} MappedFileSource;
/* Maps the window of the file that starts at data->pos, replacing any
 * view that is currently open. The window is `gran` bytes long, or
 * shorter when fewer bytes remain in the file.
 * Returns 0 on success and -1 if the view cannot be mapped.
 */
static int mapped_openView( MappedFileSource *data )
{
    DWORD numb;

    if ( ( data->size - data->pos ) > data->gran )
        numb = data->gran;
    else
        numb = (DWORD)( data->size - data->pos );

    if ( data->view )
        UnmapViewOfFile( data->view );

    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, numb );
    if ( !data->view )
        return -1;

    data->iter = data->view;
    data->end  = data->view + numb;
    return 0;
}
/* TidyInputSource getByte callback for the windowed mapping.
 * Returns the next byte of the current view; when the view is exhausted
 * (or missing) it advances to the next window first. Returns EndOfStream
 * when the file is exhausted or the next window cannot be mapped.
 */
static int mapped_getByte( void *sourceData )
{
    MappedFileSource *src = sourceData;

    if ( src->view && src->iter < src->end )
        return *( src->iter++ );

    /* current window used up: move on to the next one */
    src->pos += src->gran;
    if ( src->pos >= src->size )
        return EndOfStream;
    if ( mapped_openView(src) != 0 )
        return EndOfStream;

    return *( src->iter++ );
}
/* TidyInputSource eof callback: true once the window offset has moved
 * to or past the end of the file.
 */
static Bool mapped_eof( void *sourceData )
{
    const MappedFileSource *src = sourceData;
    return ( src->size <= src->pos );
}
/* TidyInputSource ungetByte callback for the windowed mapping.
 * Steps the read pointer back by one byte. The byte value is ignored
 * because the mapping still holds the original data.
 *
 * When the pointer is already at the first byte of the current view the
 * previous window is mapped and the pointer is placed on its last byte.
 * (The old test `iter >= view` was always true, so the pointer could be
 * moved in front of the view and the window step-back was never reached.)
 * Ungetting at the very start of the file is a caller error and asserts.
 */
static void mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = sourceData;

    if ( data->iter > data->view )
    {
        --data->iter;
        return;
    }

    if ( data->pos < data->gran )
    {
        assert(0);
        return;
    }

    data->pos -= data->gran;
    /* a freshly opened view starts at its first byte; the byte being
       "ungot" is the last one of the previous window */
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
/* Prepares `inp` to read the open file handle `fp` through a windowed
 * file mapping. Determines the file size (three compiler-specific ways),
 * creates the mapping object, reads the allocation granularity and opens
 * the first view.
 * Returns 0 on success; returns -1 and frees the source record on any
 * failure. The handle `fp` itself is never closed here.
 */
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
MappedFileSource* fin = NULL;
/* The callbacks are installed first; inp->sourceData is only set on success. */
inp->getByte = mapped_getByte;
inp->eof = mapped_eof;
inp->ungetByte = mapped_ungetByte;
fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
if ( !fin )
return -1;
# if defined(__MINGW32__)
{
/* MinGW: build the 64-bit size from the two 32-bit halves of GetFileSize(). */
DWORD lowVal, highVal;
lowVal = GetFileSize(fp, &highVal);
/* INVALID_FILE_SIZE is also a legal low word, so GetLastError() decides. */
if ((lowVal == INVALID_FILE_SIZE) && (GetLastError() != NO_ERROR))
{
TidyFree(allocator, fin);
return -1;
}
fin->size = highVal;
fin->size = (fin->size << 32);
fin->size += lowVal;
}
# else /* NOT a MinGW build */
# if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
{
/* Old MSVC: write the two halves straight into fin->size via LARGE_INTEGER. */
LARGE_INTEGER* pli = (LARGE_INTEGER *)&fin->size;
(DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );
if ( GetLastError() != NO_ERROR || fin->size <= 0 )
{
TidyFree(allocator, fin);
return -1;
}
}
# else
/* Everything else: GetFileSizeEx() fills the 64-bit size directly; empty files are rejected. */
if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size )
|| fin->size <= 0 )
{
TidyFree(allocator, fin);
return -1;
}
# endif
# endif /* MinGW y/n */
/* Maximum size 0/0 makes the mapping object cover the whole file. */
fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );
if ( !fin->map )
{
TidyFree(allocator, fin);
return -1;
}
{
/* The allocation granularity is used as the window size for every view. */
SYSTEM_INFO info;
GetSystemInfo( &info );
fin->gran = info.dwAllocationGranularity;
}
fin->allocator = allocator;
fin->pos = 0;
fin->view = NULL;
fin->iter = NULL;
fin->end = NULL;
/* Map the first window so the first getByte call can read immediately. */
if ( mapped_openView( fin ) != 0 )
{
CloseHandle( fin->map );
TidyFree( allocator, fin );
return -1;
}
fin->file = fp;
inp->sourceData = fin;
return 0;
}
/* Releases everything owned by a source set up by initMappedFileSource.
 *
 * The view and the mapping object belong to the source and are always
 * released; the underlying file handle is closed only when `closeIt` is
 * set. Previously the view and mapping leaked when `closeIt` was false,
 * and a source without data dereferenced a NULL record.
 */
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );
    if ( fin->map )
        CloseHandle( fin->map );
    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    /* do not leave a dangling pointer behind in the input source */
    inp->sourceData = NULL;
    TidyFree( fin->allocator, fin );
}
/* Creates a StreamIn for `doc` that reads the file handle `fp` through
 * the memory-mapped source, using the given input encoding.
 * Returns NULL (after freeing the stream) if the mapping cannot be set up.
 */
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    if ( initMappedFileSource( doc->allocator, &stream->source, fp ) == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    TY_(freeStreamIn)( stream );
    return NULL;
}
/* Opens `filnam` for reading and parses it into `doc` through a
 * memory-mapped input stream (Windows builds only).
 * Returns the result of TY_(DocParseStream) on success, -ENOENT (after
 * reporting FILE_CANT_OPEN) if the file cannot be opened, or -ENOMEM if
 * the mapped input stream cannot be created.
 */
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
int status = -ENOENT;
HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
OPEN_EXISTING, 0, NULL );
# if PRESERVE_FILE_TIMES
/* Remember the access/modification times so they can be restored later. */
LONGLONG actime, modtime;
TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
{
/* TY_I64 appends the 64-bit literal suffix understood by the compiler. */
# define TY_I64(str) TYDYAPPEND(str,LL)
# if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */
# undef TY_I64
# define TY_I64(str) TYDYAPPEND(str,i64)
# endif
/* FILETIME counts 100 ns ticks since 1601-01-01; 116444736000000000 is
   the tick count up to the Unix epoch and 10000000 ticks make a second. */
doc->filetimes.actime =
(time_t)( ( actime - TY_I64(116444736000000000)) / 10000000 );
doc->filetimes.modtime =
(time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
}
# endif /* PRESERVE_FILE_TIMES */
if ( fin != INVALID_HANDLE_VALUE )
{
StreamIn* in = MappedFileInput( doc, fin,
cfg( doc, TidyInCharEncoding ) );
if ( !in )
{
/* The source never took ownership of the handle, so close it here. */
CloseHandle( fin );
return -ENOMEM;
}
status = TY_(DocParseStream)( doc, in );
/* `yes`: also close the file handle that was opened above. */
freeMappedFileSource( &in->source, yes );
TY_(freeStreamIn)( in );
}
else /* Error message! */
TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
return status;
}
#endif /* defined(_WIN32) */

16
third_party/tidy/mappedio.h vendored Normal file
View file

@ -0,0 +1,16 @@
#ifndef __TIDY_MAPPED_IO_H__
#define __TIDY_MAPPED_IO_H__
/* clang-format off */
/* Interface to mmap style I/O
(c) 2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#if defined(_WIN32)
/* Parses the file named `filnam` into `doc` using memory-mapped I/O.
   Declared for Windows builds only. Returns the parse status, or a
   negative errno-style value when the file cannot be opened or mapped. */
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam );
#endif
#endif /* __TIDY_MAPPED_IO_H__ */

1612
third_party/tidy/message.c vendored Normal file

File diff suppressed because it is too large Load diff

318
third_party/tidy/message.h vendored Normal file
View file

@ -0,0 +1,318 @@
#ifndef __MESSAGE_H__
#define __MESSAGE_H__
/* clang-format off */
/******************************************************************************
* @file
* Provides General Message Writing Routines
*
* This module handles LibTidy's high level output routines, as well as
* provides lookup functions and management for keys used for retrieval
* of these messages.
*
* LibTidy emits two general types of output:
*
* - Reports, which contain data relating to what Tidy discovered in your
* source file, and/or what Tidy did to your source file. In some cases
* general information about your source file is emitted as well. Reports
* are emitted in the current output buffer, but LibTidy users will probably
* prefer to hook into a callback in order to take advantage of the data
* that are available in a more flexible way.
*
* - Dialogue, consisting of footnotes related to your source file, and of
* general information that's not related to your source file in particular.
* This is also written to the current output buffer when appropriate, and
* available via callbacks.
*
* Report information typically takes the form of a warning, an error, info,
* etc., and the output routines keep track of the count of these.
*
* The preferred way of handling Tidy diagnostics output is either
* - define a new output sink, or
* - use a message filter callback routine.
*
* @author HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University) and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/config.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup message_releaseinfo Tidy Release Information
**
** These functions return information about the current release version date
** and version number. Note that the latest release date or the highest
** version number alone do not guarantee the latest Tidy release, as we may
** backport important fixes to older releases of Tidy.
**
** @{
******************************************************************************/
/**
* Returns the release date of this instance of HTML Tidy.
*/
ctmbstr TY_(ReleaseDate)(void);
/**
* Returns the release version of this instance of HTML Tidy.
*/
ctmbstr TY_(tidyLibraryVersion)(void);
/** @} message_releaseinfo group */
/***************************************************************************//**
** @defgroup message_reporting Report and Dialogue Writing Functions
**
** These simple functions perform the vast majority of Tidy's output, and
** one of these should be your first choice when adding your own output.
**
** A report is typically diagnostic output that is generated each time Tidy
** detects an issue in your document or makes a change. A dialogue is a piece
** of information such as a summary, a footnote, or other non-tabular data.
** Some of these functions emit multiple reports or dialogue in order to
** effect a summary.
**
** @{
******************************************************************************/
/** @name General Report Writing
** If one of the convenience reporting functions does not fit your required
** message signature, then this designated reporting function will fit the
** bill. Be sure to see if a message formatter exists that can handle the
** variable arguments.
*/
/** @{ */
/**
* The designated report writing function. When a proper formatter exists,
* this one function can handle all report output.
*/
void TY_(Report)(TidyDocImpl* doc, Node *element, Node *node, uint code, ...);
/** @} */
/** @name Convenience Reporting Functions
** These convenience reporting functions are able to handle the bulk of Tidy's
** necessary reporting, and avoid the danger of using a variadic if you are
** unfamiliar with Tidy.
*/
/** @{ */
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code );
void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code);
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
void TY_(ReportSurrogateError)(TidyDocImpl* doc, uint code, uint c1, uint c2);
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option );
/** @} */
/** @name General Dialogue Writing
** These functions produce dialogue output such as individual messages, or
** several messages in summary form.
*/
/** @{ */
/**
* Emits a single dialogue message, and is capable of accepting a variadic
* that is passed to the correct message formatter as needed.
*/
void TY_(Dialogue)( TidyDocImpl* doc, uint code, ... );
/** @} */
/** @name Output Dialogue Information */
/** @{ */
/**
* Outputs the footnotes and other dialogue information after document cleanup
* is complete. LibTidy users might consider capturing these individually in
* the message callback rather than capturing this entire buffer.
* Called by `tidyErrorSummary()`, in console.
* @todo: This name is a bit misleading and should probably be renamed to
* indicate its focus on printing footnotes.
*/
void TY_(ErrorSummary)( TidyDocImpl* doc );
/**
* Outputs document HTML version and version-related information as the final
* report(s) in the report table.
* Called by `tidyRunDiagnostics()`, from console.
* Called by `tidyDocReportDoctype()`, currently unused.
*/
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
/**
* Reports the number of warnings and errors found in the document as dialogue
* information.
* Called by `tidyRunDiagnostics()`, from console.
*/
void TY_(ReportNumWarnings)( TidyDocImpl* doc );
/** @} */
/** @} message_reporting group */
/***************************************************************************//**
** @defgroup message_muting Message Muting
**
** Message types included in the `mute` option will not be printed in
** messageOut().
**
** @{
******************************************************************************/
/** Maintains the list of message codes that are muted, i.e. not displayed.
 ** The list is owned by the Tidy document and released with
 ** TY_(FreeMutedMessageList)().
 */
typedef struct _mutedMessages {
tidyStrings* list; /**< A list of messages that won't be output. */
uint count; /**< Current count of the list. */
uint capacity; /**< Current capacity of the list. */
} TidyMutedMessages;
/** Frees the list of muted messages.
** @param doc The Tidy document.
*/
void TY_(FreeMutedMessageList)( TidyDocImpl* doc );
/** Adds a new message ID to the list of muted messages.
** @param doc The Tidy document.
** @param opt The option that is defining the muted message.
** @param name The message code as a string.
*/
void TY_(DefineMutedMessage)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
/** Start an iterator for muted messages.
** @param doc The Tidy document.
** @returns Returns an iterator token.
*/
TidyIterator TY_(getMutedMessageList)( TidyDocImpl* doc );
/** Get the next muted message.
** @param doc The Tidy document.
** @param iter The iterator token.
** @returns The next muted message, as a string.
*/
ctmbstr TY_(getNextMutedMessage)( TidyDocImpl* doc, TidyIterator* iter );
/** @} message_muting group */
/***************************************************************************//**
** @defgroup message_keydiscovery Key Discovery
**
** LibTidy users may want to use `TidyReportCallback` to enable their own
** localization lookup features. Because Tidy's report codes are enums the
** specific values can change over time. Using these functions provides the
** ability for LibTidy users to use LibTidy's enum values as strings for
** lookup purposes.
**
** @{
******************************************************************************/
/**
* This function returns a string representing the enum value name that can
* be used as a lookup key independent of changing string values.
* `TidyReportCallback` will return this general string as the report
* message key.
*/
ctmbstr TY_(tidyErrorCodeAsKey)(uint code);
/**
* Given an error code string, return the integer value of it, or UINT_MAX
* as an error flag.
*/
uint TY_(tidyErrorCodeFromKey)(ctmbstr code);
/**
* Initializes the TidyIterator to point to the first item
* in Tidy's list of error codes that can be returned with
* `TidyReportFilter3`.
* Items can be retrieved with getNextErrorCode();
*/
TidyIterator TY_(getErrorCodeList)(void);
/**
* Returns the next error code having initialized the iterator
* with `getErrorCodeList()`. You can use tidyErrorCodeAsKey
* to determine the key for this value.
*/
uint TY_(getNextErrorCode)( TidyIterator* iter );
/** @} message_keydiscovery group */
/** @} internal_api addtogroup */
/* accessibility flaws (bit flags) */
#define BA_MISSING_IMAGE_ALT 1
#define BA_MISSING_LINK_ALT 2
#define BA_MISSING_SUMMARY 4
#define BA_MISSING_IMAGE_MAP 8
#define BA_USING_FRAMES 16
#define BA_USING_NOFRAMES 32
#define BA_INVALID_LINK_NOFRAMES 64 /* WAI [6.5.1.4] */
/* Top bit of a 32-bit flag word. The operand is unsigned because
   shifting a signed 1 into the sign bit is undefined behaviour. */
#define BA_WAI (1u << 31)
/* presentation flaws (bit flags) */
#define USING_SPACER 1
#define USING_LAYER 2
#define USING_NOBR 4
#define USING_FONT 8
#define USING_BODY 16
/* badchar bit field */
#define BC_VENDOR_SPECIFIC_CHARS 1
#define BC_INVALID_SGML_CHARS 2
#define BC_INVALID_UTF8 4
#define BC_INVALID_UTF16 8
#define BC_ENCODING_MISMATCH 16 /* fatal error */
#define BC_INVALID_URI 32
#define BC_INVALID_NCR 64
/* other footnote bit field (temporary until formalized) */
#define FN_TRIM_EMPTY_ELEMENT 1
/* Lexer and I/O Macros */
#define REPLACED_CHAR 0
#define DISCARDED_CHAR 1
#endif /* __MESSAGE_H__ */

685
third_party/tidy/messageobj.c vendored Normal file
View file

@ -0,0 +1,685 @@
/* clang-format off */
/* messageobj.c
* Provides an external, extensible API for message reporting.
*
* (c) 2017 HTACG
* See tidy.h for the copyright notice.
*/
#include "third_party/tidy/messageobj.h"
#include "third_party/tidy/message.h"
#include "third_party/tidy/tidy-int.h"
#include "libc/assert.h"
#include "third_party/tidy/tmbstr.h"
/*********************************************************************
* BuildArgArray Support - declarations and forward declarations
*********************************************************************/
/** A record of a single argument and its type. An array of these
** represents the arguments supplied to a format string, ordered
** in the same position as they occur in the format string. Because
** older versions of Windows don't support positional arguments,
** Tidy doesn't either.
*/
/* Size of the per-argument buffer that holds a copy of its format specifier. */
#define FORMAT_LENGTH 21
struct printfArg {
TidyFormatParameterType type; /* type of the argument */
int formatStart; /* where the format starts */
int formatLength; /* length of the format */
char format[FORMAT_LENGTH]; /* buffer for the format */
union { /* the argument */
int i; /* value when type is tidyFormatType_INT */
uint ui; /* value when type is tidyFormatType_UINT */
double d; /* value when type is tidyFormatType_DOUBLE */
const char *s; /* value when type is tidyFormatType_STRING */
} u;
};
/** Returns a pointer to an allocated array of `printfArg` given a format
** string and a va_list, or NULL if not successful or no parameters were
** given. Parameter `rv` will return with the count of zero or more
** parameters if successful, else -1.
**
*/
static struct printfArg *BuildArgArray( TidyDocImpl *doc, ctmbstr fmt, va_list ap, int *rv );
/*********************************************************************
* Tidy Message Object Support
*********************************************************************/
/** Create an internal representation of a Tidy message with all of
 ** the information that we know about the message.
 **
 ** The function signature doesn't have to stay static and is a good
 ** place to add instantiation if expanding the API.
 **
 ** We currently know the doc, node, code, line, column, level, and
 ** args, and will pre-calculate all of the other members upon creation.
 ** This ensures that we can use members directly, immediately,
 ** without having to use accessors internally.
 **
 ** If any message callback filters are set up by API clients, they
 ** will be called here, and their combined verdict is stored in
 ** `allowMessage`.
 **
 ** This version serves as the designated initializer and as such
 ** requires every known parameter.
 **
 ** @param doc    The Tidy document the message belongs to.
 ** @param node   The node the message refers to, or NULL.
 ** @param code   The message code; also selects the format string.
 ** @param line   Line number to report.
 ** @param column Column number to report.
 ** @param level  The report level of the message.
 ** @param args   Arguments for the format string. The list is only ever
 **               consumed through copies, so the caller's list is intact.
 ** @returns A newly allocated message, to be released with
 **          TY_(tidyMessageRelease)().
 */
static TidyMessageImpl *tidyMessageCreateInitV( TidyDocImpl *doc,
                                                Node *node,
                                                uint code,
                                                int line,
                                                int column,
                                                TidyReportLevel level,
                                                va_list args )
{
    TidyMessageImpl *result = TidyDocAlloc(doc, sizeof(TidyMessageImpl));
    TidyDoc tdoc = tidyImplToDoc(doc);
    va_list args_copy;
    enum { sizeMessageBuf=2048 };
    ctmbstr pattern;
    uint i = 0;

    /* Things we know... */

    result->tidyDoc = doc;
    result->tidyNode = node;
    result->code = code;
    result->line = line;
    result->column = column;
    result->level = level;

    /* Is #719 - set 'muted' before any callbacks. */
    result->muted = no;
    while ((doc->muted.list) && (doc->muted.list[i] != 0))
    {
        if (doc->muted.list[i] == code)
        {
            result->muted = yes;
            break;
        }
        i++;
    }

    /* Things we create... */

    /* Each consumer below walks its own copy of the argument list. */
    va_copy(args_copy, args);
    result->arguments = BuildArgArray(doc, tidyDefaultString(code), args_copy, &result->argcount);
    va_end(args_copy);

    result->messageKey = TY_(tidyErrorCodeAsKey)(code);

    result->messageFormatDefault = tidyDefaultString(code);
    result->messageFormat = tidyLocalizedString(code);

    result->messageDefault = TidyDocAlloc(doc, sizeMessageBuf);
    va_copy(args_copy, args);
    TY_(tmbvsnprintf)(result->messageDefault, sizeMessageBuf, result->messageFormatDefault, args_copy);
    va_end(args_copy);

    result->message = TidyDocAlloc(doc, sizeMessageBuf);
    va_copy(args_copy, args);
    TY_(tmbvsnprintf)(result->message, sizeMessageBuf, result->messageFormat, args_copy);
    va_end(args_copy);

    /* Some things already hit us localized, and some things need to be
       localized here. Look for these codewords and replace them here.
     */
    TY_(strrep)(result->messageDefault, "STRING_PLAIN_TEXT", tidyDefaultString(STRING_PLAIN_TEXT));
    TY_(strrep)(result->message, "STRING_PLAIN_TEXT", tidyLocalizedString(STRING_PLAIN_TEXT));

    TY_(strrep)(result->messageDefault, "STRING_XML_DECLARATION", tidyDefaultString(STRING_XML_DECLARATION));
    TY_(strrep)(result->message, "STRING_XML_DECLARATION", tidyLocalizedString(STRING_XML_DECLARATION));

    TY_(strrep)(result->messageDefault, "STRING_ERROR_COUNT_WARNING", tidyDefaultStringN(STRING_ERROR_COUNT_WARNING, doc->warnings));
    TY_(strrep)(result->message, "STRING_ERROR_COUNT_WARNING", tidyLocalizedStringN(STRING_ERROR_COUNT_WARNING, doc->warnings));

    TY_(strrep)(result->messageDefault, "STRING_ERROR_COUNT_ERROR", tidyDefaultStringN(STRING_ERROR_COUNT_ERROR, doc->errors));
    TY_(strrep)(result->message, "STRING_ERROR_COUNT_ERROR", tidyLocalizedStringN(STRING_ERROR_COUNT_ERROR, doc->errors));

    result->messagePosDefault = TidyDocAlloc(doc, sizeMessageBuf);
    result->messagePos = TidyDocAlloc(doc, sizeMessageBuf);

    if ( cfgBool(doc, TidyEmacs) && cfgStr(doc, TidyEmacsFile) )
    {
        /* Change formatting to be parsable by GNU Emacs */
        TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, "%s:%d:%d: ", cfgStr(doc, TidyEmacsFile), line, column);
        TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, "%s:%d:%d: ", cfgStr(doc, TidyEmacsFile), line, column);
    }
    else if ( cfgBool(doc, TidyShowFilename) && cfgStr(doc, TidyEmacsFile) )
    {
        /* Include filename in output */
        TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, tidyDefaultString(FN_LINE_COLUMN_STRING),
            cfgStr(doc, TidyEmacsFile), line, column);
        TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, tidyLocalizedString(FN_LINE_COLUMN_STRING),
            cfgStr(doc, TidyEmacsFile), line, column);
    }
    else
    {
        /* traditional format */
        TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, tidyDefaultString(LINE_COLUMN_STRING), line, column);
        TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, tidyLocalizedString(LINE_COLUMN_STRING), line, column);
    }

    result->messagePrefixDefault = tidyDefaultString(level);
    result->messagePrefix = tidyLocalizedString(level);

    /* "%.0s" consumes a string argument without printing it, which lets a
       single three-argument call drop the position and/or the prefix. */
    if ( line > 0 && column > 0 )
        pattern = "%s%s%s";      /* pattern if there's location information */
    else
        pattern = "%.0s%s%s";    /* otherwise if there isn't */

    if ( level > TidyFatal )
        pattern = "%.0s%.0s%s";  /* dialog doesn't have pos or prefix */

    result->messageOutputDefault = TidyDocAlloc(doc, sizeMessageBuf);
    TY_(tmbsnprintf)(result->messageOutputDefault, sizeMessageBuf, pattern,
                     result->messagePosDefault, result->messagePrefixDefault,
                     result->messageDefault);

    result->messageOutput = TidyDocAlloc(doc, sizeMessageBuf);
    TY_(tmbsnprintf)(result->messageOutput, sizeMessageBuf, pattern,
                     result->messagePos, result->messagePrefix,
                     result->message);

    if ( ( cfgBool(doc, TidyMuteShow) == yes ) && level <= TidyFatal )
    {
        /*\ Issue #655 - Unsafe to use output buffer as one of the va_list
         *  input parameters in some snprintf implementations.
        \*/
        ctmbstr pc = TY_(tidyErrorCodeAsKey)(code);
        i = TY_(tmbstrlen)(result->messageOutputDefault);
        if (i < sizeMessageBuf)
            TY_(tmbsnprintf)(result->messageOutputDefault + i, sizeMessageBuf - i, " (%s)", pc );
        i = TY_(tmbstrlen)(result->messageOutput);
        if (i < sizeMessageBuf)
            TY_(tmbsnprintf)(result->messageOutput + i, sizeMessageBuf - i, " (%s)", pc );
    }

    result->allowMessage = yes;

    /* reportFilter is a simple error filter that provides minimal information
       to callback functions, and includes the message buffer in LibTidy's
       configured localization. As it's a "legacy" API, it does not receive
       TidyDialogue messages.*/
    if ( (result->level <= TidyFatal) && doc->reportFilter )
    {
        /* `&` rather than `&&`: every installed callback is always invoked */
        result->allowMessage = result->allowMessage & doc->reportFilter( tdoc, result->level, result->line, result->column, result->messageOutput );
    }

    /* reportCallback is intended to allow LibTidy users to localize messages
       via their own means by providing a key and the parameters to fill it.
       As it's a "legacy" API, it does not receive TidyDialogue messages. */
    if ( (result->level <= TidyFatal) && doc->reportCallback )
    {
        va_copy(args_copy, args);
        result->allowMessage = result->allowMessage & doc->reportCallback( tdoc, result->level, result->line, result->column, result->messageKey, args_copy );
        va_end(args_copy);
    }

    /* messageCallback is the newest interface to interrogate Tidy's
       emitted messages. */
    if ( doc->messageCallback )
    {
        result->allowMessage = result->allowMessage & doc->messageCallback( tidyImplToMessage(result) );
    }

    return result;
}
/** Creates a message that carries no node and no position (line and
 ** column are both 0). The variadic arguments fill the format string
 ** selected by `code`.
 */
TidyMessageImpl *TY_(tidyMessageCreate)( TidyDocImpl *doc,
                                         uint code,
                                         TidyReportLevel level,
                                         ... )
{
    TidyMessageImpl *msg;
    va_list ap;

    va_start(ap, level);
    msg = tidyMessageCreateInitV(doc, NULL, code, 0, 0, level, ap);
    va_end(ap);

    return msg;
}
/** Creates a message positioned at `node`. When `node` is NULL the
 ** lexer's current line/column are used instead, and 0/0 when the
 ** document has no lexer either.
 */
TidyMessageImpl *TY_(tidyMessageCreateWithNode)( TidyDocImpl *doc,
                                                 Node *node,
                                                 uint code,
                                                 TidyReportLevel level,
                                                 ... )
{
    TidyMessageImpl *msg;
    va_list ap;
    int line = 0;
    int col = 0;

    if ( node )
    {
        line = node->line;
        col  = node->column;
    }
    else if ( doc->lexer )
    {
        line = doc->lexer->lines;
        col  = doc->lexer->columns;
    }

    va_start(ap, level);
    msg = tidyMessageCreateInitV(doc, node, code, line, col, level, ap);
    va_end(ap);

    return msg;
}
/** Creates a node-less message positioned at the lexer's current
 ** line/column, or at 0/0 when the document has no lexer.
 */
TidyMessageImpl *TY_(tidyMessageCreateWithLexer)( TidyDocImpl *doc,
                                                  uint code,
                                                  TidyReportLevel level,
                                                  ... )
{
    TidyMessageImpl *msg;
    va_list ap;
    int line = 0;
    int col = 0;

    if ( doc->lexer )
    {
        line = doc->lexer->lines;
        col  = doc->lexer->columns;
    }

    va_start(ap, level);
    msg = tidyMessageCreateInitV(doc, NULL, code, line, col, level, ap);
    va_end(ap);

    return msg;
}
/** Frees a message created by one of the tidyMessageCreate functions:
 ** the argument array, every formatted string buffer, and finally the
 ** message structure itself. Passing NULL is a no-op.
 */
void TY_(tidyMessageRelease)( TidyMessageImpl *message )
{
    TidyDocImpl *owner;

    if ( !message )
        return;

    /* fetch the owning document once, before the message itself is freed */
    owner = tidyDocToImpl(message->tidyDoc);

    TidyDocFree( owner, message->arguments );
    TidyDocFree( owner, message->messageDefault );
    TidyDocFree( owner, message->message );
    TidyDocFree( owner, message->messagePosDefault );
    TidyDocFree( owner, message->messagePos );
    TidyDocFree( owner, message->messageOutputDefault );
    TidyDocFree( owner, message->messageOutput );
    TidyDocFree( owner, message ); /* Issue #597 - and discard the message structure */
}
/*********************************************************************
* Modern Message Callback Functions
*********************************************************************/
/** Returns the document the message was created for. */
TidyDocImpl* TY_(getMessageDoc)( TidyMessageImpl message )
{
    TidyDocImpl *owner = message.tidyDoc;
    return owner;
}
/** Returns the numeric message code. */
uint TY_(getMessageCode)( TidyMessageImpl message )
{
    const uint code = message.code;
    return code;
}
/** Returns the message code as its string key. */
ctmbstr TY_(getMessageKey)( TidyMessageImpl message )
{
    ctmbstr key = message.messageKey;
    return key;
}
/** Returns the line number the message refers to. */
int TY_(getMessageLine)( TidyMessageImpl message )
{
    const int line = message.line;
    return line;
}
/** Returns the column number the message refers to. */
int TY_(getMessageColumn)( TidyMessageImpl message )
{
    const int column = message.column;
    return column;
}
/** Returns the report level of the message. */
TidyReportLevel TY_(getMessageLevel)( TidyMessageImpl message )
{
    const TidyReportLevel level = message.level;
    return level;
}
/** Returns whether the message's code was found in the document's
 ** muted-message list when the message was created.
 */
Bool TY_(getMessageIsMuted)( TidyMessageImpl message )
{
    const Bool muted = message.muted;
    return muted;
}
/** Returns the format string of the message in the default localization. */
ctmbstr TY_(getMessageFormatDefault)( TidyMessageImpl message )
{
    ctmbstr fmt = message.messageFormatDefault;
    return fmt;
}
/** Returns the format string of the message in the current localization. */
ctmbstr TY_(getMessageFormat)( TidyMessageImpl message )
{
    ctmbstr fmt = message.messageFormat;
    return fmt;
}
/** Returns the formatted message text in the default localization. */
ctmbstr TY_(getMessageDefault)( TidyMessageImpl message )
{
    ctmbstr text = message.messageDefault;
    return text;
}
/** Returns the formatted message text in the current localization. */
ctmbstr TY_(getMessage)( TidyMessageImpl message )
{
    ctmbstr text = message.message;
    return text;
}
/** Returns the position part (file/line/column) of the message in the
 ** default localization.
 */
ctmbstr TY_(getMessagePosDefault)( TidyMessageImpl message )
{
    ctmbstr pos = message.messagePosDefault;
    return pos;
}
/** Returns the position part (file/line/column) of the message in the
 ** current localization.
 */
ctmbstr TY_(getMessagePos)( TidyMessageImpl message )
{
    ctmbstr pos = message.messagePos;
    return pos;
}
/** Returns the report-level prefix of the message in the default
 ** localization.
 */
ctmbstr TY_(getMessagePrefixDefault)( TidyMessageImpl message )
{
    ctmbstr prefix = message.messagePrefixDefault;
    return prefix;
}
/** Returns the report-level prefix of the message in the current
 ** localization.
 */
ctmbstr TY_(getMessagePrefix)( TidyMessageImpl message )
{
    ctmbstr prefix = message.messagePrefix;
    return prefix;
}
/** Returns the complete output line (position, prefix and text) in the
 ** default localization.
 */
ctmbstr TY_(getMessageOutputDefault)( TidyMessageImpl message )
{
    ctmbstr output = message.messageOutputDefault;
    return output;
}
/** Returns the complete output line (position, prefix and text) in the
 ** current localization.
 */
ctmbstr TY_(getMessageOutput)( TidyMessageImpl message )
{
    ctmbstr output = message.messageOutput;
    return output;
}
/*********************************************************************
* Message Argument Interrogation
*********************************************************************/
/** Starts an iteration over the message's arguments. The iterator is a
 ** one-based index: 1 when there is at least one argument, otherwise 0
 ** (nothing to iterate).
 */
TidyIterator TY_(getMessageArguments)( TidyMessageImpl message )
{
    size_t first = ( message.argcount > 0 ) ? (size_t)1 : (size_t)0;
    return (TidyIterator) first;
}
/** Returns the argument token the iterator currently points at and
 ** advances the iterator. Both the iterator and the returned token are
 ** plain one-based indices; 0 means "no argument" / "iteration finished".
 ** The argument accessors subtract one from the token to index the
 ** argument array.
 */
TidyMessageArgument TY_(getNextMessageArgument)( TidyMessageImpl message, TidyIterator* iter )
{
    size_t total;
    size_t current;
    size_t next;
    size_t token;

    assert( iter != NULL );

    total   = (size_t)message.argcount;
    current = (size_t)*iter;

    if ( current < 1 || current > total )
    {
        /* out of range: hand back "no argument", leave the index alone */
        token = 0;
        next  = current;
    }
    else
    {
        token = current;
        next  = current + 1;
    }

    /* an index that has run past the last argument ends the iteration */
    if ( next > total )
        next = 0;

    *iter = (TidyIterator)next;
    return (TidyMessageArgument)token;
}
/** Returns the type of the argument identified by the one-based token
 ** `*arg`. The token must lie in 1..argcount; the assertion now rejects
 ** both token 0 (index -1) and token argcount + 1, which previously
 ** passed and indexed outside the argument array.
 */
TidyFormatParameterType TY_(getArgType)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );

    return message.arguments[argNum].type;
}
/** Returns the format specifier recorded for the argument identified by
 ** the one-based token `*arg`. The token must lie in 1..argcount; the
 ** assertion checks both ends of that range.
 */
ctmbstr TY_(getArgFormat)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );

    return message.arguments[argNum].format;
}
/** Returns the string value of the argument identified by the one-based
 ** token `*arg`. The token must lie in 1..argcount and the argument must
 ** have been recorded as tidyFormatType_STRING; both are asserted.
 */
ctmbstr TY_(getArgValueString)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_STRING);

    return message.arguments[argNum].u.s;
}
/** Returns the unsigned value of the argument identified by the
 ** one-based token `*arg`. The token must lie in 1..argcount and the
 ** argument must have been recorded as tidyFormatType_UINT; both are
 ** asserted.
 */
uint TY_(getArgValueUInt)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_UINT);

    return message.arguments[argNum].u.ui;
}
/** Returns the signed value of the argument identified by the one-based
 ** token `*arg`. The token must lie in 1..argcount and the argument must
 ** have been recorded as tidyFormatType_INT; both are asserted.
 */
int TY_(getArgValueInt)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_INT);

    return message.arguments[argNum].u.i;
}
/** Returns the floating-point value of the argument identified by the
 ** one-based token `*arg`. The token must lie in 1..argcount and the
 ** argument must have been recorded as tidyFormatType_DOUBLE; both are
 ** asserted.
 */
double TY_(getArgValueDouble)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_DOUBLE);

    return message.arguments[argNum].u.d;
}
/*********************************************************************
* BuildArgArray support
* Adapted loosely from Mozilla `prprf.c`, Mozilla Public License:
* - https://www.mozilla.org/en-US/MPL/2.0/
*********************************************************************/
/** Returns a pointer to an allocated array of `printfArg` given a format
 ** string and a va_list, or NULL if not successful or no parameters were
 ** given. Parameter `rv` will return with the count of zero or more
 ** parameters if successful, else -1.
 **
 ** The array is obtained from the document's allocator via TidyDocAlloc,
 ** thus the requirement to pass in a TidyDocImpl. When parsing fails the
 ** array is released again with TidyDocFree before NULL is returned.
 **
 ** Arguments are pulled from `ap` strictly in the order their conversions
 ** appear in `fmt`.
 **
 ** Currently Tidy only uses %c, %d, %s, %u, %X, although %x, %o, %i and the
 ** double conversions (%e %E %f %F %g %G) are accepted as well. A plain
 ** numeric width and precision are skipped. Anything else after the '%'
 ** (a `*` width or precision, flags, length modifiers, or a '%' that ends
 ** the string) is unsupported and results in failure as described above.
 **
 ** @param doc  Document whose allocator is used.
 ** @param fmt  printf-style format string.
 ** @param ap   Arguments matching `fmt`.
 ** @param rv   Out: number of arguments on success, 0 if the format has no
 **             conversions, -1 on allocation or parse failure.
 */
static struct printfArg* BuildArgArray( TidyDocImpl *doc, ctmbstr fmt, va_list ap, int* rv )
{
    int number = 0; /* the quantity of valid arguments found; returned as rv. */
    int cn = -1;    /* keeps track of which parameter index is current. */
    int i = 0;      /* typical index. */
    int pos = -1;   /* starting position of current argument. */
    const char* p;  /* current position in format string. */
    char c;         /* current character. */
    struct printfArg* nas;

    /* first pass: determine number of valid % to allocate space. */
    p = fmt;
    *rv = 0;
    while( ( c = *p++ ) != 0 )
    {
        if( c != '%' )
            continue;

        c = *p++;
        if( c == '%' ) /* skip %% case */
            continue;

        number++;

        /* A lone '%' at the very end of the string: `p` now sits one past
         * the terminator, so stop here instead of reading beyond the
         * buffer. The slot is still counted so that the second pass runs
         * and rejects the malformed conversion with *rv = -1. */
        if( c == 0 )
            break;
    }

    if( number == 0 )
        return NULL;

    nas = (struct printfArg*)TidyDocAlloc( doc, number * sizeof( struct printfArg ) );
    if( !nas )
    {
        *rv = -1;
        return NULL;
    }

    for( i = 0; i < number; i++ )
    {
        nas[i].type = tidyFormatType_UNKNOWN;
    }

    /* second pass: set nas[].type and location, and fetch each value. */
    p = fmt;
    while( ( c = *p++ ) != 0 )
    {
        if( c != '%' )
            continue;

        if( ( c = *p++ ) == '%' )
            continue; /* skip %% case */

        pos = p - fmt - 2; /* p already incremented twice */

        /* width -- width via parameter */
        if (c == '*')
        {
            /* not supported feature */
            *rv = -1;
            break;
        }

        /* width field -- skip */
        while ((c >= '0') && (c <= '9'))
        {
            c = *p++;
        }

        /* precision */
        if (c == '.')
        {
            c = *p++;
            if (c == '*') {
                /* not supported feature */
                *rv = -1;
                break;
            }

            while ((c >= '0') && (c <= '9'))
            {
                c = *p++;
            }
        }

        cn++;

        /* size and format */
        nas[cn].type = tidyFormatType_UINT;
        switch (c)
        {
            case 'c': /* unsigned int (char) */
            case 'u': /* unsigned int */
            case 'X': /* unsigned int as hex */
            case 'x': /* unsigned int as hex */
            case 'o': /* octal */
                nas[cn].u.ui = va_arg( ap, unsigned int );
                break;

            case 'd': /* signed int */
            case 'i': /* signed int */
                nas[cn].type = tidyFormatType_INT;
                nas[cn].u.i = va_arg( ap, int );
                break;

            case 's': /* string */
                nas[cn].type = tidyFormatType_STRING;
                nas[cn].u.s = va_arg( ap, char* );
                break;

            case 'e': /* double */
            case 'E': /* double */
            case 'f': /* double */
            case 'F': /* double */
            case 'g': /* double */
            case 'G': /* double */
                nas[cn].type = tidyFormatType_DOUBLE;
                nas[cn].u.d = va_arg( ap, double );
                break;

            default:
                /* Unsupported conversion; this `break` only leaves the
                 * switch, the loop is left by the UNKNOWN check below. */
                nas[cn].type = tidyFormatType_UNKNOWN;
                *rv = -1;
                break;
        }

        /* position and format */
        nas[cn].formatStart = pos;
        nas[cn].formatLength = (p - fmt) - pos;

        /* the format string exceeds the buffer length */
        if ( nas[cn].formatLength >= FORMAT_LENGTH )
        {
            *rv = -1;
            break;
        }
        else
        {
            strncpy(nas[cn].format, fmt + nas[cn].formatStart, nas[cn].formatLength);
            nas[cn].format[nas[cn].formatLength] = 0; /* Is. #800 - If count <= srcLen, no 0 added! */
        }

        /* Something's not right. */
        if( nas[cn].type == tidyFormatType_UNKNOWN )
        {
            *rv = -1;
            break;
        }
    }

    /* any failure above: release the array and report NULL with *rv == -1 */
    if( *rv < 0 )
    {
        TidyDocFree( doc, nas );
        return NULL;
    }

    *rv = number;
    return nas;
}

182
third_party/tidy/messageobj.h vendored Normal file
View file

@ -0,0 +1,182 @@
#ifndef messageobj_h
#define messageobj_h
/* clang-format off */
/**************************************************************************//**
* @file
* Provides an external, extensible API for message reporting.
*
* This module implements the `_TidyMessageImpl` structure (declared in
* `tidy-int.h`) in order to abstract the reporting of reports and dialogue
* from the rest of Tidy, and to enable a robust and extensible API for
* message interrogation by LibTidy users.
*
* @author Jim Derry
* @copyright Copyright (c) 2017 HTACG. See tidy.h for license.
* @date Created 2017-March-10
*
******************************************************************************/
#include "third_party/tidy/forward.h"
/** @addtogroup internal_api */
/** @{ */
/** @defgroup messageobj_instantiation Message Creation and Releasing */
/** @{ */
/** Creates a TidyMessageImpl, but without line numbers, such as used for
** information report output.
*/
TidyMessageImpl *TY_(tidyMessageCreate)( TidyDocImpl *doc,
uint code,
TidyReportLevel level,
... );
/** Creates a TidyMessageImpl, using the line and column from the provided
** Node as the message position source.
*/
TidyMessageImpl *TY_(tidyMessageCreateWithNode)( TidyDocImpl *doc,
Node *node,
uint code,
TidyReportLevel level,
... );
/** Creates a TidyMessageImpl, using the line and column from the provided
** document's Lexer as the message position source.
*/
TidyMessageImpl *TY_(tidyMessageCreateWithLexer)( TidyDocImpl *doc,
uint code,
TidyReportLevel level,
... );
/** Deallocates a TidyMessageImpl in order to free up its allocated memory
** when you're done using it.
*/
void TY_(tidyMessageRelease)( TidyMessageImpl *message );
/** @} end messageobj_instantiation group */
/** @defgroup messageobj_message_api Report and Dialogue API */
/** @{ */
/** get the document the message came from. */
TidyDocImpl* TY_(getMessageDoc)( TidyMessageImpl message );
/** get the message key code. */
uint TY_(getMessageCode)( TidyMessageImpl message );
/** get the message key string. */
ctmbstr TY_(getMessageKey)( TidyMessageImpl message );
/** get the line number the message applies to. */
int TY_(getMessageLine)( TidyMessageImpl message );
/** get the column the message applies to. */
int TY_(getMessageColumn)( TidyMessageImpl message );
/** get the TidyReportLevel of the message. */
TidyReportLevel TY_(getMessageLevel)( TidyMessageImpl message );
/** get whether or not the message was muted by the configuration. */
Bool TY_(getMessageIsMuted)( TidyMessageImpl message );
/** the built-in format string */
ctmbstr TY_(getMessageFormatDefault)( TidyMessageImpl message );
/** the localized format string */
ctmbstr TY_(getMessageFormat)( TidyMessageImpl message );
/** the message, formatted, default language */
ctmbstr TY_(getMessageDefault)( TidyMessageImpl message );
/** the message, formatted, localized */
ctmbstr TY_(getMessage)( TidyMessageImpl message );
/** the position part, default language */
ctmbstr TY_(getMessagePosDefault)( TidyMessageImpl message );
/** the position part, localized */
ctmbstr TY_(getMessagePos)( TidyMessageImpl message );
/** the prefix part, default language */
ctmbstr TY_(getMessagePrefixDefault)( TidyMessageImpl message );
/** the prefix part, localized */
ctmbstr TY_(getMessagePrefix)( TidyMessageImpl message );
/** the complete message, as would be output in the CLI */
ctmbstr TY_(getMessageOutputDefault)( TidyMessageImpl message );
/** the complete message, as would be output in the CLI, localized */
ctmbstr TY_(getMessageOutput)( TidyMessageImpl message );
/** @} end messageobj_message_api group */
/** @defgroup messageobj_args_api Report Arguments Interrogation API */
/** @{ */
/**
* Initializes the TidyIterator to point to the first item in the message's
* arguments. Use `TY_(getNextMessageArgument)` to get an opaque instance of
* `TidyMessageArgument` for which the subsequent interrogators will be of use.
*/
TidyIterator TY_(getMessageArguments)( TidyMessageImpl message );
/**
* Returns the next `TidyMessageArgument`, for the given message, which can
* then be interrogated with the API, and advances the iterator.
*/
TidyMessageArgument TY_(getNextMessageArgument)( TidyMessageImpl message, TidyIterator* iter );
/**
* Returns the `TidyFormatParameterType` of the given message argument.
*/
TidyFormatParameterType TY_(getArgType)( TidyMessageImpl message, TidyMessageArgument* arg );
/**
* Returns the format specifier of the given message argument. The memory for
* this string is cleared upon termination of the callback, so do be sure to
* make your own copy.
*/
ctmbstr TY_(getArgFormat)( TidyMessageImpl message, TidyMessageArgument* arg );
/**
* Returns the string value of the given message argument. An assertion
* will be generated if the argument type is not a string.
*/
ctmbstr TY_(getArgValueString)( TidyMessageImpl message, TidyMessageArgument* arg );
/**
* Returns the unsigned integer value of the given message argument. An
* assertion will be generated if the argument type is not an unsigned
* integer.
*/
uint TY_(getArgValueUInt)( TidyMessageImpl message, TidyMessageArgument* arg );
/**
* Returns the integer value of the given message argument. An assertion
* will be generated if the argument type is not an integer.
*/
int TY_(getArgValueInt)( TidyMessageImpl message, TidyMessageArgument* arg );
/**
* Returns the double value of the given message argument. An assertion
* will be generated if the argument type is not a double.
*/
double TY_(getArgValueDouble)( TidyMessageImpl message, TidyMessageArgument* arg );
/** @} end messageobj_args_api group */
/** @} end internal_api group */
#endif /* messageobj_h */

6452
third_party/tidy/parser.c vendored Normal file

File diff suppressed because it is too large Load diff

289
third_party/tidy/parser.h vendored Normal file
View file

@ -0,0 +1,289 @@
#ifndef __PARSER_H__
#define __PARSER_H__
/* clang-format off */
/**************************************************************************//**
* @file
* HTML and XML Parsers.
*
* Tidy's HTML parser corrects many conditions and enforces certain user
* preferences during the parsing process. The XML parser produces a tree
* of nodes useful to Tidy but also suitable for use in other XML processing
* applications.
*
* @author HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University) and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/tags.h"
#include "third_party/tidy/forward.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup parser_h HTML and XML Parsing
**
** These functions and structures form the internal API for document
** parsing.
**
** @{
******************************************************************************/
/**
* This typedef represents the state of a parser when it enters and exits.
* When the parser needs to finish work on the way back up the stack, it will
* push one of these records to the stack, and it will pop a record from the
* stack upon re-entry.
*
* Records are handled by value: TY_(pushMemory) takes one as an argument and
* TY_(peekMemory) / TY_(popMemory) return one.
*/
typedef struct _TidyParserMemory
{
Parser *identity; /**< Which parser pushed this record? */
Node *original_node; /**< Originally provided node at entry. */
Node *reentry_node; /**< The node with which to re-enter. */
GetTokenMode reentry_mode; /**< The token mode to use when re-entering. */
int reentry_state; /**< State to set during re-entry. Defined locally in each parser. */
GetTokenMode mode; /**< The caller will peek at this value to get the correct mode. */
int register_1; /**< Local variable storage. */
int register_2; /**< Local variable storage. */
} TidyParserMemory;
/**
* This typedef represents a stack of TidyParserMemory records. The Tidy
* document has its own instance of this.
*
* Note that `size` is unsigned while `top` is signed.
* NOTE(review): `top` presumably uses a negative value for "empty" -- confirm
* against TY_(isEmptyParserStack) in parser.c.
*/
typedef struct _TidyParserStack
{
TidyParserMemory* content; /**< Pointer to the state record storage. */
uint size; /**< Current size of the stack. */
int top; /**< Top of the stack. */
} TidyParserStack;
/**
* Allocates and initializes the parser's stack. TidyCreate will perform
* this automatically.
*/
void TY_(InitParserStack)( TidyDocImpl* doc );
/**
* Frees the parser's stack when done. TidyRelease will perform this
* automatically.
*/
void TY_(FreeParserStack)( TidyDocImpl* doc );
/**
* Indicates whether or not the stack is empty.
*/
Bool TY_(isEmptyParserStack)( TidyDocImpl* doc );
/**
* Peek at the parser memory.
*/
TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc );
/**
* Peek at the parser memory "identity" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc );
/**
* Peek at the parser memory "mode" field. This is just a convenience
* to avoid having to create a new struct instance in the caller.
*/
GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc );
/**
* Pop out a parser memory.
*/
TidyParserMemory TY_(popMemory)( TidyDocImpl* doc );
/**
* Push the parser memory to the stack.
*/
void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data );
/**
* Is used to perform a node integrity check recursively after parsing
* an HTML or XML document.
* @note Actual performance of this check can be disabled by defining the
* macro NO_NODE_INTEGRITY_CHECK.
* @param node The root node for the integrity check.
* @returns Returns yes or no indicating integrity of the node structure.
*/
Bool TY_(CheckNodeIntegrity)(Node *node);
/**
* Indicates whether or not a text node ends with a space or newline.
* @note Implementation of this method is found in `pprint.c` for
* some reason.
* @param lexer A reference to the lexer used to lex the document.
* @param node The node to check.
* @returns The result of the check.
*/
Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
/**
* Used to check if a node uses CM_NEW, which determines how attributes
* without values should be printed. This was introduced to deal with
* user-defined tags e.g. ColdFusion.
* @param node The node to check.
* @returns The result of the check.
*/
Bool TY_(IsNewNode)(Node *node);
/**
* Transforms a given node to another element, for example, from a `p`
* to a `br`.
* @param doc The document which the node belongs to.
* @param node The node to coerce.
* @param tid The tag type to coerce the node into.
* @param obsolete If the old node was obsolete, a report will be generated.
* @param expected If the old node was not expected to be found in this
* particular location, a report will be generated.
*/
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
/**
* Extract a node and its children from a markup tree.
* @param node The node to remove.
* @returns Returns the removed node.
*/
Node *TY_(RemoveNode)(Node *node);
/**
* Remove node from markup tree and discard it.
* @param doc The Tidy document from which to discard the node.
* @param element The node to discard.
* @returns Returns the next node.
*/
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
/**
* Insert node into markup tree as the first element of content of element.
* @param element The new destination node.
* @param node The node to insert.
*/
void TY_(InsertNodeAtStart)(Node *element, Node *node);
/**
* Insert node into markup tree as the last element of content of element.
* @param element The new destination node.
* @param node The node to insert.
*/
void TY_(InsertNodeAtEnd)(Node *element, Node *node);
/**
* Insert node into markup tree before element.
* @param element The node before which the node is inserted.
* @param node The node to insert.
*/
void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
/**
* Insert node into markup tree after element.
* @param element The node after which the node is inserted.
* @param node The node to insert.
*/
void TY_(InsertNodeAfterElement)(Node *element, Node *node);
/**
* Trims a single, empty element, returning the next node.
* @param doc The Tidy document.
* @param element The element to trim.
* @returns Returns the next node.
*/
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
/**
* Trims a tree of empty elements recursively, returning the next node.
* @param doc The Tidy document.
* @param node The element to trim.
* @returns Returns the next node.
*/
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
/**
* Indicates whether or not a text node is blank, meaning that it consists
* of nothing, or a single space.
* @param lexer The lexer used to lex the document.
* @param node The node to test.
* @returns Returns the result of the test.
*/
Bool TY_(IsBlank)(Lexer *lexer, Node *node);
/**
* Indicates whether or not a node is declared as containing javascript
* code.
* @param node The node to test.
* @returns Returns the result of the test.
*/
Bool TY_(IsJavaScript)(Node *node);
/**
* Parses a document after lexing using the HTML parser. It begins by properly
* configuring the overall HTML structure, and subsequently processes all
* remaining nodes. HTML is the root node.
* @param doc The Tidy document.
*/
void TY_(ParseDocument)( TidyDocImpl* doc );
/**
* Indicates whether or not whitespace is to be preserved in XHTML/XML
* documents.
* @param doc The Tidy document.
* @param element The node to test.
* @returns Returns the result of the test.
*/
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
/**
* Parses a document after lexing using the XML parser.
* @param doc The Tidy document.
*/
void TY_(ParseXMLDocument)( TidyDocImpl* doc );
/** @} end parser_h group */
/** @} end internal_api group */
#endif /* __PARSER_H__ */

2713
third_party/tidy/pprint.c vendored Normal file

File diff suppressed because it is too large Load diff

144
third_party/tidy/pprint.h vendored Normal file
View file

@ -0,0 +1,144 @@
#ifndef __PPRINT_H__
#define __PPRINT_H__
/* clang-format off */
/**************************************************************************//**
* @file
* Pretty Print the parse tree.
*
* Pretty printer for HTML and XML documents.
* - Block-level and unknown elements are printed on new lines and
* their contents indented with a user configured amount of spaces/tabs.
* - Inline elements are printed inline.
* - Inline content is wrapped on spaces (except in attribute values or
* preformatted text), after start tags and before end tags.
*
* @author HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2021 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University) and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/forward.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup print_h HTML and XML Pretty Printing
**
** These functions and structures form the internal API for document
** printing.
**
** @{
******************************************************************************/
/**
* This typedef represents the current pretty-printing mode, and instructs
* the printer behavior per the content currently being output.
*
* Apart from NORMAL (0), every enumerator is a distinct single bit.
* NOTE(review): presumably so that several modes can be OR-ed together in the
* `uint mode` argument of TY_(PPrintTree) / TY_(PPrintXMLTree) -- confirm in
* pprint.c.
*/
typedef enum {
NORMAL = 0u, /**< Normal output. */
PREFORMATTED = 1u, /**< Preformatted output. */
COMMENT = 2u, /**< Comment. */
ATTRIBVALUE = 4u, /**< An attribute's value. */
NOWRAP = 8u, /**< Content that should not be wrapped. */
CDATA = 16u /**< CDATA content. */
} PrettyPrintMode;
/**
* A record of the state of a single line, capturing the indent
* level, in-attribute, and in-string state of a line. Instances
* of this record are used by the pretty-printing buffer.
*
* The pretty printer keeps at most two lines of text in the
* buffer before flushing output. We need to capture the
* indent state (indent level) at the _beginning_ of _each_
* line, not the end of just the second line.
*
* We must also keep track of the "In Attribute" and "In String"
* states at the _end_ of each line.
*/
typedef struct _TidyIndent
{
int spaces; /**< Indent level of the line. */
int attrValStart; /**< Attribute in-value state. */
int attrStringStart; /**< Attribute in-string state. */
} TidyIndent;
/**
* The pretty-printing buffer.
*
* The line buffer holds `uint` elements rather than bytes, and `indent`
* holds one TidyIndent record per buffered line (two at most).
* NOTE(review): `ixInd` is presumably always 0 or 1 -- confirm in pprint.c.
*/
typedef struct _TidyPrintImpl
{
TidyAllocator *allocator; /**< Allocator */
uint *linebuf; /**< The line buffer proper. */
uint lbufsize; /**< Current size of the buffer. */
uint linelen; /**< Current line length. */
uint wraphere; /**< Point in the line to wrap text. */
uint line; /**< Current line. */
uint ixInd; /**< Index into the indent[] array. */
TidyIndent indent[2]; /**< Two lines worth of indent state */
} TidyPrintImpl;
/**
* Allocates and initializes the pretty-printing buffer for a Tidy document.
*/
void TY_(InitPrintBuf)( TidyDocImpl* doc );
/**
* Deallocates and frees a Tidy document's pretty-printing buffer.
*/
void TY_(FreePrintBuf)( TidyDocImpl* doc );
/**
* Flushes the current buffer to the actual output sink.
*/
void TY_(PFlushLine)( TidyDocImpl* doc, uint indent );
/**
* Print just the content of the HTML body element, which is useful when
* you want to reuse material from other documents.
* -- Sebastiano Vigna <vigna@dsi.unimi.it>
*/
void TY_(PrintBody)( TidyDocImpl* doc );
/**
* Print the HTML document tree for the given document using the given node
* as the root of the document. Note that you can print an entire document
* node as body using PPrintTree().
*/
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
/**
* Print the XML document tree for the given document using the given node
* as the root of the document.
*/
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
/** @} end print_h group */
/** @} end internal_api group */
#endif /* __PPRINT_H__ */

414
third_party/tidy/sprtf.c vendored Normal file
View file

@ -0,0 +1,414 @@
/* clang-format off */
/* sprtf.c
* SPRTF - Log output utility - part of the HTML Tidy project
*
* Copyright (c) 1998-2017 Geoff R. McLane and HTACG
*
* See tidy.h for the copyright notice.
*/
#ifdef _MSC_VER
# pragma warning( disable : 4995 )
#endif
#ifdef _MSC_VER
# if (defined(UNICODE) || defined(_UNICODE))
# endif
#else /* !_MSC_VER */
#endif /* _MSC_VER y/n */
#include "third_party/tidy/sprtf.h"
#ifdef ENABLE_DEBUG_LOG
#ifdef _MSC_VER
# ifndef _CRT_SECURE_NO_DEPRECATE
# define _CRT_SECURE_NO_DEPRECATE
# endif /* #ifndef _CRT_SECURE_NO_DEPRECATE */
# pragma warning( disable:4996 )
#else
# define strcmpi strcasecmp
#endif
/* Size in bytes of one scratch buffer; may be overridden at build time. */
#ifndef MX_ONE_BUF
# define MX_ONE_BUF 1024
#endif
/* Number of scratch buffers in the ring; may be overridden at build time. */
#ifndef MX_BUFFERS
# define MX_BUFFERS 1024
#endif
/* Ring of MX_BUFFERS fixed-size scratch buffers, stored back to back. */
static char _s_strbufs[MX_ONE_BUF * MX_BUFFERS];
/* Index of the slot handed out most recently. */
static int iNextBuf = 0;

/* Hands out the next scratch buffer of MX_ONE_BUF bytes from the static
 * ring, wrapping to slot 0 after the last slot. Because the index is
 * advanced before use, the very first call returns slot 1. The returned
 * memory is not cleared and is reused once the ring wraps around, so
 * callers must not hold on to it across many further calls.
 */
char *GetNxtBuf()
{
    iNextBuf = ( iNextBuf + 1 < MX_BUFFERS ) ? iNextBuf + 1 : 0;
    return _s_strbufs + ( MX_ONE_BUF * iNextBuf );
}
/* Chunk size at which the Windows line-ending filter flushes its buffer. */
#define MXIO 512
static char def_log[] = "temptidy.txt"; /* use local log */
static char logfile[264] = "\0";        /* current log file name; empty until first use */
static FILE * outfile = NULL;           /* NULL: not opened yet; (FILE *)-1: logging to file disabled */
static int addsystime = 0;              /* prefix each log line with the time */
static int addsysdate = 0;              /* prefix each log line with date and time */
static int addstdout = 1;               /* echo every message to stderr as well */
static int addflush = 1;                /* fflush() the log after every write */
static int add2screen = 0;              /* only consulted when ADD_SCREENOUT is defined */
static int add2listview = 0;            /* only consulted when ADD_LISTVIEW is defined */
static int append_to_log = 0;           /* open the log in append mode */
/* True when `a` is a usable stream: neither NULL nor the (FILE *)-1 sentinel. */
#ifndef VFP
# define VFP(a) ( a && ( a != (FILE *)-1 ) )
#endif

/* Each setter below stores `val` in one option flag and returns the value
 * the flag held before the call, so callers can restore it later. */

/* Sets the list-view output flag; returns the previous setting. */
int add_list_out( int val )
{
    const int previous = add2listview;
    add2listview = val;
    return previous;
}

/* Sets the echo-to-stderr flag; returns the previous setting. */
int add_std_out( int val )
{
    const int previous = addstdout;
    addstdout = val;
    return previous;
}

/* Sets the screen-list output flag; returns the previous setting. */
int add_screen_out( int val )
{
    const int previous = add2screen;
    add2screen = val;
    return previous;
}

/* Sets the time-prefix flag; returns the previous setting. */
int add_sys_time( int val )
{
    const int previous = addsystime;
    addsystime = val;
    return previous;
}

/* Sets the date-and-time-prefix flag; returns the previous setting. */
int add_sys_date( int val )
{
    const int previous = addsysdate;
    addsysdate = val;
    return previous;
}

/* Sets the append-to-log flag; returns the previous setting. */
int add_append_log( int val )
{
    const int previous = append_to_log;
    append_to_log = val;
    return previous;
}
/* fopen() mode used for the log file; recomputed on every open. */
#ifdef _MSC_VER
static const char *mode = "wb"; /* in window sprtf looks after the line endings */
#else
static const char *mode = "w";
#endif

/* Opens the log file named by `logfile`, falling back to the default name
 * when none has been set.
 *
 * The open mode follows the current append_to_log setting on every call.
 * Previously the mode was switched to append once and never switched back,
 * so clearing the option with add_append_log(0) had no effect on later
 * opens.
 *
 * Returns 1 on success. On failure returns 0, marks file logging as
 * disabled by storing (FILE *)-1 in `outfile`, and reports the problem
 * through sprtf().
 */
int open_log_file( void )
{
    if (logfile[0] == 0)
        strcpy(logfile,def_log);

#ifdef _MSC_VER
    /* in window sprtf looks after the line endings */
    mode = append_to_log ? "ab" : "wb";
#else
    mode = append_to_log ? "a" : "w";
#endif

    outfile = fopen(logfile, mode);
    if( outfile == 0 ) {
        /* Set the sentinel first so the sprtf() below does not try to
         * open the file again. */
        outfile = (FILE *)-1;
        sprtf("ERROR: Failed to open log file [%s] ...\n", logfile);
        /* exit(1); failed */
        return 0; /* failed */
    }
    return 1; /* success */
}
/* Closes the log file if one is really open and resets `outfile` to NULL,
 * which also clears the (FILE *)-1 "disabled" sentinel so the next write
 * will try to open the log again.
 */
void close_log_file( void )
{
    FILE *fp = outfile;

    outfile = NULL;
    if( VFP(fp) )
        fclose(fp);
}
/* Returns the name of the current log file, first installing the default
 * name if none has been set. When file logging is disabled (`outfile`
 * holds the (FILE *)-1 sentinel) the literal "none" is returned instead.
 * The result points at static storage and must not be freed.
 */
char * get_log_file( void )
{
    if (logfile[0] == 0)
        strcpy(logfile,def_log);

    /* disable the log file */
    return (outfile == (FILE *)-1) ? (char *)"none" : logfile;
}
/* Selects a new log file.
 *
 * nf    New file name. NULL, an empty string, or a name equal to the
 *       current one (compared case-insensitively) leaves everything as it
 *       is. The special name "none" disables file logging. A name that
 *       does not fit into the internal `logfile` buffer is rejected and
 *       the current log stays in place; it used to be copied without a
 *       bounds check and could overflow the buffer.
 * open  Non-zero opens the new file immediately; zero defers opening
 *       until the first write.
 */
void set_log_file( char * nf, int open )
{
    if (logfile[0] == 0)
        strcpy(logfile,def_log);

    if ( !nf || !*nf || strcmpi(nf,logfile) == 0 )
        return;

    /* refuse names that would overflow logfile[] (terminator included) */
    if ( strlen(nf) >= sizeof(logfile) )
        return;

    close_log_file();    /* remove any previous */
    strcpy(logfile,nf);  /* set new name */
    if (strcmp(logfile,"none") == 0) { /* if equal 'none' */
        outfile = (FILE *)-1; /* disable the log file */
    } else if (open) {
        open_log_file(); /* and open it ... anything previous written is 'lost' */
    } else {
        outfile = 0; /* else set 0 to open on first write */
    }
}
#ifdef _MSC_VER
/* Minimal gettimeofday() stand-in, compiled only when _MSC_VER is defined.
 *
 * Fills *tp with the current time and always returns 0; `tzp` is ignored.
 * With WIN32 defined the time comes from _ftime(), whose millisecond field
 * is scaled to microseconds. Otherwise only whole seconds from time() are
 * available and tv_usec is set to 0.
 */
int gettimeofday(struct timeval *tp, void *tzp)
{
#ifdef WIN32
struct _timeb timebuffer;
_ftime(&timebuffer);
tp->tv_sec = (long)timebuffer.time;
tp->tv_usec = timebuffer.millitm * 1000; /* milliseconds -> microseconds */
#else
tp->tv_sec = time(NULL);
tp->tv_usec = 0;
#endif
return 0;
}
#endif /* _MSC_VER */
/* Appends the local date of `*ptv`, formatted as YYYY/MM/DD, to the end of
 * the NUL-terminated string `ps`.
 *
 * Only the low 32 bits of tv_sec are used. Up to 128 bytes may be written
 * after the current end of `ps`, so the caller must provide that much
 * room. If localtime() fails, `ps` is left unchanged.
 */
void add_date_stg( char *ps, struct timeval *ptv )
{
    time_t curtime = (ptv->tv_sec & 0xffffffff);
    struct tm *ptm = localtime(&curtime);

    if (ptm == NULL)
        return;

    strftime(EndBuf(ps),128,"%Y/%m/%d",ptm);
}
/* Appends the local time of day of `*ptv`, formatted as HH:MM:SS, to the
 * end of the NUL-terminated string `ps`.
 *
 * Only the low 32 bits of tv_sec are used. Up to 128 bytes may be written
 * after the current end of `ps`, so the caller must provide that much
 * room. If localtime() fails, `ps` is left unchanged.
 */
void add_time_stg( char *ps, struct timeval *ptv )
{
    time_t curtime = (ptv->tv_sec & 0xffffffff);
    struct tm *ptm = localtime(&curtime);

    if (ptm == NULL)
        return;

    strftime(EndBuf(ps),128,"%H:%M:%S",ptm);
}
/* Returns the current local date as "YYYY/MM/DD" in a scratch buffer taken
 * from GetNxtBuf(). The buffer belongs to the ring and will be reused
 * later; callers must not free it.
 */
char *get_date_stg()
{
    struct timeval tv;
    char *ps = GetNxtBuf();

    gettimeofday( &tv, (struct timezone *)0 );
    *ps = 0; /* start from an empty string; add_date_stg() appends */
    add_date_stg( ps, &tv );
    return ps;
}
/* Returns the current local time as "HH:MM:SS" in a scratch buffer taken
 * from GetNxtBuf(). The buffer belongs to the ring and will be reused
 * later; callers must not free it.
 */
char *get_time_stg()
{
    struct timeval tv;
    char *ps = GetNxtBuf();

    gettimeofday( &tv, (struct timezone *)0 );
    *ps = 0; /* start from an empty string; add_time_stg() appends */
    add_time_stg( ps, &tv );
    return ps;
}
/* Returns the current local date and time as "YYYY/MM/DD HH:MM:SS" in a
 * scratch buffer taken from GetNxtBuf(). Both parts come from the same
 * gettimeofday() sample. The buffer belongs to the ring and will be reused
 * later; callers must not free it.
 */
char *get_date_time_stg()
{
    struct timeval tv;
    char *ps = GetNxtBuf();

    gettimeofday( &tv, (struct timezone *)0 );
    *ps = 0; /* start from an empty string; the helpers append */
    add_date_stg( ps, &tv );
    strcat(ps," ");
    add_time_stg( ps, &tv );
    return ps;
}
static void oi( char * psin )
{
int len, w;
char * ps = psin;
if (!ps)
return;
len = (int)strlen(ps);
if (len) {
if( outfile == 0 ) {
open_log_file();
}
if( VFP(outfile) ) {
char *tb;
if (addsysdate) {
tb = GetNxtBuf();
len = sprintf( tb, "%s - %s", get_date_time_stg(), ps );
ps = tb;
} else if( addsystime ) {
tb = GetNxtBuf();
len = sprintf( tb, "%s - %s", get_time_stg(), ps );
ps = tb;
}
w = (int)fwrite( ps, 1, len, outfile );
if( w != len ) {
fclose(outfile);
outfile = (FILE *)-1;
sprtf("WARNING: Failed write to log file [%s] ...\n", logfile);
exit(1);
} else if (addflush) {
fflush( outfile );
}
}
if( addstdout ) {
fwrite( ps, 1, len, stderr ); /* 20170917 - Switch to using 'stderr' in place of 'stdout' */
}
#ifdef ADD_LISTVIEW
if (add2listview) {
LVInsertItem(ps);
}
#endif /* ADD_LISTVIEW */
#ifdef ADD_SCREENOUT
if (add2screen) {
Add_String(ps); /* add string to screen list */
}
#endif /* #ifdef ADD_SCREENOUT */
}
}
#ifdef _MSC_VER
/* service to ensure line endings in windows only
 *
 * Copies `ps` into a static staging buffer while normalising line endings
 * to CR LF, and hands the result to oi() in chunks:
 *  - a CR not followed by LF (including a CR at the very end) gets an LF
 *    appended;
 *  - an LF not preceded by CR gets a CR inserted in front of it;
 *  - an existing CR LF pair is copied unchanged.
 * A chunk is flushed as soon as it holds MXIO or more characters. Each
 * input character adds at most two output characters, so the 1024-byte
 * buffer cannot overflow. Uses static storage, so it is not reentrant.
 */
static void prt( char * ps )
{
static char _s_buf[1024];
char * pb = _s_buf;
size_t i, j, k;
char c, d;
i = strlen(ps);
k = 0; /* number of characters staged in pb */
d = 0; /* previous character written */
if(i) {
k = 0;
d = 0;
for( j = 0; j < i; j++ ) {
c = ps[j];
if( c == 0x0d ) {
if( (j+1) < i ) {
if( ps[j+1] != 0x0a ) {
/* lone CR: emit it now and let the common path below emit the LF */
pb[k++] = c;
c = 0x0a;
}
} else {
/* CR is the last character: complete it with an LF */
pb[k++] = c;
c = 0x0a;
}
} else if( c == 0x0a ) {
if( d != 0x0d ) {
/* lone LF: put the missing CR in front of it */
pb[k++] = 0x0d;
}
}
pb[k++] = c;
d = c;
if( k >= MXIO ) {
/* chunk is full: terminate, output and start over */
pb[k] = 0;
oi(pb);
k = 0;
}
} /* for length of string */
if( k ) {
/* output whatever is left after the last full chunk */
pb[k] = 0;
oi( pb );
}
}
}
#endif /* #ifdef _MSC_VER */
/* Sends an already-formatted string straight to the log outputs, through
 * the CR/LF filter under _MSC_VER and directly otherwise.
 *
 * Returns the length of `cp` in bytes. A NULL `cp` is ignored and yields
 * 0; previously it reached strlen() and crashed.
 */
int direct_out_it( char *cp )
{
    if (!cp)
        return 0;
#ifdef _MSC_VER
    prt(cp);
#else
    oi(cp);
#endif
    return (int)strlen(cp);
}
/* STDAPI StringCchVPrintf( OUT LPTSTR pszDest,
* IN size_t cchDest, IN LPCTSTR pszFormat, IN va_list argList ); */
/* printf-style logging entry point.
 *
 * Formats the arguments into a static buffer, limited to M_MAX_SPRTF bytes
 * including the terminator, and passes the text on to the log outputs
 * (through the CR/LF filter under _MSC_VER). Returns whatever vsnprintf()
 * returned. Uses static storage, so it is not reentrant.
 */
int MCDECL sprtf( const char *pf, ... )
{
    static char _s_sprtfbuf[M_MAX_SPRTF+4];
    va_list arglist;
    int i;

    va_start(arglist, pf);
    i = vsnprintf( _s_sprtfbuf, M_MAX_SPRTF, pf, arglist );
    va_end(arglist);

#ifdef _MSC_VER
    prt(_s_sprtfbuf); /* ensure CR/LF */
#else
    oi(_s_sprtfbuf);
#endif
    return i;
}
#ifdef UNICODE
/* WIDE VARIETY */
/* Converts a wide string to the ANSI code page and sends it to prt().
 *
 * The conversion is given the character count without the terminator, so
 * WideCharToMultiByte() does not write a NUL itself. The result is
 * therefore terminated here using the returned byte count; previously the
 * static buffer could still contain the tail of an earlier, longer
 * message. One byte of the buffer is reserved for that terminator. If the
 * conversion fails (returns 0) nothing is output.
 */
static void wprt( PTSTR ps )
{
    static char _s_woibuf[1024];
    char * cp = _s_woibuf;
    int len = (int)lstrlen(ps);

    if(len) {
        int ret = WideCharToMultiByte( CP_ACP, /* UINT CodePage, // code page */
            0,     /* DWORD dwFlags, // performance and mapping flags */
            ps,    /* LPCWSTR lpWideCharStr, // wide-character string */
            len,   /* int cchWideChar, // number of chars in string. */
            cp,    /* LPSTR lpMultiByteStr, // buffer for new string */
            (int)sizeof(_s_woibuf) - 1, /* int cbMultiByte, // size of buffer, minus terminator */
            NULL,  /* LPCSTR lpDefaultChar, // default for unmappable chars */
            NULL ); /* LPBOOL lpUsedDefaultChar // set when default char used */
        if (ret > 0) {
            cp[ret] = 0;
            /* oi(cp); */
            prt(cp);
        }
    }
}
/* Wide-character counterpart of sprtf(), compiled only when UNICODE is
 * defined.
 *
 * Formats the arguments into a static buffer of 1024 WCHARs with
 * StringCchVPrintf() and passes the result to wprt(). The result of the
 * formatting call is not checked. Always returns 1, since `i` is never
 * changed after initialisation. Uses static storage, so it is not
 * reentrant.
 */
int MCDECL wsprtf( PTSTR pf, ... )
{
static WCHAR _s_sprtfwbuf[1024];
PWSTR pb = _s_sprtfwbuf;
int i = 1;
va_list arglist;
va_start(arglist, pf);
*pb = 0; /* start with an empty string in case formatting writes nothing */
StringCchVPrintf(pb,1024,pf,arglist);
va_end(arglist);
wprt(pb);
return i;
}
#endif /* #ifdef UNICODE */
#endif /* #ifdef ENABLE_DEBUG_LOG */
/* eof - sprtf.c */

101
third_party/tidy/sprtf.h vendored Normal file
View file

@ -0,0 +1,101 @@
#ifndef _SPRTF_HXX_
#define _SPRTF_HXX_
/* clang-format off */
/**************************************************************************//**
* @file
* Log output utility - part of the HTML Tidy project
*
* @author Geoff R. McLane [reports _at_ geoffair _dot_ info]
*
* @copyright
* Copyright (c) 1998-2017 Geoff R. McLane and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date 2017/02/12 17:06:02 Revision 1.0.2 geoff - correct license and coding style
* @date 2012/11/06 13:01:25 Revision 1.0.1 geoff
* @date 2012/10/17 00:00:00 Revision 1.0.0 geoff
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/tidyplatform.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef ENABLE_DEBUG_LOG
/*=============================================================================
* EXTRA Debugging, and information aid.
*
* When building and defining the ENABLE_DEBUG_LOG macro, Tidy will output
* extensive debug information. In addition to this macro, you can supply
* cmake build flags for additional diagnostic information:
* - -DENABLE_ALLOC_DEBUG:BOOL=ON - DEBUG_ALLOCATION
* - -DENABLE_MEMORY_DEBUG:BOOL=ON - DEBUG_MEMORY
* - -DENABLE_CRTDBG_MEMORY:BOOL=ON - _CRTDBG_MAP_ALLOC (WIN32 only)
*
* _MSC_VER Only - ENABLE_DEBUG_LOG is automatically enabled in the Debug
* build, unless DISABLE_DEBUG_LOG is defined. See 'tidyplatform.h'
*
* You can use DEBUG_LOG( SPRTF() ) to avoid #ifdef ENABLE_DEBUG_LOG for
* one-liners.
*
* This EXTRA Debug information is also written to a 'temptidy.txt' log
* file, for review, and analysis.
*
*===========================================================================*/
#ifndef SPRTF
# define SPRTF sprtf
#endif
#ifdef _MSC_VER
# define MCDECL _cdecl
#else
# define MCDECL
#endif
/* Option setters: each stores `val` in one logging option and returns the
 * option's previous value. */
int add_std_out( int val ); /* echo messages to stderr */
int add_sys_time( int val ); /* prefix log lines with the time */
int add_sys_date( int val ); /* prefix log lines with date and time */
int add_screen_out( int val ); /* screen-list output (ADD_SCREENOUT builds) */
int add_list_out( int val ); /* list-view output (ADD_LISTVIEW builds) */
int add_append_log( int val ); /* open the log file in append mode */
/* Opens the current log file; returns 1 on success, 0 on failure. */
int open_log_file( void );
/* Closes the log file if it is open. */
void close_log_file( void );
/* Selects a new log file name; "none" disables file logging. A non-zero
 * `open` opens the file immediately instead of on first write. */
void set_log_file( char * nf, int open );
/* Returns the current log file name, or "none" when file logging is disabled. */
char * get_log_file( void );
/* printf-style logging; returns the result of the underlying vsnprintf(). */
int MCDECL sprtf( const char *pf, ... );
/* Maximum number of bytes, terminator included, that sprtf() formats per call. */
#define M_MAX_SPRTF 2048
/* Outputs an already-formatted string; returns its length. */
int direct_out_it( char *cp );
/* Returns the next scratch buffer from a static ring; do not free. */
char *GetNxtBuf(void);
/* Yields a pointer to the terminating NUL of the C string `a`. The argument
 * is parenthesised so that expressions such as `cond ? x : y` expand
 * correctly. Note that `a` is evaluated twice, so it must not have side
 * effects. */
#define EndBuf(a) ( (a) + strlen(a) )
char *get_date_stg(void);
char *get_time_stg(void);
char *get_date_time_stg(void);
#ifdef _MSC_VER
int gettimeofday(struct timeval *tp, void *tzp);
#endif
# define DEBUG_LOG(ARG) do { ARG; } while(0)
#else
# define DEBUG_LOG(ARG)
#endif
#ifdef __cplusplus
}
#endif
#endif /* #ifndef _SPRTF_HXX_*/
/* eof - sprtf.h */

1149
third_party/tidy/streamio.c vendored Normal file

File diff suppressed because it is too large Load diff

175
third_party/tidy/streamio.h vendored Normal file
View file

@ -0,0 +1,175 @@
#ifndef __STREAMIO_H__
#define __STREAMIO_H__
/* clang-format off */
/* streamio.h -- handles character stream I/O
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Wrapper around Tidy input source and output sink
that calls appropriate interfaces, and applies
necessary char encoding transformations: to/from
ISO-10646 and/or UTF-8.
*/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/tidybuffio.h"
#include "third_party/tidy/fileio.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* Kind of backing store behind a stream; stored in the `iotype` field of
 * both struct _StreamIn and struct _StreamOut.
 * NOTE(review): the enumerators presumably correspond to the
 * File / Buffer / User constructor families declared below -- confirm in
 * streamio.c.
 */
typedef enum
{
FileIO,
BufferIO,
UserIO
} IOType;
/* states for ISO 2022
A document in ISO-2022 based encoding uses some ESC sequences called
"designator" to switch character sets. The designators defined and
used in ISO-2022-JP are:
"ESC" + "(" + ? for ISO646 variants
"ESC" + "$" + ? and
"ESC" + "$" + "(" + ? for multibyte character sets

The current state is kept in the `state` field of both struct _StreamIn
and struct _StreamOut.
NOTE(review): judging by the names only, FSM_ESC / FSM_ESCD / FSM_ESCDP /
FSM_ESCP track how much of a designator sequence has been seen so far --
confirm against the state machine in streamio.c.
*/
typedef enum
{
FSM_ASCII,
FSM_ESC,
FSM_ESCD,
FSM_ESCDP,
FSM_ESCP,
FSM_NONASCII
} ISO2022State;
/************************
** Source
************************/
/* Size constants for the input stream.
 * LASTPOS_SIZE is the length of the `lastcols` array in struct _StreamIn.
 * NOTE(review): CHARBUF_SIZE is presumably the initial capacity of
 * `charbuf` -- confirm in streamio.c.
 */
enum
{
CHARBUF_SIZE=5,
LASTPOS_SIZE=64
};
/* non-raw input is cleaned up*/
/* Input stream state: wraps a TidyInputSource together with the
 * bookkeeping needed for decoding and for tracking line/column position.
 */
struct _StreamIn
{
ISO2022State state; /* FSM for ISO2022 */
Bool pushed;
TidyAllocator *allocator;
tchar* charbuf; /* NOTE(review): presumably holds characters pushed back via TY_(UngetChar) -- confirm */
uint bufpos;
uint bufsize;
int tabs;
int lastcols[LASTPOS_SIZE]; /* indexed by curlastpos / firstlastpos below */
unsigned short curlastpos; /* current last position in lastcols */
unsigned short firstlastpos; /* first valid last position in lastcols */
int curcol;
int curline;
int encoding; /* NOTE(review): presumably one of the character encoding ids #defined later in this header -- confirm */
IOType iotype;
TidyInputSource source;
/* Pointer back to document for error reporting */
TidyDocImpl* doc;
};
/* Input stream API. The implementations are in streamio.c, which is not
** visible here; the notes below are drawn from names and signatures only.
*/
/* Create / destroy a StreamIn bound to `doc` with the given encoding id. */
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
void TY_(freeStreamIn)(StreamIn* in);
/* Constructors taking a stdio FILE, a TidyBuffer, or a caller-supplied
** TidyInputSource as the data source. */
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
/* NOTE(review): presumably inspects a leading byte-order mark and returns an
** encoding id -- the return convention when no BOM is present needs confirming. */
int TY_(ReadBOMEncoding)(StreamIn *in);
/* Character-level read, push-back and end-of-input test. */
uint TY_(ReadChar)( StreamIn* in );
void TY_(UngetChar)( uint c, StreamIn* in );
Bool TY_(IsEOF)( StreamIn* in );
/************************
** Sink
************************/
/* Output stream state: wraps a TidyOutputSink together with the encoding
** settings kept alongside it.
*/
struct _StreamOut
{
/* character encoding id; presumably one of the RAW..SHIFTJIS values defined in this header */
int encoding;
ISO2022State state; /* for ISO 2022 */
/* NOTE(review): presumably the newline setting passed as `newln` to the *Output constructors -- confirm */
uint nl;
IOType iotype;
TidyOutputSink sink;
};
/* Output stream API. The implementations are in streamio.c, which is not
** visible here; the notes below are drawn from names and signatures only.
*/
/* Constructors taking a stdio FILE, a TidyBuffer, or a caller-supplied
** TidyOutputSink as the destination, plus an encoding id and newline setting. */
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
/* NOTE(review): takes no document, unlike the constructors above; ownership of
** the returned stream is not visible from this header -- confirm before releasing it. */
StreamOut* TY_(StdErrOutput)(void);
/* StreamOut* StdOutOutput(void); */
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
void TY_(WriteChar)( uint c, StreamOut* out );
void TY_(outBOM)( StreamOut *out );
/* Lookups between Tidy encoding ids and encoding / option names. */
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
/************************
** Misc
************************/
/* character encodings
** Integer ids, numbered consecutively from 0. NOTE(review): presumably these
** are the values carried by the `int encoding` fields and parameters declared
** in this header -- confirm against streamio.c.
*/
#define RAW 0
#define ASCII 1
#define LATIN0 2
#define LATIN1 3
#define UTF8 4
#define ISO2022 5
#define MACROMAN 6
#define WIN1252 7
#define IBM858 8
#define UTF16LE 9
#define UTF16BE 10
#define UTF16 11
#define BIG5 12
#define SHIFTJIS 13
/* Function for conversion from Windows-1252 to Unicode */
uint TY_(DecodeWin1252)(uint c);
/* Function to convert from MacRoman to Unicode */
uint TY_(DecodeMacRoman)(uint c);
#ifdef __cplusplus
}
#endif
/* Use numeric constants as opposed to escape chars (\r, \n)
** to avoid conflict with Mac compilers that may re-define these.
*/
#define CR 0xD
#define LF 0xA
#if defined(MAC_OS_CLASSIC)
# define DEFAULT_NL_CONFIG TidyCR
#elif defined(_WIN32) || defined(OS2_OS)
# define DEFAULT_NL_CONFIG TidyCRLF
#else
# define DEFAULT_NL_CONFIG TidyLF
#endif
#endif /* __STREAMIO_H__ */

40
third_party/tidy/tagask.c vendored Normal file
View file

@ -0,0 +1,40 @@
/* clang-format off */
/* tagask.c -- Interrogate node type
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/tidy-int.h"
#include "third_party/tidy/tags.h"
#include "third_party/tidy/tidy.h"
/* Public wrapper: converts the opaque TidyNode handle with tidyNodeToImpl()
   and returns the result of the internal TY_(nodeIsText) predicate unchanged. */
Bool tidyNodeIsText( TidyNode tnod )
{
    Bool isText = TY_(nodeIsText)( tidyNodeToImpl( tnod ) );
    return isText;
}
/* Declared locally because the function is not exported yet.
   Converts the opaque TidyNode handle with tidyNodeToImpl() and returns the
   result of the internal TY_(nodeCMIsBlock) predicate unchanged. */
Bool tidyNodeCMIsBlock( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsBlock( TidyNode tnod )
{
    Bool isBlock = TY_(nodeCMIsBlock)( tidyNodeToImpl( tnod ) );
    return isBlock;
}
/* Declared locally because the function is not exported yet.
   Converts the opaque TidyNode handle with tidyNodeToImpl() and returns the
   result of the internal TY_(nodeCMIsInline) predicate unchanged. */
Bool tidyNodeCMIsInline( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsInline( TidyNode tnod )
{
    Bool isInline = TY_(nodeCMIsInline)( tidyNodeToImpl( tnod ) );
    return isInline;
}
/* Declared locally because the function is not exported yet.
   Converts the opaque TidyNode handle with tidyNodeToImpl() and returns the
   result of the internal TY_(nodeCMIsEmpty) predicate unchanged. */
Bool tidyNodeCMIsEmpty( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsEmpty( TidyNode tnod )
{
    Bool isEmpty = TY_(nodeCMIsEmpty)( tidyNodeToImpl( tnod ) );
    return isEmpty;
}
/* Public wrapper: converts the opaque TidyNode handle with tidyNodeToImpl()
   and returns the result of the internal TY_(nodeIsHeader) predicate unchanged. */
Bool tidyNodeIsHeader( TidyNode tnod )
{
    Bool isHeader = TY_(nodeIsHeader)( tidyNodeToImpl( tnod ) );
    return isHeader;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1190
third_party/tidy/tags.c vendored Normal file

File diff suppressed because it is too large Load diff

482
third_party/tidy/tags.h vendored Normal file
View file

@ -0,0 +1,482 @@
#ifndef __TAGS_H__
#define __TAGS_H__
/* clang-format off */
/**************************************************************************//**
* @file
* Recognize HTML Tags.
*
* The HTML tags are stored as 8 bit ASCII strings.
* Use lookupw() to find a tag given a wide char string.
*
* @author HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University) and HTACG.
* @par
* All Rights Reserved.
* @par
* See `tidy.h` for the complete license.
*
* @date Additional updates: consult git log
*
******************************************************************************/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/lexer.h"
#include "third_party/tidy/config.h"
#include "third_party/tidy/attrdict.h"
/** @addtogroup internal_api */
/** @{ */
/***************************************************************************//**
** @defgroup tags_h HTML Tags
**
** This module organizes all of Tidy's HTML tag operations, such as parsing
** tags, defining tags, and user-defined tags.
**
** @{
******************************************************************************/
/** @name Basic Structures and Tag Operations.
** These structures form the backbone of Tidy tag processing, and the
** functions in this group provide basic operations with tags and nodes.
*/
/** @{ */
/** Kinds of user-defined tag that can be created. Apart from the null
 ** marker, every value occupies its own bit.
 */
typedef enum
{
    tagtype_null   = 0,       /**< First item marker. */
    tagtype_empty  = 1 << 0,  /**< Tag is an empty element. */
    tagtype_inline = 1 << 1,  /**< Tag is an inline element. */
    tagtype_block  = 1 << 2,  /**< Tag is a block level element. */
    tagtype_pre    = 1 << 3   /**< Tag is a preformatted tag. */
} UserTagType;
/** This typedef describes a function to be used to parse HTML of a Tidy tag.
** @param doc The Tidy document.
** @param node The node being parsed.
** @param mode The GetTokenMode to be used for parsing the node contents.
** @returns A Node pointer. NOTE(review): what the returned node means is
** defined by the parser implementations, which are not visible from
** this header -- confirm there.
*/
typedef Node* (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
/** This typedef describes a function to be used to check the attributes
** of a Tidy tag.
** @param doc The Tidy document.
** @param node The node whose attributes are to be checked.
*/
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
/** Defines a dictionary entry for a single Tidy tag, including all of the
** relevant information that it requires. Entries can be chained together
** through the `next` member.
*/
struct _Dict
{
TidyTagId id; /**< Identifier for this tag. */
tmbstr name; /**< The tag name. */
uint versions; /**< Accumulates potential HTML versions. See TY_(ConstrainVersion). */
AttrVersion const * attrvers; /**< Accumulates potential HTML versions for attributes. */
uint model; /**< Indicates the relevant content models for the tag. See lexer.h; there is no enum. */
Parser* parser; /**< Specifies the parser to use for this tag. */
CheckAttribs* chkattrs; /**< Specifies the function to check this tag's attributes. */
Dict* next; /**< Link to next tag. */
};
/** This enum gives the size of the hash table used for tag hash lookup:
** it is the length of the `hashtab` array in TidyTagImpl.
*/
enum
{
ELEMENT_HASH_SIZE=178u /**< Number of slots in the tag hash table. */
};
/** This structure provides hash lookup for Tidy tags. Each instance refers
** to one tag definition and links to another DictHash through `next`
** (presumably the other entries sharing the same hash slot -- confirm in
** tags.c).
*/
typedef struct _DictHash
{
Dict const* tag; /**< The tag definition this entry refers to. */
struct _DictHash* next; /**< The next entry in the chain. */
} DictHash;
/** This structure consists of the lists of all tags known to Tidy. An
** instance is embedded in each document as the `tags` member of
** struct _TidyDocImpl (see tidy-int.h).
*/
typedef struct _TidyTagImpl
{
Dict* xml_tags; /**< Placeholder for all xml tags. */
Dict* declared_tag_list; /**< User-declared tags. */
DictHash* hashtab[ELEMENT_HASH_SIZE]; /**< All of Tidy's built-in tags. */
} TidyTagImpl;
/** Coordinates Config update and Tags data.
** @param doc The Tidy document.
** @param opt The option the tag is intended for.
** @param name The name of the new tag.
*/
void TY_(DeclareUserTag)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
/** Interface for finding a tag by TidyTagId.
** @param tid The TidyTagId to search for.
** @returns An instance of a Tidy tag.
*/
const Dict* TY_(LookupTagDef)( TidyTagId tid );
/** Assigns the node's tag.
** @param doc The Tidy document.
** @param node The node to assign the tag to.
** @returns Returns a bool indicating whether or not the tag was assigned.
*/
Bool TY_(FindTag)( TidyDocImpl* doc, Node *node );
/** Finds the parser function for a given node.
** @param doc The Tidy document.
** @param node The node to lookup.
** @returns The parser for the given node.
*/
Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
/** Defines a new user-defined tag.
** @param doc The Tidy document.
** @param tagType The type of user-defined tag to define.
** @param name The name of the new tag.
*/
void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
/** Frees user-defined tags of the given type, or all user tags if given
** `tagtype_null`.
** @param doc The Tidy document.
** @param tagType The type of tag to free, or `tagtype_null` to free all
** user-defined tags.
*/
void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType );
/** Initiates an iterator for a list of user-declared tags, including autonomous
** custom tags detected in the document if @ref TidyUseCustomTags is not set to
** **no**.
** @param doc An instance of a TidyDocImpl to query.
** @result Returns a TidyIterator, which is a token used to represent the
** current position in a list within LibTidy.
*/
TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc );
/** Given a valid TidyIterator initiated with TY_(GetDeclaredTagList)(),
** returns a string representing a user-declared or autonomous custom tag.
** @remark Specifying tagType limits the scope of the tags to one of
** @ref UserTagType types. Note that autonomous custom tags (if used)
** are added to one of these option types, depending on the value of
** @ref TidyUseCustomTags.
** @param doc The Tidy document.
** @param tagType The type of tag to iterate through.
** @param iter The iterator token provided initially by
** TY_(GetDeclaredTagList)().
** @result A string containing the next tag.
*/
ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
TidyIterator* iter );
/** Initializes tags and tag structures for the given Tidy document.
** @param doc The Tidy document.
*/
void TY_(InitTags)( TidyDocImpl* doc );
/** Frees the tags and structures used by Tidy for tags.
** @param doc The Tidy document.
*/
void TY_(FreeTags)( TidyDocImpl* doc );
/** Tidy defaults to HTML5 mode. If the <!DOCTYPE ...> is found to NOT be
** HTML5, then adjust the tags table to HTML4 mode.
** @param doc The Tidy document.
*/
void TY_(AdjustTags)( TidyDocImpl *doc );
/** Reset the tags table back to default HTML5 mode.
** @param doc The Tidy document.
*/
void TY_(ResetTags)( TidyDocImpl *doc );
/** Indicates whether or not Tidy is processing in HTML5 mode.
** @param doc The Tidy document.
** @returns Returns `yes` if processing in HTML5 mode.
*/
Bool TY_(IsHTML5Mode)( TidyDocImpl *doc );
/** @} */
/** @name Parser Methods And Attribute Checker Functions for Tags
** These functions define the parsers and attribute checking functions for
** each of Tidy's tags.
*/
/** @{ */
Parser TY_(ParseHTML);
Parser TY_(ParseHead);
Parser TY_(ParseTitle);
Parser TY_(ParseScript);
Parser TY_(ParseFrameSet);
Parser TY_(ParseNoFrames);
Parser TY_(ParseBody);
Parser TY_(ParsePre);
Parser TY_(ParseList);
Parser TY_(ParseDefList);
Parser TY_(ParseBlock);
Parser TY_(ParseInline);
Parser TY_(ParseEmpty);
Parser TY_(ParseTableTag);
Parser TY_(ParseColGroup);
Parser TY_(ParseRowGroup);
Parser TY_(ParseRow);
Parser TY_(ParseSelect);
Parser TY_(ParseOptGroup);
Parser TY_(ParseText);
Parser TY_(ParseDatalist);
Parser TY_(ParseNamespace);
CheckAttribs TY_(CheckAttributes);
/** @} */
/** @name Other Tag and Node Lookup Functions
** These functions perform additional lookup on tags and nodes.
*/
/** @{ */
/** Gets the TidyTagId of the given node. 0 == TidyTag_UNKNOWN.
** Yields TidyTag_UNKNOWN when `node` is NULL or has no tag. `node` is
** evaluated more than once, so avoid arguments with side effects.
*/
#define TagId(node) ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)
/** Determines if the given node is of the given tag id type.
** Evaluates to false when `node` is NULL or has no tag. `tid` is
** parenthesized so that an expression argument (e.g. `c ? A : B`) is
** compared as a whole. `node` is evaluated more than once, so avoid
** arguments with side effects.
*/
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
/** Inquires whether or not the given node is a text node.
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeIsText)( Node* node );
/** Inquires whether or not the given node is an element node.
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeIsElement)( Node* node );
/** Inquires whether or not the given node has any text.
** @param doc The Tidy document.
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
/** Inquires whether the given element looks like it's an autonomous custom
** element tag.
** @param element A string to be checked.
** @returns The status of the inquiry.
*/
Bool TY_(elementIsAutonomousCustomFormat)( ctmbstr element );
/** Inquires whether the given node looks like it's an autonomous custom
** element tag.
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeIsAutonomousCustomFormat)( Node* node );
/** True if the node looks like it's an autonomous custom element tag, and
** TidyCustomTags is not disabled, and we're in HTML5 mode, which are all
** requirements for valid autonomous custom tags.
** @param doc The Tidy document.
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeIsAutonomousCustomTag)( TidyDocImpl* doc, Node* node );
/** Does the node have the indicated content model? True if any of the bits
** requested are set.
** @param node The node being interrogated.
** @param contentModel The content model to check against.
** @returns The status of the inquiry.
*/
Bool TY_(nodeHasCM)( Node* node, uint contentModel );
/** Does the content model of the node include block?
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeCMIsBlock)( Node* node );
/** Does the content model of the node include inline?
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeCMIsInline)( Node* node );
/** Does the content model of the node include empty?
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeCMIsEmpty)( Node* node );
/** Is the node a header, such as H1, H2, ..., H6?
** @param node The node being interrogated.
** @returns The status of the inquiry.
*/
Bool TY_(nodeIsHeader)( Node* node );
/** Inquires as to the header level of the given node: 1, 2, ..., 6.
** @param node The node being interrogated.
** @returns The header level.
*/
uint TY_(nodeHeaderLevel)( Node* node );
/* Convenience predicates, one per tag: each nodeIsXXX( node ) expands to
** TagIsId() with the matching TidyTag_* id, so each is false for a NULL node
** or a node with no tag (see TagIsId above).
*/
#define nodeIsHTML( node ) TagIsId( node, TidyTag_HTML )
#define nodeIsHEAD( node ) TagIsId( node, TidyTag_HEAD )
#define nodeIsTITLE( node ) TagIsId( node, TidyTag_TITLE )
#define nodeIsBASE( node ) TagIsId( node, TidyTag_BASE )
#define nodeIsMETA( node ) TagIsId( node, TidyTag_META )
#define nodeIsBODY( node ) TagIsId( node, TidyTag_BODY )
#define nodeIsFRAMESET( node ) TagIsId( node, TidyTag_FRAMESET )
#define nodeIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
#define nodeIsIFRAME( node ) TagIsId( node, TidyTag_IFRAME )
#define nodeIsNOFRAMES( node ) TagIsId( node, TidyTag_NOFRAMES )
#define nodeIsHR( node ) TagIsId( node, TidyTag_HR )
#define nodeIsH1( node ) TagIsId( node, TidyTag_H1 )
#define nodeIsH2( node ) TagIsId( node, TidyTag_H2 )
#define nodeIsPRE( node ) TagIsId( node, TidyTag_PRE )
#define nodeIsLISTING( node ) TagIsId( node, TidyTag_LISTING )
#define nodeIsP( node ) TagIsId( node, TidyTag_P )
#define nodeIsUL( node ) TagIsId( node, TidyTag_UL )
#define nodeIsOL( node ) TagIsId( node, TidyTag_OL )
#define nodeIsDL( node ) TagIsId( node, TidyTag_DL )
#define nodeIsDIR( node ) TagIsId( node, TidyTag_DIR )
#define nodeIsLI( node ) TagIsId( node, TidyTag_LI )
#define nodeIsDT( node ) TagIsId( node, TidyTag_DT )
#define nodeIsDD( node ) TagIsId( node, TidyTag_DD )
#define nodeIsTABLE( node ) TagIsId( node, TidyTag_TABLE )
#define nodeIsCAPTION( node ) TagIsId( node, TidyTag_CAPTION )
#define nodeIsTD( node ) TagIsId( node, TidyTag_TD )
#define nodeIsTH( node ) TagIsId( node, TidyTag_TH )
#define nodeIsTR( node ) TagIsId( node, TidyTag_TR )
#define nodeIsCOL( node ) TagIsId( node, TidyTag_COL )
#define nodeIsCOLGROUP( node ) TagIsId( node, TidyTag_COLGROUP )
#define nodeIsBR( node ) TagIsId( node, TidyTag_BR )
#define nodeIsA( node ) TagIsId( node, TidyTag_A )
#define nodeIsLINK( node ) TagIsId( node, TidyTag_LINK )
#define nodeIsB( node ) TagIsId( node, TidyTag_B )
#define nodeIsI( node ) TagIsId( node, TidyTag_I )
#define nodeIsSTRONG( node ) TagIsId( node, TidyTag_STRONG )
#define nodeIsEM( node ) TagIsId( node, TidyTag_EM )
#define nodeIsBIG( node ) TagIsId( node, TidyTag_BIG )
#define nodeIsSMALL( node ) TagIsId( node, TidyTag_SMALL )
#define nodeIsPARAM( node ) TagIsId( node, TidyTag_PARAM )
#define nodeIsOPTION( node ) TagIsId( node, TidyTag_OPTION )
#define nodeIsOPTGROUP( node ) TagIsId( node, TidyTag_OPTGROUP )
#define nodeIsIMG( node ) TagIsId( node, TidyTag_IMG )
#define nodeIsMAP( node ) TagIsId( node, TidyTag_MAP )
#define nodeIsAREA( node ) TagIsId( node, TidyTag_AREA )
#define nodeIsNOBR( node ) TagIsId( node, TidyTag_NOBR )
#define nodeIsWBR( node ) TagIsId( node, TidyTag_WBR )
#define nodeIsFONT( node ) TagIsId( node, TidyTag_FONT )
#define nodeIsLAYER( node ) TagIsId( node, TidyTag_LAYER )
#define nodeIsSPACER( node ) TagIsId( node, TidyTag_SPACER )
#define nodeIsCENTER( node ) TagIsId( node, TidyTag_CENTER )
#define nodeIsSTYLE( node ) TagIsId( node, TidyTag_STYLE )
#define nodeIsSCRIPT( node ) TagIsId( node, TidyTag_SCRIPT )
#define nodeIsNOSCRIPT( node ) TagIsId( node, TidyTag_NOSCRIPT )
#define nodeIsFORM( node ) TagIsId( node, TidyTag_FORM )
#define nodeIsTEXTAREA( node ) TagIsId( node, TidyTag_TEXTAREA )
#define nodeIsBLOCKQUOTE( node ) TagIsId( node, TidyTag_BLOCKQUOTE )
#define nodeIsAPPLET( node ) TagIsId( node, TidyTag_APPLET )
#define nodeIsOBJECT( node ) TagIsId( node, TidyTag_OBJECT )
#define nodeIsDIV( node ) TagIsId( node, TidyTag_DIV )
#define nodeIsSPAN( node ) TagIsId( node, TidyTag_SPAN )
#define nodeIsINPUT( node ) TagIsId( node, TidyTag_INPUT )
#define nodeIsQ( node ) TagIsId( node, TidyTag_Q )
#define nodeIsLABEL( node ) TagIsId( node, TidyTag_LABEL )
#define nodeIsH3( node ) TagIsId( node, TidyTag_H3 )
#define nodeIsH4( node ) TagIsId( node, TidyTag_H4 )
#define nodeIsH5( node ) TagIsId( node, TidyTag_H5 )
#define nodeIsH6( node ) TagIsId( node, TidyTag_H6 )
#define nodeIsADDRESS( node ) TagIsId( node, TidyTag_ADDRESS )
#define nodeIsXMP( node ) TagIsId( node, TidyTag_XMP )
#define nodeIsSELECT( node ) TagIsId( node, TidyTag_SELECT )
#define nodeIsBLINK( node ) TagIsId( node, TidyTag_BLINK )
#define nodeIsMARQUEE( node ) TagIsId( node, TidyTag_MARQUEE )
#define nodeIsEMBED( node ) TagIsId( node, TidyTag_EMBED )
#define nodeIsBASEFONT( node ) TagIsId( node, TidyTag_BASEFONT )
#define nodeIsISINDEX( node ) TagIsId( node, TidyTag_ISINDEX )
#define nodeIsS( node ) TagIsId( node, TidyTag_S )
#define nodeIsSTRIKE( node ) TagIsId( node, TidyTag_STRIKE )
#define nodeIsSUB( node ) TagIsId( node, TidyTag_SUB )
#define nodeIsSUP( node ) TagIsId( node, TidyTag_SUP )
#define nodeIsU( node ) TagIsId( node, TidyTag_U )
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
#define nodeIsMAIN( node ) TagIsId( node, TidyTag_MAIN )
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node ) TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node ) TagIsId( node, TidyTag_PROGRESS )
#define nodeIsINS( node ) TagIsId( node, TidyTag_INS )
#define nodeIsDEL( node ) TagIsId( node, TidyTag_DEL )
#define nodeIsSVG( node ) TagIsId( node, TidyTag_SVG )
/* HTML5 */
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
#define nodeIsDATA( node ) TagIsId( node, TidyTag_DATA )
#define nodeIsMATHML( node ) TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */
#define nodeIsTEMPLATE( node ) TagIsId( node, TidyTag_TEMPLATE )
/* NOT in HTML 5 */
#define nodeIsACRONYM( node ) TagIsId( node, TidyTag_ACRONYM )
/* NOTE(review): `nodesIsFRAME` (with an extra 's') tests the same tag id as
** nodeIsFRAME defined earlier in this header. The spelling looks like a typo,
** but the name is kept as-is because renaming it would change the macro's
** public name. */
#define nodesIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
#define nodeIsTT( node ) TagIsId( node, TidyTag_TT )
/** @} name */
/** @} tags_h group */
/** @} internal_api addtogroup */
#endif /* __TAGS_H__ */

188
third_party/tidy/tidy-int.h vendored Normal file
View file

@ -0,0 +1,188 @@
/* clang-format off */
#ifndef __TIDY_INT_H__
#define __TIDY_INT_H__
/* tidy-int.h -- internal library declarations
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/config.h"
#include "third_party/tidy/lexer.h"
#include "third_party/tidy/tags.h"
#include "third_party/tidy/attrs.h"
#include "third_party/tidy/pprint.h"
#include "third_party/tidy/access.h"
#include "third_party/tidy/message.h"
#include "libc/time/struct/utimbuf.h"
#include "third_party/tidy/parser.h"
/* Larger of two values. Defined only when MAX has not already been supplied.
   Function-like macro: the selected argument is evaluated twice, so avoid
   arguments with side effects. */
#if !defined(MAX)
#  define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* Smaller of two values. Defined only when MIN has not already been supplied.
   Function-like macro: the selected argument is evaluated twice, so avoid
   arguments with side effects. */
#if !defined(MIN)
#  define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
/*\
* Issue #166 - repeated <main> element
* Change the previous on/off uint flag badForm
* to a BIT flag to support other than <form>
* errors. This could be extended more...
\*/
#define flg_BadForm 0x00000001
#define flg_BadMain 0x00000002
/* Internal state of one Tidy document: the parse tree, configuration and
** tag/attribute tables, I/O streams, registered callbacks, result counters
** and the allocator. The tidyDocToImpl() / tidyImplToDoc() macros in this
** header cast between a pointer to this struct and the opaque TidyDoc handle.
*/
struct _TidyDocImpl
{
/* The Document Tree (and backing store buffer) */
Node root; /* This MUST remain the first declared
variable in this structure */
Lexer* lexer;
/* Config + Markup Declarations */
TidyConfigImpl config;
TidyTagImpl tags;
TidyAttribImpl attribs;
TidyAccessImpl access;
TidyMutedMessages muted;
/* The Pretty Print buffer */
TidyPrintImpl pprint;
/* I/O */
StreamIn* docIn;
StreamOut* docOut;
StreamOut* errout;
/* Callbacks registered on this document */
TidyReportFilter reportFilter;
TidyReportCallback reportCallback;
TidyMessageCallback messageCallback;
TidyOptCallback pOptCallback;
TidyConfigCallback pConfigCallback;
TidyConfigChangeCallback pConfigChangeCallback;
TidyPPProgress progressCallback;
TidyParserStack stack;
/* Parse + Repair Results */
uint optionErrors;
uint errors;
uint warnings;
uint accessErrors;
uint infoMessages;
uint docErrors;
int parseStatus;
uint badAccess; /* for accessibility errors */
uint badLayout; /* for bad style errors */
uint badChars; /* for bad char encodings */
/* badForm holds the flg_BadForm / flg_BadMain bits defined above */
uint badForm; /* bit field, for badly placed form tags, or other format errors */
uint footnotes; /* bit field, for other footnotes, until formalized */
Bool HTML5Mode; /* current mode is html5 */
Bool xmlDetected; /* true if XML was used/detected */
uint indent_char; /* space or tab character, for indenting */
/* Memory allocator */
TidyAllocator* allocator;
/* Miscellaneous */
void* appData;
uint nClassId;
Bool inputHadBOM;
/* filetimes exists only when PRESERVE_FILE_TIMES is non-zero */
#if PRESERVE_FILE_TIMES
struct utimbuf filetimes;
#endif
tmbstr givenDoctype;
};
/** The basic struct for communicating a message within LibTidy. All of the
** relevant information pertaining to a message can be retrieved with the
** accessor functions and one of these records.
*/
struct _TidyMessageImpl
{
TidyDocImpl *tidyDoc; /* document instance this message is attributed to */
Node *tidyNode; /* the node reporting the message, if applicable */
uint code; /* the message code */
int line; /* the line message applies to */
int column; /* the column the message applies to */
TidyReportLevel level; /* the severity level of the message */
Bool allowMessage; /* indicates whether or not a filter rejected a message */
Bool muted; /* indicates whether or not a configuration mutes this message */
int argcount; /* the number of arguments */
struct printfArg* arguments; /* the arguments' values and types */
ctmbstr messageKey; /* the message code as a key string */
ctmbstr messageFormatDefault; /* the built-in format string */
ctmbstr messageFormat; /* the localized format string */
tmbstr messageDefault; /* the message, formatted, default language */
tmbstr message; /* the message, formatted, localized */
tmbstr messagePosDefault; /* the position part, default language */
tmbstr messagePos; /* the position part, localized */
ctmbstr messagePrefixDefault; /* the prefix part, default language */
ctmbstr messagePrefix; /* the prefix part, localized */
tmbstr messageOutputDefault; /* the complete string Tidy would output */
tmbstr messageOutput; /* the complete string, localized */
};
/* Conversions between the opaque public handle types (TidyDoc, TidyMessage,
** TidyNode, TidyAttr, TidyOption) and the internal implementation types.
** Each macro is a plain cast; no validation of the argument is performed.
*/
#define tidyDocToImpl( tdoc ) ((TidyDocImpl*)(tdoc))
#define tidyImplToDoc( doc ) ((TidyDoc)(doc))
#define tidyMessageToImpl( tmessage ) ((TidyMessageImpl*)(tmessage))
#define tidyImplToMessage( message ) ((TidyMessage)(message))
#define tidyNodeToImpl( tnod ) ((Node*)(tnod))
#define tidyImplToNode( node ) ((TidyNode)(node))
#define tidyAttrToImpl( tattr ) ((AttVal*)(tattr))
#define tidyImplToAttr( attval ) ((TidyAttr)(attval))
#define tidyOptionToImpl( topt ) ((const TidyOptionImpl*)(topt))
#define tidyImplToOption( option ) ((TidyOption)(option))
/** Wrappers for easy memory allocation using the document's allocator */
#define TidyDocAlloc(doc, size) TidyAlloc((doc)->allocator, size)
#define TidyDocRealloc(doc, block, size) TidyRealloc((doc)->allocator, block, size)
#define TidyDocFree(doc, block) TidyFree((doc)->allocator, block)
#define TidyDocPanic(doc, msg) TidyPanic((doc)->allocator, msg)
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in );
/*
[i_a] generic node tree traversal code; used in several spots.
Define your own callback, which returns one of the NodeTraversalSignal values
to instruct the tree traversal routine TraverseNodeTree() what to do.
Pass custom data to/from the callback using the 'propagate' reference.
*/
/* Values a NodeTraversalCallBack returns to steer TY_(TraverseNodeTree).
** NOTE(review): the notes on SkipSiblings and SkipChildrenAndSiblings are
** derived from the enumerator names because the original comments were
** truncated / duplicated; confirm against TY_(TraverseNodeTree) in tidylib.c.
*/
typedef enum
{
ContinueTraversal, /* visit siblings and children */
SkipChildren, /* visit siblings of this node; ignore its children */
SkipSiblings, /* ignore subsequent siblings of this node; ignore their children (presumably this node's own children are still visited -- confirm) */
SkipChildrenAndSiblings, /* presumably: ignore this node's children and its subsequent siblings -- confirm */
VisitParent, /* REVERSE traversal: visit the parent of the current node */
ExitTraversal /* terminate traversal on the spot */
} NodeTraversalSignal;
typedef NodeTraversalSignal NodeTraversalCallBack(TidyDocImpl* doc, Node* node, void *propagate);
NodeTraversalSignal TY_(TraverseNodeTree)(TidyDocImpl* doc, Node* node, NodeTraversalCallBack *cb, void *propagate);
#endif /* __TIDY_INT_H__ */

2595
third_party/tidy/tidy.c vendored Normal file

File diff suppressed because it is too large Load diff

2222
third_party/tidy/tidy.h vendored Normal file

File diff suppressed because it is too large Load diff

83
third_party/tidy/tidy.mk vendored Normal file
View file

@ -0,0 +1,83 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
PKGS += THIRD_PARTY_TIDY
THIRD_PARTY_TIDY_SRCS = $(THIRD_PARTY_TIDY_A_SRCS)
THIRD_PARTY_TIDY_HDRS = $(THIRD_PARTY_TIDY_A_HDRS)
THIRD_PARTY_TIDY_INCS = $(THIRD_PARTY_TIDY_A_INCS)
THIRD_PARTY_TIDY_BINS = $(THIRD_PARTY_TIDY_COMS) $(THIRD_PARTY_TIDY_COMS:%=%.dbg)
THIRD_PARTY_TIDY_ARTIFACTS += THIRD_PARTY_TIDY_A
THIRD_PARTY_TIDY = $(THIRD_PARTY_TIDY_A_DEPS) $(THIRD_PARTY_TIDY_A)
THIRD_PARTY_TIDY_A = o/$(MODE)/third_party/tidy/tidy.a
THIRD_PARTY_TIDY_A_FILES := $(wildcard third_party/tidy/*)
THIRD_PARTY_TIDY_A_HDRS = $(filter %.h,$(THIRD_PARTY_TIDY_A_FILES))
THIRD_PARTY_TIDY_A_INCS = $(filter %.inc,$(THIRD_PARTY_TIDY_A_FILES))
THIRD_PARTY_TIDY_A_SRCS = $(filter %.c,$(THIRD_PARTY_TIDY_A_FILES))
THIRD_PARTY_TIDY_A_OBJS = $(THIRD_PARTY_TIDY_A_SRCS:%.c=o/$(MODE)/%.o)
THIRD_PARTY_TIDY_A_DIRECTDEPS = \
LIBC_FMT \
LIBC_INTRIN \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_RUNTIME \
LIBC_CALLS \
LIBC_UNICODE \
LIBC_STDIO \
LIBC_SYSV \
LIBC_STR \
LIBC_STUBS
THIRD_PARTY_TIDY_A_DEPS := \
$(call uniq,$(foreach x,$(THIRD_PARTY_TIDY_A_DIRECTDEPS),$($(x))))
THIRD_PARTY_TIDY_A_CHECKS = \
$(THIRD_PARTY_TIDY_A).pkg \
$(THIRD_PARTY_TIDY_A_HDRS:%=o/$(MODE)/%.ok)
$(THIRD_PARTY_TIDY_A): \
third_party/tidy/ \
$(THIRD_PARTY_TIDY_A).pkg \
$(THIRD_PARTY_TIDY_A_OBJS)
$(THIRD_PARTY_TIDY_A).pkg: \
$(THIRD_PARTY_TIDY_A_OBJS) \
$(foreach x,$(THIRD_PARTY_TIDY_A_DIRECTDEPS),$($(x)_A).pkg)
o/$(MODE)/third_party/tidy/tidy.com.dbg: \
$(THIRD_PARTY_TIDY) \
o/$(MODE)/third_party/tidy/tidy.o \
o/$(MODE)/third_party/tidy/.tidyrc.zip.o \
$(CRT) \
$(APE_NO_MODIFY_SELF)
@$(APELINK)
o/$(MODE)/third_party/tidy/tidy.com: \
o/$(MODE)/third_party/tidy/tidy.com.dbg \
o/$(MODE)/third_party/zip/zip.com \
o/$(MODE)/tool/build/symtab.com
@$(COMPILE) -AOBJCOPY -T$@ $(OBJCOPY) -S -O binary $< $@
@$(COMPILE) -ASYMTAB o/$(MODE)/tool/build/symtab.com \
-o o/$(MODE)/third_party/tidy/.tidy/.symtab $<
@$(COMPILE) -AZIP -T$@ o/$(MODE)/third_party/zip/zip.com -9qj $@ \
o/$(MODE)/third_party/tidy/.tidy/.symtab
o/$(MODE)/third_party/tidy/.tidyrc.zip.o: \
ZIPOBJ_FLAGS += \
-B
THIRD_PARTY_TIDY_COMS = \
o/$(MODE)/third_party/tidy/tidy.com
# Package-level aggregates over every artifact named in
# THIRD_PARTY_TIDY_ARTIFACTS.
# NOTE: THIRD_PARTY_TIDY_SRCS is also assigned near the top of this file.
# Both are recursively-expanded (=) assignments, so any expansion that
# happens after this point uses the definition below.
THIRD_PARTY_TIDY_LIBS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)))
THIRD_PARTY_TIDY_SRCS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_SRCS))
THIRD_PARTY_TIDY_CHECKS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_CHECKS))
THIRD_PARTY_TIDY_OBJS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_OBJS))
$(THIRD_PARTY_TIDY_OBJS): $(BUILD_FILES) third_party/tidy/tidy.mk
.PHONY: o/$(MODE)/third_party/tidy
o/$(MODE)/third_party/tidy: \
$(THIRD_PARTY_TIDY_BINS) \
$(THIRD_PARTY_TIDY_CHECKS)

126
third_party/tidy/tidybuffio.h vendored Normal file
View file

@ -0,0 +1,126 @@
#ifndef __TIDY_BUFFIO_H__
#define __TIDY_BUFFIO_H__
/* clang-format off */
/**************************************************************************//**
* @file
* Treat buffer as a stream that Tidy can use for I/O operations. It offers
* the ability for the buffer to grow as bytes are added, and keeps track
* of current read and write points.
*
* @author
* HTACG, et al (consult git log)
*
* @copyright
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
* Institute of Technology, European Research Consortium for Informatics
* and Mathematics, Keio University).
* @copyright
* See tidy.h for license.
*
* @date
* Consult git log.
******************************************************************************/
#include "third_party/tidy/tidyplatform.h"
#include "third_party/tidy/tidy.h"
#ifdef __cplusplus
extern "C" {
#endif
/** A TidyBuffer is a chunk of memory that can be used for multiple I/O purposes
** within Tidy.
** @ingroup IO
*/
TIDY_STRUCT
struct _TidyBuffer
{
TidyAllocator* allocator; /**< Memory allocator */
byte* bp; /**< Pointer to bytes */
uint size; /**< Number of bytes currently in use */
uint allocated; /**< Number of bytes allocated */
uint next; /**< Offset of current input position */
};
/** Initialize data structure using the default allocator */
void tidyBufInit( TidyBuffer* buf );
/** Initialize data structure using the given custom allocator */
void tidyBufInitWithAllocator( TidyBuffer* buf, TidyAllocator* allocator );
/** Free current buffer, allocate given amount, reset input pointer,
use the default allocator */
void tidyBufAlloc( TidyBuffer* buf, uint allocSize );
/** Free current buffer, allocate given amount, reset input pointer,
use the given custom allocator */
void tidyBufAllocWithAllocator( TidyBuffer* buf,
TidyAllocator* allocator,
uint allocSize );
/** Expand buffer to given size.
** Chunk size is minimum growth. Pass 0 for default of 256 bytes.
*/
void tidyBufCheckAlloc( TidyBuffer* buf,
uint allocSize, uint chunkSize );
/** Free current contents and zero out */
void tidyBufFree( TidyBuffer* buf );
/** Set buffer bytes to 0 */
void tidyBufClear( TidyBuffer* buf );
/** Attach to existing buffer */
void tidyBufAttach( TidyBuffer* buf, byte* bp, uint size );
/** Detach from buffer. Caller must free. */
void tidyBufDetach( TidyBuffer* buf );
/** Append bytes to buffer. Expand if necessary. */
void tidyBufAppend( TidyBuffer* buf, void* vp, uint size );
/** Append one byte to buffer. Expand if necessary. */
void tidyBufPutByte( TidyBuffer* buf, byte bv );
/** Get byte from end of buffer */
int tidyBufPopByte( TidyBuffer* buf );
/** Get byte from front of buffer. Increment input offset. */
int tidyBufGetByte( TidyBuffer* buf );
/** At end of buffer? */
Bool tidyBufEndOfInput( TidyBuffer* buf );
/** Put a byte back into the buffer. Decrement input offset. */
void tidyBufUngetByte( TidyBuffer* buf, byte bv );
/**************
TIDY
**************/
/* Forward declarations
*/
/** Initialize a buffer input source */
void tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf );
/** Initialize a buffer output sink */
void tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf );
#ifdef __cplusplus
}
#endif
#endif /* __TIDY_BUFFIO_H__ */
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1472
third_party/tidy/tidyenum.h vendored Normal file

File diff suppressed because it is too large Load diff

2765
third_party/tidy/tidylib.c vendored Normal file

File diff suppressed because it is too large Load diff

65
third_party/tidy/tidyplatform.h vendored Normal file
View file

@ -0,0 +1,65 @@
#ifndef __TIDY_PLATFORM_H__
#define __TIDY_PLATFORM_H__
COSMOPOLITAN_C_START_
/* Build-time configuration switches for this port (see PLATFORM_NAME). */
#define LINUX_OS
#define PLATFORM_NAME "Cosmopolitan"
/* Configuration file locations; how they are consulted is not defined here. */
#define TIDY_CONFIG_FILE "/zip/.tidyrc"
#define TIDY_USER_CONFIG_FILE "~/.tidyrc"
/* Feature toggles: 0 disables, 1 enables. */
#define SUPPORT_LOCALIZATIONS 0
#define SUPPORT_CONSOLE_APP 1
#define FILENAMES_CASE_SENSITIVE 1
#define PRESERVE_FILE_TIMES 1
#define HAS_FUTIME 0
#define UTIME_NEEDS_CLOSED_FILE 1
/* HAS_VSNPRINTF selects vsnprintf() over vsprintf() in TY_(tmbvsnprintf). */
#define HAS_VSNPRINTF 1
#define SUPPORT_POSIX_MAPPED_FILES 1
/* The decoration macros below are deliberately defined empty in this port. */
#define TIDY_EXPORT
#define TIDY_STRUCT
#define TIDY_THREAD_LOCAL
#define TIDY_INDENTATION_LIMIT 50
#define TIDY_CALL
/* #define SUPPORT_GETPWNAM */
/* Markers for intentionally unused parameters and functions.
** With GCC-compatible or Intel compilers they attach
** __attribute__((__unused__)); elsewhere ARG_UNUSED(x) is just x and
** FUNC_UNUSED expands to nothing.
*/
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define ARG_UNUSED(x) x __attribute__((__unused__))
#define FUNC_UNUSED __attribute__((__unused__))
#else
#define ARG_UNUSED(x) x
#define FUNC_UNUSED
#endif
/* Basic scalar and string types used throughout the library. */
typedef unsigned int uint;
typedef unsigned long ulong;
typedef unsigned char byte;
typedef uint tchar; /* single, full character */
typedef char tmbchar; /* single, possibly partial character */
/* Boolean type: enumerator order makes no == 0 and yes == 1 */
typedef enum { no, yes } Bool;
typedef tmbchar* tmbstr; /* pointer to buffer of possibly partial chars */
typedef const tmbchar* ctmbstr; /* Ditto, but const */
/* Empty string literal cast to the non-const tmbstr type; since it points
** at a string literal it must not be written through. */
#define NULLSTR (tmbstr) ""
#define TMBSTR_DEFINED
/* Opaque data structure.
* Cast to implementation type struct within lib.
* This will reduce inter-dependencies/conflicts w/ application code.
*
* opaque_type(T) declares `struct _T` with a single int placeholder member
* and typedefs T as a pointer to a const `struct _T`. The `#if 1` selects
* that struct-based form; the `const void*` alternative is compiled out.
*/
#if 1
#define opaque_type(typenam) \
struct _##typenam { \
int _opaque; \
}; \
typedef struct _##typenam const* typenam
#else
#define opaque_type(typenam) typedef const void* typenam
#endif
/* Opaque data structure used to pass back
** and forth to keep current position in a
** list or other collection.
*/
opaque_type(TidyIterator);
COSMOPOLITAN_C_END_
#endif /* __TIDY_PLATFORM_H__ */

289
third_party/tidy/tmbstr.c vendored Normal file
View file

@ -0,0 +1,289 @@
/* clang-format off */
/* tmbstr.c -- Tidy string utility functions
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/forward.h"
#include "third_party/tidy/tmbstr.h"
#include "libc/fmt/fmt.h"
#include "third_party/tidy/lexer.h"
/* Duplicate a NUL-terminated string using the supplied allocator.
** Returns NULL when str is NULL; otherwise a fresh copy of str
** (terminator included) obtained through TidyAlloc.
*/
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
{
    tmbstr copy;
    uint nbytes;
    uint ix;

    if ( str == NULL )
        return NULL;

    /* one extra byte for the terminator */
    nbytes = 1 + TY_(tmbstrlen)( str );
    copy = (tmbstr) TidyAlloc( allocator, nbytes );
    for ( ix = 0; ix < nbytes; ++ix )
        copy[ix] = str[ix];
    return copy;
}
/* Duplicate at most len bytes of str using the supplied allocator.
** Returns NULL when str is NULL or len is 0; otherwise a NUL-terminated
** copy held in a block of len+1 bytes obtained through TidyAlloc.
*/
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
{
    tmbstr copy;
    uint ix = 0;

    if ( str == NULL || len == 0 )
        return NULL;

    copy = (tmbstr) TidyAlloc( allocator, 1+len );
    while ( ix < len )
    {
        tmbchar ch = str[ix];
        copy[ix++] = ch;
        if ( ch == 0 )
            break;              /* source ended before len bytes */
    }
    copy[ix] = 0;
    return copy;
}
/* Bounded string copy.
**
** Copies at most size-1 bytes from s2 into s1 and always NUL-terminates
** s1 (unlike strncpy, which neither guarantees termination nor stops
** padding). Nothing is written when s1 or s2 is NULL, or when size is 0.
**
** Returns what is left of size after the copy: 0 when the source was
** truncated (or size was 0), otherwise size minus the bytes copied.
*/
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
{
    /* size == 0 must be rejected up front: the pre-decrement below would
    ** wrap to UINT_MAX and turn this into an unbounded copy. */
    if ( s1 != NULL && s2 != NULL && size > 0 )
    {
        tmbstr cp = s1;
        while ( *s2 && --size )  /* Predecrement: reserve byte */
            *cp++ = *s2++;       /* for NULL terminator. */
        *cp = 0;
    }
    return size;
}
/* Copy s2 (terminator included) into s1 and return the number of bytes
** copied, not counting the terminator. The return value makes
**     cp += tmbstrcpy( cp, "joebob" );
** leave cp on the terminator just written.
*/
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
{
    uint copied = 0;

    for (;;)
    {
        tmbchar ch = *s2++;
        *s1++ = ch;
        if ( ch == 0 )
            break;
        ++copied;
    }
    return copied;
}
/* Append s2 (terminator included) to the end of s1 and return the number
** of bytes appended, not counting the terminator, e.g.
**     cp += tmbstrcat( cp, "joebob" );
*/
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
{
    uint appended = 0;
    tmbstr dst = s1;

    /* locate the existing terminator of s1 */
    while ( *dst != 0 )
        ++dst;

    for (;;)
    {
        tmbchar ch = *s2++;
        *dst++ = ch;
        if ( ch == 0 )
            break;
        ++appended;
    }
    return appended;
}
/* Compare two NUL-terminated strings.
** Returns 0 when they are equal; otherwise 1 or -1 according to how the
** first differing characters compare as plain tmbchar values.
*/
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
{
    for ( ; *s1 == *s2; ++s1, ++s2 )
    {
        if ( *s1 == '\0' )
            return 0;           /* both strings ended together */
    }

    if ( *s1 > *s2 )
        return 1;
    return -1;
}
/* Length of str in bytes (not characters), excluding the terminator.
** A NULL str yields 0.
*/
uint TY_(tmbstrlen)( ctmbstr str )
{
    ctmbstr end;

    if ( str == NULL )
        return 0;

    for ( end = str; *end != 0; ++end )
        /**/;
    return (uint)( end - str );
}
/*
 Case-insensitive comparison of two NUL-terminated strings, provided here
 because MS C 4.2 (and ANSI C) doesn't include strcasecmp.
 Characters are folded with TY_(ToLower) before being compared; as with
 tolower/toupper, that does not work on chars > 127.
 Returns 0 when the strings match; otherwise 1 or -1 according to how the
 first mismatching (unfolded) characters compare.
*/
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
{
    for (;;)
    {
        uint a = (uint)(*s1);
        uint b = (uint)(*s2);

        if ( TY_(ToLower)(a) != TY_(ToLower)(b) )
            break;
        if ( a == '\0' )
            return 0;
        ++s1;
        ++s2;
    }

    if ( *s1 > *s2 )
        return 1;
    return -1;
}
/* Compare at most n bytes of two strings.
**
** NULL handling: two NULL pointers compare equal; a NULL s1 sorts before
** a non-NULL s2 (-1) and vice versa (1).
**
** Returns 0 when the first n bytes match or both strings end earlier at
** the same place; otherwise 1 or -1 according to how the first differing
** characters compare as plain tmbchar values.
**
** The remaining count is tested before each byte is read, so no more than
** n bytes of either string are ever examined. That matters to callers
** such as TY_(tmbsubstrn), which pass length-delimited data.
*/
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    if (s1 == NULL || s2 == NULL)
    {
        if (s1 == s2)
            return 0;
        return (s1 == NULL ? -1 : 1);
    }

    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        if ( (byte)*s1 != (byte)*s2 )
            return (*s1 > *s2 ? 1 : -1);
        if ( *s1 == '\0' )
            return 0;           /* both strings ended together */
    }
    return 0;                   /* n bytes compared equal */
}
/* Case-insensitive comparison of at most n bytes of two strings.
**
** Characters are folded with TY_(ToLower) before being compared.
** Returns 0 when the first n bytes match or both strings end earlier at
** the same place; otherwise 1 or -1 according to how the first
** mismatching (unfolded) characters compare.
**
** The remaining count is tested before each byte is read, so no more than
** n bytes of either string are ever examined.
*/
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        uint c = (uint)(*s1);

        if ( TY_(ToLower)(c) != TY_(ToLower)((uint)(*s2)) )
            return (*s1 > *s2 ? 1 : -1);
        if ( c == '\0' )
            return 0;           /* both strings ended together */
    }
    return 0;                   /* n bytes compared equal */
}
/* Case-sensitive search for s2 within the first len1 bytes of s1.
** Returns a pointer to the first match inside s1, or NULL if there is none.
*/
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint needle_len = TY_(tmbstrlen)(s2);
    /* last offset at which the needle can still fit; negative if it cannot */
    int last = len1 - needle_len;
    int pos = 0;

    while ( pos <= last )
    {
        ctmbstr candidate = s1 + pos;
        if ( TY_(tmbstrncmp)(candidate, s2, needle_len) == 0 )
            return candidate;
        ++pos;
    }
    return NULL;
}
/* Case-insensitive search for s2 within s1.
** Returns a pointer to the first match inside s1, or NULL if there is none.
*/
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
{
    uint hay_len = TY_(tmbstrlen)(s1);
    uint needle_len = TY_(tmbstrlen)(s2);
    /* last offset at which the needle can still fit; negative if it cannot */
    int last = hay_len - needle_len;
    int pos = 0;

    while ( pos <= last )
    {
        ctmbstr candidate = s1 + pos;
        if ( TY_(tmbstrncasecmp)(candidate, s2, needle_len) == 0 )
            return candidate;
        ++pos;
    }
    return NULL;
}
/* Lower-case a string in place by passing every byte through
** TY_(ToLower). Returns s.
*/
tmbstr TY_(tmbstrtolower)( tmbstr s )
{
    tmbstr cursor = s;

    while ( *cursor )
    {
        *cursor = (tmbchar) TY_(ToLower)( *cursor );
        ++cursor;
    }
    return s;
}
/* Upper-case a string in place by passing every byte through
** TY_(ToUpper). Returns s.
*/
tmbstr TY_(tmbstrtoupper)(tmbstr s)
{
    tmbstr cursor = s;

    while ( *cursor )
    {
        *cursor = (tmbchar) TY_(ToUpper)( *cursor );
        ++cursor;
    }
    return s;
}
/* Format into buffer, which holds count bytes, from a va_list.
**
** With HAS_VSNPRINTF the output is limited to count-1 bytes (terminator
** included) and buffer[count-1] is always set to NUL. A count of 0 writes
** nothing: the old code computed count-1 == SIZE_MAX in that case, which
** both removed the bound and stored a NUL far outside the buffer.
**
** Without HAS_VSNPRINTF the call falls back to vsprintf and count is
** ignored, so the caller must guarantee that buffer is large enough.
**
** Returns whatever the underlying formatter returns.
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{
    int retval;
#if HAS_VSNPRINTF
    if ( count == 0 )
        return vsnprintf(NULL, 0, format, args);   /* measure only */
    retval = vsnprintf(buffer, count - 1, format, args);
    /* todo: conditionally null-terminate the string? */
    buffer[count - 1] = 0;
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    return retval;
}
/* Variadic front end for TY_(tmbvsnprintf): formats into buffer, which
** holds count bytes, and returns that function's result.
*/
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
{
    va_list ap;
    int written;

    va_start(ap, format);
    written = TY_(tmbvsnprintf)(buffer, count, format, ap);
    va_end(ap);
    return written;
}
/* Replace every occurrence of str in buffer with rep, in place.
**
** buffer must be a writable NUL-terminated string. When rep is longer
** than str the caller must make sure buffer has room for the expanded
** result, because no capacity is passed in. rep must not point into
** buffer.
**
** The scan resumes after each inserted replacement, so text introduced
** by rep is never matched again. This guarantees termination even when
** rep itself contains str. A NULL argument or an empty str leaves buffer
** untouched.
**
** This replaces an implementation that called strlen() on an
** uninitialized 1024-byte scratch array, could leave that scratch
** unterminated before strcat(), and overflowed it for long inputs.
*/
void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep)
{
    size_t str_len;
    size_t rep_len;
    char *p;

    if ( buffer == NULL || str == NULL || rep == NULL )
        return;

    str_len = strlen(str);
    if ( str_len == 0 )
        return;                 /* an empty pattern matches everywhere */
    rep_len = strlen(rep);

    for ( p = strstr(buffer, str); p != NULL; p = strstr(p + rep_len, str) )
    {
        /* Slide the tail (terminator included) so that the gap left by
        ** the match is exactly rep_len bytes wide. */
        if ( rep_len != str_len )
            memmove(p + rep_len, p + str_len, strlen(p + str_len) + 1);
        memcpy(p, rep, rep_len);
    }
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

92
third_party/tidy/tmbstr.h vendored Normal file
View file

@ -0,0 +1,92 @@
#ifndef __TMBSTR_H__
#define __TMBSTR_H__
/* clang-format off */
/* tmbstr.h - Tidy string utility functions
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/access.h"
#include "third_party/tidy/tidyplatform.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* like strdup but using an allocator */
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
/* like strndup but using an allocator */
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
/* bounded copy that, unlike strncpy, always NUL-terminates s1;
   returns what is left of size after the copy */
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
/* exactly same as strcmp */
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
/* returns byte count, not char count */
uint TY_(tmbstrlen)( ctmbstr str );
/*
MS C 4.2 doesn't include strcasecmp.
Note that tolower and toupper won't
work on chars > 127.
Neither do Lexer.ToLower() or Lexer.ToUpper()!
We get away with this because, except for XML tags,
we are always comparing to ascii element and
attribute names defined by HTML specs.
*/
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
/* return offset of cc from beginning of s1,
** -1 if not found.
*/
/* TY_PRIVATE int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
/* TY_PRIVATE ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
/* transform string to lower case */
tmbstr TY_(tmbstrtolower)( tmbstr s );
/* Transform ASCII chars in string to upper case */
tmbstr TY_(tmbstrtoupper)( tmbstr s );
/* TY_PRIVATE Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
#ifdef __GNUC__
__attribute__((format(printf, 3, 0)))
#endif
;
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 3, 4)))
#endif
;
void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __TMBSTR_H__ */

525
third_party/tidy/utf8.c vendored Normal file
View file

@ -0,0 +1,525 @@
/* clang-format off */
/* utf8.c -- convert characters to/from UTF-8
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Uses public interfaces to abstract input source and output
sink, which may be user supplied or either FILE* or memory
based Tidy implementations. Encoding support is uniform
regardless of I/O mechanism.
Note, UTF-8 encoding, by itself, does not affect the actual
"codepoints" of the underlying character encoding. In the
cases of ASCII, Latin1, Unicode (16-bit, BMP), these all
refer to ISO-10646 "codepoints". For anything else, they
refer to some other "codepoint" set.
Put another way, UTF-8 is a variable length method to
represent any non-negative integer value. The glyph
that a integer value represents is unchanged and defined
externally (e.g. by ISO-10646, Big5, Win1252, MacRoman,
Latin2-9, and so on).
Put still another way, UTF-8 is more of a _transfer_ encoding
than a _character_ encoding, per se.
*/
#include "third_party/tidy/tidy.h"
#include "third_party/tidy/forward.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "third_party/tidy/utf8.h"
/*
UTF-8 encoding/decoding functions
Return # of bytes in UTF-8 sequence; result < 0 if illegal sequence
Also see below for UTF-16 encoding/decoding functions
References :
1) UCS Transformation Format 8 (UTF-8):
ISO/IEC 10646-1:1996 Amendment 2 or ISO/IEC 10646-1:2000 Annex D
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335>
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html>
Table 4 - Mapping from UCS-4 to UTF-8
2) Unicode standards:
<https://www.unicode.org/standard/standard.html>
3) Legal UTF-8 byte sequences:
<https://www.unicode.org/versions/corrigendum1.html>
Code point 1st byte 2nd byte 3rd byte 4th byte
---------- -------- -------- -------- --------
U+0000..U+007F 00..7F
U+0080..U+07FF C2..DF 80..BF
U+0800..U+0FFF E0 A0..BF 80..BF
U+1000..U+FFFF E1..EF 80..BF 80..BF
U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also
allows for the use of five- and six-byte sequences to encode
characters that are outside the range of the Unicode character
set; those five- and six-byte sequences are illegal for the use
of UTF-8 as a transformation of Unicode characters. ISO/IEC 10646
does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF
(but it does allow other noncharacters).
4) RFC 2279: UTF-8, a transformation format of ISO 10646:
<http://www.ietf.org/rfc/rfc2279.txt>
5) UTF-8 and Unicode FAQ:
<http://www.cl.cam.ac.uk/~mgk25/unicode.html>
6) Markus Kuhn's UTF-8 decoder stress test file:
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
7) UTF-8 Demo:
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt>
8) UTF-8 Sampler:
<http://www.columbia.edu/kermit/utf8.html>
9) Transformation Format for 16 Planes of Group 00 (UTF-16):
ISO/IEC 10646-1:1996 Amendment 1 or ISO/IEC 10646-1:2000 Annex C
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n2005/n2005.pdf>
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-16.html>
10) RFC 2781: UTF-16, an encoding of ISO 10646:
<http://www.ietf.org/rfc/rfc2781.txt>
11) UTF-16 invalid surrogate pairs:
<https://www.unicode.org/faq/utf_bom.html#16>
UTF-16 UTF-8 UCS-4
D83F DFF* F0 9F BF B* 0001FFF*
D87F DFF* F0 AF BF B* 0002FFF*
D8BF DFF* F0 BF BF B* 0003FFF*
D8FF DFF* F1 8F BF B* 0004FFF*
D93F DFF* F1 9F BF B* 0005FFF*
D97F DFF* F1 AF BF B* 0006FFF*
...
DBBF DFF* F3 BF BF B* 000FFFF*
DBFF DFF* F4 8F BF B* 0010FFF*
* = E or F
1010 A
1011 B
1100 C
1101 D
1110 E
1111 F
*/
#define kNumUTF8Sequences 7
#define kMaxUTF8Bytes 4
#define kUTF8ByteSwapNotAChar 0xFFFE
#define kUTF8NotAChar 0xFFFF
#define kMaxUTF8FromUCS4 0x10FFFF
#define kUTF16SurrogatesBegin 0x10000
#define kMaxUTF16FromUCS4 0x10FFFF
/* UTF-16 surrogate pair areas */
#define kUTF16LowSurrogateBegin 0xD800
#define kUTF16LowSurrogateEnd 0xDBFF
#define kUTF16HighSurrogateBegin 0xDC00
#define kUTF16HighSurrogateEnd 0xDFFF
/* offsets into validUTF8 table below */
/* Entry [len-1] is the first validUTF8 row for sequences of len bytes and
** entry [len] - 1 is the last such row; the final entry is therefore the
** total row count. */
static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
{
0, /* 1 byte */
1, /* 2 bytes */
2, /* 3 bytes */
4, /* 4 bytes */
kNumUTF8Sequences /* must be last */
};
/* Table of legal UTF-8 sequences, one row per first-byte range.
** lowChar..highChar is the range of values the row can encode, numBytes
** is the sequence length, and validBytes holds a (min, max) pair for each
** byte position: entries [2*k] and [2*k+1] bound byte k of the sequence.
*/
static const struct validUTF8Sequence
{
uint lowChar;
uint highChar;
int numBytes;
byte validBytes[8];
} validUTF8[kNumUTF8Sequences] =
{
/* low high #bytes byte 1 byte 2 byte 3 byte 4 */
{0x0000, 0x007F, 1, {0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
{0x0080, 0x07FF, 2, {0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}},
{0x0800, 0x0FFF, 3, {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
{0x1000, 0xFFFF, 3, {0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
{0x10000, 0x3FFFF, 4, {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
{0x40000, 0xFFFFF, 4, {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
{0x100000, 0x10FFFF, 4, {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}}
};
/* Decode one UTF-8 sequence into a character value.
**
** firstByte is the lead byte of the sequence. The successor bytes are taken
** from successorBytes when it is non-NULL, otherwise they are read from inp
** when that is non-NULL. With neither source, any multi-byte lead byte is
** an error.
**
** On return *c holds the decoded value and *count the number of bytes the
** sequence was taken to occupy (lead byte included). The result is 0 on
** success and -1 when the sequence is rejected.
*/
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
TidyInputSource* inp, int* count )
{
byte tempbuf[10];
byte *buf = &tempbuf[0];
uint ch = 0, n = 0;
int i, bytes = 0;
Bool hasError = no;
/* Successor bytes live in the caller's buffer if given, else in tempbuf */
if ( successorBytes )
buf = (byte*) successorBytes;
/* special check if we have been passed an EOF char */
if ( firstByte == EndOfStream )
{
/* at present */
/* The sentinel is handed back unchanged and reported as success */
*c = firstByte;
*count = 1;
return 0;
}
/* Classify the lead byte: sequence length and its payload bits */
ch = firstByte; /* first byte is passed in separately */
if (ch <= 0x7F) /* 0XXX XXXX one byte */
{
n = ch;
bytes = 1;
}
else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */
{
n = ch & 31;
bytes = 2;
}
else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
{
n = ch & 15;
bytes = 3;
}
else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
{
n = ch & 7;
bytes = 4;
}
/* Five- and six-byte forms are still consumed but always flagged as errors */
else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */
{
n = ch & 3;
bytes = 5;
hasError = yes;
}
else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */
{
n = ch & 1;
bytes = 6;
hasError = yes;
}
else
{
/* not a valid first byte of a UTF-8 sequence */
n = ch;
bytes = 1;
hasError = yes;
}
/* successor bytes should have the form 10XX XXXX */
/* If caller supplied buffer, use it. Else see if caller
** supplied an input source, use that.
*/
if ( successorBytes )
{
for ( i=0; i < bytes-1; ++i )
{
/* A NUL or a byte that is not 10XX XXXX ends the sequence early */
if ( !buf[i] || (buf[i] & 0xC0) != 0x80 )
{
hasError = yes;
bytes = i+1;
break;
}
n = (n << 6) | (buf[i] & 0x3F);
}
}
else if ( inp )
{
/* NOTE(review): if eof() turns true before all successor bytes are read,
** this loop ends with `bytes` unchanged and no error flagged here; the
** checks below are then applied to the partial value. */
for ( i=0; i < bytes-1 && !inp->eof(inp->sourceData); ++i )
{
int b = inp->getByte( inp->sourceData );
buf[i] = (tmbchar) b;
/* End of data or illegal successor byte value */
if ( b == EOF || (buf[i] & 0xC0) != 0x80 )
{
hasError = yes;
bytes = i+1;
/* Give the offending byte back so it can start the next sequence */
if ( b != EOF )
inp->ungetByte( inp->sourceData, buf[i] );
break;
}
n = (n << 6) | (buf[i] & 0x3F);
}
}
else if ( bytes > 1 )
{
/* No source for successor bytes: only the lead byte is accounted for */
hasError = yes;
bytes = 1;
}
/* Reject the two "not a character" values and anything above the maximum */
if (!hasError && ((n == kUTF8ByteSwapNotAChar) || (n == kUTF8NotAChar)))
hasError = yes;
if (!hasError && (n > kMaxUTF8FromUCS4))
hasError = yes;
if (!hasError)
{
int lo, hi;
/* Rows lo..hi of validUTF8 describe sequences of exactly `bytes` bytes */
lo = offsetUTF8Sequences[bytes - 1];
hi = offsetUTF8Sequences[bytes] - 1;
/* check for overlong sequences */
if ((n < validUTF8[lo].lowChar) || (n > validUTF8[hi].highChar))
hasError = yes;
else
{
hasError = yes; /* assume error until proven otherwise */
/* NOTE(review): hasError is cleared as soon as one byte falls inside its
** range and is never set again below, so once the lead byte matches a row
** the remaining bytes no longer influence the outcome. Confirm that this
** is the intended strictness. */
for (i = lo; i <= hi; i++)
{
int tempCount;
byte theByte;
for (tempCount = 0; tempCount < bytes; tempCount++)
{
if (!tempCount)
theByte = (tmbchar) firstByte;
else
theByte = buf[tempCount - 1];
if ( theByte >= validUTF8[i].validBytes[(tempCount * 2)] &&
theByte <= validUTF8[i].validBytes[(tempCount * 2) + 1] )
hasError = no;
if (hasError)
break;
}
}
}
}
#if 1 && defined(_DEBUG)
if ( hasError )
{
/* debug */
fprintf( stderr, "UTF-8 decoding error of %d bytes : ", bytes );
fprintf( stderr, "0x%02x ", firstByte );
for (i = 1; i < bytes; i++)
fprintf( stderr, "0x%02x ", buf[i - 1] );
fprintf( stderr, " = U+%04X\n", n );
}
#endif
/* Both outputs are stored even when the sequence is rejected */
*count = bytes;
*c = n;
if ( hasError )
return -1;
return 0;
}
/* Encode the value c as a UTF-8 byte sequence.
**
** The bytes are stored in encodebuf when it is non-NULL, otherwise in a
** local scratch buffer. encodebuf must have room for up to 6 bytes,
** because the (rejected) five- and six-byte forms are still written out.
** When outp is non-NULL and the value is acceptable, the bytes are also
** sent to the sink one at a time via putByte.
**
** *count receives the number of bytes produced (0 when c is above
** 0x7FFFFFFF). Returns 0 on success and -1 when c is 0xFFFE, 0xFFFF,
** greater than kMaxUTF8FromUCS4, or otherwise needs more than four bytes.
** Nothing is sent to outp in the error case.
*/
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count )
{
    byte tempbuf[10] = {0};
    byte* buf = &tempbuf[0];
    int bytes = 0;
    Bool hasError = no;

    if ( encodebuf )
        buf = (byte*) encodebuf;

    if (c <= 0x7F)  /* 0XXX XXXX one byte */
    {
        buf[0] = (tmbchar) c;
        bytes = 1;
    }
    else if (c <= 0x7FF)  /* 110X XXXX two bytes */
    {
        buf[0] = (tmbchar) ( 0xC0 | (c >> 6) );
        buf[1] = (tmbchar) ( 0x80 | (c & 0x3F) );
        bytes = 2;
    }
    else if (c <= 0xFFFF)  /* 1110 XXXX three bytes */
    {
        buf[0] = (tmbchar) (0xE0 | (c >> 12));
        buf[1] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[2] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 3;
        if ( c == kUTF8ByteSwapNotAChar || c == kUTF8NotAChar )
            hasError = yes;
    }
    else if (c <= 0x1FFFFF)  /* 1111 0XXX four bytes */
    {
        buf[0] = (tmbchar) (0xF0 | (c >> 18));
        buf[1] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[3] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 4;
        if (c > kMaxUTF8FromUCS4)
            hasError = yes;
    }
    else if (c <= 0x3FFFFFF)  /* 1111 10XX five bytes */
    {
        buf[0] = (tmbchar) (0xF8 | (c >> 24));
        /* The 6-bit mask was missing here, letting bits 24-25 leak into
        ** what must be a 10XX XXXX continuation byte. */
        buf[1] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[3] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[4] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 5;
        hasError = yes;
    }
    else if (c <= 0x7FFFFFFF)  /* 1111 110X six bytes */
    {
        buf[0] = (tmbchar) (0xFC | (c >> 30));
        buf[1] = (tmbchar) (0x80 | ((c >> 24) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
        buf[3] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[4] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[5] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 6;
        hasError = yes;
    }
    else
        hasError = yes;

    /* don't output invalid UTF-8 byte sequence to a stream */
    if ( !hasError && outp != NULL )
    {
        int ix;
        for ( ix=0; ix < bytes; ++ix )
            outp->putByte( outp->sinkData, buf[ix] );
    }

#if 1 && defined(_DEBUG)
    if ( hasError )
    {
        int i;
        fprintf( stderr, "UTF-8 encoding error for U+%x : ", c );
        for (i = 0; i < bytes; i++)
            fprintf( stderr, "0x%02x ", buf[i] );
        fprintf( stderr, "\n" );
    }
#endif

    *count = bytes;
    if (hasError)
        return -1;
    return 0;
}
/* Decode the UTF-8 sequence that starts at str.
**
** The decoded character is stored in *ch; a sequence that fails to decode
** yields the replacement character U+FFFD instead.
**
** Returns one less than the number of bytes used by the sequence, so a
** caller that advances by one per character can add the result to step
** over the whole sequence.
*/
uint TY_(GetUTF8)( ctmbstr str, uint *ch )
{
    uint n;
    int bytes = 0;
    int err;

    /* first byte "str[0]" is passed in separately from the */
    /* rest of the UTF-8 byte sequence starting at "str[1]" */
    /* It goes through `byte` first: where plain char is signed, a bare */
    /* str[0] >= 0x80 would be sign-extended into a huge uint, and 0xFF */
    /* could then be mistaken for the all-ones end-of-stream sentinel.  */
    err = TY_(DecodeUTF8BytesToChar)( &n, (byte) str[0], str+1, NULL, &bytes );
    if (err)
    {
#if 1 && defined(_DEBUG)
        fprintf(stderr, "pprint UTF-8 decoding error for U+%x : ", n);
#endif
        n = 0xFFFD; /* replacement char */
    }
    *ch = n;
    return bytes - 1;
}
/* Write c into buf as UTF-8 and return the position just past the bytes
** written. A value the encoder rejects is stored as the three-byte
** encoding of the replacement character U+FFFD instead.
*/
tmbstr TY_(PutUTF8)( tmbstr buf, uint c )
{
    int nbytes = 0;

    if ( TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &nbytes ) != 0 )
    {
#if 1 && defined(_DEBUG)
        fprintf(stderr, "pprint UTF-8 encoding error for U+%x : ", c);
#endif
        /* replacement char 0xFFFD encoded as UTF-8 */
        buf[0] = (byte) 0xEF;
        buf[1] = (byte) 0xBF;
        buf[2] = (byte) 0xBD;
        nbytes = 3;
    }
    return buf + nbytes;
}
/* yes when ucs4 does not exceed kMaxUTF16FromUCS4, no otherwise */
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 )
{
    if ( ucs4 > kMaxUTF16FromUCS4 )
        return no;
    return yes;
}
/* yes when ch lies in kUTF16HighSurrogateBegin..kUTF16HighSurrogateEnd */
Bool TY_(IsHighSurrogate)( tchar ch )
{
    if ( ch < kUTF16HighSurrogateBegin )
        return no;
    return ( ch <= kUTF16HighSurrogateEnd ) ? yes : no;
}
/* yes when ch lies in kUTF16LowSurrogateBegin..kUTF16LowSurrogateEnd */
Bool TY_(IsLowSurrogate)( tchar ch )
{
    if ( ch < kUTF16LowSurrogateBegin )
        return no;
    return ( ch <= kUTF16LowSurrogateEnd ) ? yes : no;
}
/* Combine a surrogate pair into a single character value.
** `low` supplies the upper bits (its offset from kUTF16LowSurrogateBegin
** times 0x400) and `high` the lower bits (its offset from
** kUTF16HighSurrogateBegin); 0x10000 is then added. Both arguments are
** asserted to lie in their respective surrogate ranges.
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low )
{
    tchar upper_bits;
    tchar lower_bits;

    assert( TY_(IsHighSurrogate)(high) && TY_(IsLowSurrogate)(low) );

    upper_bits = (low - kUTF16LowSurrogateBegin) * 0x400;
    lower_bits = high - kUTF16HighSurrogateBegin;
    return upper_bits + lower_bits + 0x10000;
}
/* Split a character value into a surrogate pair.
** Succeeds (returns yes) only when utf16 passes TY_(IsValidCombinedChar)
** and both output pointers are non-NULL. On success *low receives the
** surrogate based at kUTF16LowSurrogateBegin and *high the one based at
** kUTF16HighSurrogateBegin. On failure nothing is stored and no is
** returned.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high )
{
    tchar offset;

    if ( !TY_(IsValidCombinedChar)( utf16 ) || !high || !low )
        return no;

    offset = utf16 - kUTF16SurrogatesBegin;
    *low = offset / 0x400 + kUTF16LowSurrogateBegin;
    *high = offset % 0x400 + kUTF16HighSurrogateBegin;
    return yes;
}
/* yes when ch is at least kUTF16SurrogatesBegin and its low 16 bits are
** neither 0xFFFE nor 0xFFFF; no otherwise.
*/
Bool TY_(IsValidCombinedChar)( tchar ch )
{
    if ( ch < kUTF16SurrogatesBegin )
        return no;
    if ( (ch & 0x0000FFFE) == 0x0000FFFE )
        return no;
    if ( (ch & 0x0000FFFF) == 0x0000FFFF )
        return no;
    return yes;
}
/* yes when ch is at or above kUTF16SurrogatesBegin, no otherwise */
Bool TY_(IsCombinedChar)( tchar ch )
{
    if ( ch < kUTF16SurrogatesBegin )
        return no;
    return yes;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

48
third_party/tidy/utf8.h vendored Normal file
View file

@ -0,0 +1,48 @@
#ifndef __UTF8_H__
#define __UTF8_H__
/* clang-format off */
/* utf8.h -- convert characters to/from UTF-8
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
#include "third_party/tidy/tidyplatform.h"
#include "third_party/tidy/access.h"
#include "third_party/tidy/tidybuffio.h"
/* UTF-8 encoding/decoding support
** Does not convert character "codepoints", i.e. to/from 10646.
*/
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
TidyInputSource* inp, int* count );
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
TidyOutputSink* outp, int* count );
uint TY_(GetUTF8)( ctmbstr str, uint *ch );
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM UNICODE_BOM_BE
#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
Bool TY_(IsHighSurrogate)( tchar ch );
Bool TY_(IsLowSurrogate)( tchar ch );
Bool TY_(IsCombinedChar)( tchar ch );
Bool TY_(IsValidCombinedChar)( tchar ch );
tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
/* Parameter names follow the definition in utf8.c: the second argument
** receives the surrogate in the 0xD800-based range, the third the one in
** the 0xDC00-based range. Returns no, storing nothing, when utf16 is not
** a valid combined character or either pointer is NULL.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high );
#endif /* __UTF8_H__ */

24
third_party/tidy/version.inc vendored Normal file
View file

@ -0,0 +1,24 @@
/* clang-format off */
/* version information
(c) 2007-2015 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
*/
/* Release date string: RELEASE_DATE when the build defines it, otherwise
** the fallback literal below. */
#ifdef RELEASE_DATE
static const char TY_(release_date)[] = RELEASE_DATE;
#else
static const char TY_(release_date)[] = "2015/01/22";
#endif
/* Library version string: LIBTIDY_VERSION, with "." RC_NUMBER appended when
** RC_NUMBER is also defined; the fallback literal applies only when
** LIBTIDY_VERSION is undefined. */
#ifdef LIBTIDY_VERSION
#ifdef RC_NUMBER
static const char TY_(library_version)[] = LIBTIDY_VERSION "." RC_NUMBER;
#else
static const char TY_(library_version)[] = LIBTIDY_VERSION;
#endif
#else
static const char TY_(library_version)[] = "5.0.0";
#endif
/* eof */