mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-24 06:49:02 +00:00
Add w3c html tidy
This commit is contained in:
parent
ecc8962555
commit
3c7ae0fc72
63 changed files with 56239 additions and 0 deletions
Makefile
libc/isystem/sys
third_party
third_party.mk
tidy
.tidyrcLICENSE.mdREADME.cosmoaccess.caccess.halloc.cattrdict.cattrdict.hattrs.cattrs.hbuffio.ccharsets.ccharsets.hclean.cclean.hconfig.cconfig.hentities.centities.hfileio.cfileio.hforward.hgdoc.cgdoc.histack.clanguage.clanguage.hlanguage_en.inclexer.clexer.hmappedio.cmappedio.hmessage.cmessage.hmessageobj.cmessageobj.hparser.cparser.hpprint.cpprint.hsprtf.csprtf.hstreamio.cstreamio.htagask.ctags.ctags.htidy-int.htidy.ctidy.htidy.mktidybuffio.htidyenum.htidylib.ctidyplatform.htmbstr.ctmbstr.hutf8.cutf8.hversion.inc
1
Makefile
1
Makefile
|
@ -138,6 +138,7 @@ include net/http/http.mk # │
|
|||
include third_party/mbedtls/mbedtls.mk # │
|
||||
include net/https/https.mk # │
|
||||
include third_party/regex/regex.mk #─┘
|
||||
include third_party/tidy/tidy.mk
|
||||
include third_party/third_party.mk
|
||||
include libc/testlib/testlib.mk
|
||||
include tool/viz/lib/vizlib.mk
|
||||
|
|
|
@ -7,4 +7,12 @@
|
|||
#include "libc/sysv/consts/s.h"
|
||||
#include "libc/sysv/consts/utime.h"
|
||||
#include "libc/time/time.h"
|
||||
|
||||
#define st_atime st_atim.tv_sec
|
||||
#define st_atime_nsec st_atim.tv_nsec
|
||||
#define st_mtime st_mtim.tv_sec
|
||||
#define st_mtime_nsec st_mtim.tv_nsec
|
||||
#define st_ctime st_ctim.tv_sec
|
||||
#define st_ctime_nsec st_ctim.tv_nsec
|
||||
|
||||
#endif
|
||||
|
|
1
third_party/third_party.mk
vendored
1
third_party/third_party.mk
vendored
|
@ -24,6 +24,7 @@ o/$(MODE)/third_party: \
|
|||
o/$(MODE)/third_party/smallz4 \
|
||||
o/$(MODE)/third_party/sqlite3 \
|
||||
o/$(MODE)/third_party/stb \
|
||||
o/$(MODE)/third_party/tidy \
|
||||
o/$(MODE)/third_party/xed \
|
||||
o/$(MODE)/third_party/zip \
|
||||
o/$(MODE)/third_party/zlib
|
||||
|
|
101
third_party/tidy/.tidyrc
vendored
Normal file
101
third_party/tidy/.tidyrc
vendored
Normal file
|
@ -0,0 +1,101 @@
|
|||
# http://tidy.sourceforge.net/docs/quickref.html#clean
|
||||
accessibility-check: 0
|
||||
add-meta-charset: yes
|
||||
add-xml-decl: no
|
||||
add-xml-space: no
|
||||
alt-text:
|
||||
anchor-as-name: yes
|
||||
ascii-chars: no
|
||||
assume-xml-procins: no
|
||||
bare: no
|
||||
break-before-br: no
|
||||
char-encoding: utf8
|
||||
clean: yes
|
||||
coerce-endtags: yes
|
||||
css-prefix: c
|
||||
custom-tags: no
|
||||
decorate-inferred-ul: no
|
||||
doctype: auto
|
||||
drop-empty-elements: yes
|
||||
drop-empty-paras: yes
|
||||
drop-proprietary-attributes: no
|
||||
enclose-block-text: yes
|
||||
enclose-text: yes
|
||||
error-file:
|
||||
escape-cdata: no
|
||||
escape-scripts: yes
|
||||
fix-backslash: yes
|
||||
fix-bad-comments: auto
|
||||
fix-style-tags: no
|
||||
fix-uri: yes
|
||||
force-output: no
|
||||
gdoc: no
|
||||
gnu-emacs: yes
|
||||
hide-comments: no
|
||||
indent-attributes: no
|
||||
indent-cdata: no
|
||||
indent-spaces: 2
|
||||
indent-with-tabs: no
|
||||
indent: no
|
||||
input-encoding: utf8
|
||||
input-xml: no
|
||||
join-classes: no
|
||||
join-styles: yes
|
||||
keep-tabs: no
|
||||
keep-time: no
|
||||
literal-attributes: no
|
||||
logical-emphasis: no
|
||||
lower-literals: yes
|
||||
markup: yes
|
||||
merge-divs: auto
|
||||
merge-emphasis: yes
|
||||
merge-spans: auto
|
||||
mute-id: no
|
||||
mute:
|
||||
ncr: yes
|
||||
new-blocklevel-tags:
|
||||
new-empty-tags:
|
||||
new-inline-tags:
|
||||
new-pre-tags:
|
||||
newline: LF
|
||||
numeric-entities: no
|
||||
omit-optional-tags: yes
|
||||
output-bom: auto
|
||||
output-encoding: utf8
|
||||
output-file:
|
||||
output-html: no
|
||||
output-xhtml: no
|
||||
output-xml: no
|
||||
preserve-entities: no
|
||||
priority-attributes:
|
||||
punctuation-wrap: no
|
||||
quiet: no
|
||||
quote-ampersand: yes
|
||||
quote-marks: no
|
||||
quote-nbsp: yes
|
||||
repeated-attributes: keep-last
|
||||
replace-color: no
|
||||
show-body-only: no
|
||||
show-errors: 10
|
||||
show-filename: no
|
||||
show-info: yes
|
||||
show-meta-change: no
|
||||
show-warnings: yes
|
||||
skip-nested: yes
|
||||
sort-attributes: none
|
||||
strict-tags-attributes: no
|
||||
tab-size: 8
|
||||
tidy-mark: no
|
||||
uppercase-attributes: no
|
||||
uppercase-tags: no
|
||||
vertical-space: yes
|
||||
warn-proprietary-attributes: no
|
||||
word-2000: no
|
||||
wrap-asp: yes
|
||||
wrap-attributes: no
|
||||
wrap-jste: yes
|
||||
wrap-php: no
|
||||
wrap-script-literals: no
|
||||
wrap-sections: yes
|
||||
wrap: 68
|
||||
write-back: no
|
50
third_party/tidy/LICENSE.md
vendored
Normal file
50
third_party/tidy/LICENSE.md
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
# HTML Tidy
|
||||
|
||||
## HTML parser and pretty printer
|
||||
|
||||
Copyright (c) 1998-2016 World Wide Web Consortium
|
||||
(Massachusetts Institute of Technology, European Research
|
||||
Consortium for Informatics and Mathematics, Keio University).
|
||||
All Rights Reserved.
|
||||
|
||||
Additional contributions (c) 2001-2016 University of Toronto, Terry Teague,
|
||||
@geoffmcl, HTACG, and others.
|
||||
|
||||
### Contributing Author(s):
|
||||
|
||||
Dave Raggett <dsr@w3.org>
|
||||
|
||||
The contributing author(s) would like to thank all those who
|
||||
helped with testing, bug fixes and suggestions for improvements.
|
||||
This wouldn't have been possible without your help.
|
||||
|
||||
## COPYRIGHT NOTICE:
|
||||
|
||||
This software and documentation is provided "as is," and
|
||||
the copyright holders and contributing author(s) make no
|
||||
representations or warranties, express or implied, including
|
||||
but not limited to, warranties of merchantability or fitness
|
||||
for any particular purpose or that the use of the software or
|
||||
documentation will not infringe any third party patents,
|
||||
copyrights, trademarks or other rights.
|
||||
|
||||
The copyright holders and contributing author(s) will not be held
|
||||
liable for any direct, indirect, special or consequential damages
|
||||
arising out of any use of the software or documentation, even if
|
||||
advised of the possibility of such damage.
|
||||
|
||||
Permission is hereby granted to use, copy, modify, and distribute
|
||||
this source code, or portions hereof, documentation and executables,
|
||||
for any purpose, without fee, subject to the following restrictions:
|
||||
|
||||
1. The origin of this source code must not be misrepresented.
|
||||
2. Altered versions must be plainly marked as such and must
|
||||
not be misrepresented as being the original source.
|
||||
3. This Copyright notice may not be removed or altered from any
|
||||
source or altered source distribution.
|
||||
|
||||
The copyright holders and contributing author(s) specifically
|
||||
permit, without fee, and encourage the use of this source code
|
||||
as a component for supporting the Hypertext Markup Language in
|
||||
commercial products. If you use this source code in a product,
|
||||
acknowledgement is not required but would be appreciated.
|
31
third_party/tidy/README.cosmo
vendored
Normal file
31
third_party/tidy/README.cosmo
vendored
Normal file
|
@ -0,0 +1,31 @@
|
|||
DESCRIPTION
|
||||
|
||||
HTML Tidy is a tool for spotting errors in HTML code. It's also able
|
||||
to reformat source code in a configurable manner. It has outstanding
|
||||
support for HTML5.
|
||||
|
||||
PROVENANCE
|
||||
|
||||
https://github.com/htacg/tidy-html5/
|
||||
Commit: d08ddc2860aa95ba8e301343a30837f157977cba
|
||||
Author: Jim Derry <balthisar@gmail.com>
|
||||
Date: Tue Jan 25 10:17:15 2022 -0500
|
||||
|
||||
LICENSE
|
||||
|
||||
W3C License
|
||||
This is a permissive license that only requires notice preservation in
|
||||
sources. https://en.wikipedia.org/wiki/W3C_Software_Notice_and_License
|
||||
Similar to the Apache 2.0 license, any local changes must be documented.
|
||||
|
||||
LOCAL CHANGES
|
||||
|
||||
No changes to tidy program. Only platform normalization.
|
||||
|
||||
- Obtain .tidyrc from /zip/.tidyrc rather than /etc/tidrc
|
||||
- Configure tidyplatform.h
|
||||
- Rename a few .h files to .inc
|
||||
- Normalize header includes for cosmopolitan repo
|
||||
- Delete ugly no-op macros, e.g. TIDY_CALL, TIDY_EXPORT, etc.
|
||||
- Add clang-format off directives
|
||||
- Delete trailing whitespace
|
3542
third_party/tidy/access.c
vendored
Normal file
3542
third_party/tidy/access.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
91
third_party/tidy/access.h
vendored
Normal file
91
third_party/tidy/access.h
vendored
Normal file
|
@ -0,0 +1,91 @@
|
|||
#ifndef __ACCESS_H__
|
||||
#define __ACCESS_H__
|
||||
/* clang-format off */
|
||||
|
||||
/*********************************************************************
|
||||
* carry out accessibility checks
|
||||
*
|
||||
* This module carries out processes for all accessibility checks. It
|
||||
* traverses through all the content within the tree and evaluates the
|
||||
* tags for accessibility.
|
||||
*
|
||||
* To perform the following checks, 'AccessibilityChecks' must be
|
||||
* called AFTER the tree structure has been formed.
|
||||
*
|
||||
* If, in the command prompt or configuration file, there is no
|
||||
* specification of which accessibility priorities to check, then no
|
||||
* accessibility checks will be performed.
|
||||
*
|
||||
* The accessibility checks performed depend on the level the user selects:
|
||||
* 1. priority 1
|
||||
* 2. priority 1 & 2
|
||||
* 3. priority 1, 2, & 3
|
||||
*
|
||||
* Reference document: https://www.w3.org/TR/WAI-WEBCONTENT/
|
||||
*
|
||||
* Copyright University of Toronto
|
||||
* Portions (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
* See `tidy.h` for the copyright notice.
|
||||
* Programmed by: Mike Lam and Chris Ridpath
|
||||
* Modifications by: Terry Teague (TRT)
|
||||
* Further modifications: consult git log.
|
||||
*********************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
|
||||
/* Capacity, in tmbchar units, of the fixed text buffers declared in
 * struct _TidyAccessImpl (textNode and text).
 */
enum { TEXTBUF_SIZE = 128u };
|
||||
|
||||
struct _TidyAccessImpl;
|
||||
typedef struct _TidyAccessImpl TidyAccessImpl;
|
||||
|
||||
struct _TidyAccessImpl
|
||||
{
|
||||
/* gets set from Tidy variable AccessibilityCheckLevel */
|
||||
int PRIORITYCHK; /**< */
|
||||
|
||||
/* Number of characters that are found within the concatenated text */
|
||||
int counter;
|
||||
|
||||
/* list of characters in the text nodes found within a container element */
|
||||
tmbchar textNode[ TEXTBUF_SIZE ];
|
||||
|
||||
/* The list of characters found within one text node */
|
||||
tmbchar text[ TEXTBUF_SIZE ];
|
||||
|
||||
/* Number of frame elements found within a frameset */
|
||||
int numFrames;
|
||||
|
||||
/* Number of 'longdesc' attributes found within a frameset */
|
||||
int HasCheckedLongDesc;
|
||||
|
||||
int CheckedHeaders;
|
||||
int ListElements;
|
||||
int OtherListElements;
|
||||
|
||||
/* For 'USEMAP' identifier */
|
||||
Bool HasUseMap;
|
||||
Bool HasName;
|
||||
Bool HasMap;
|
||||
|
||||
/* For tracking nodes that are deleted from the original parse tree - TRT */
|
||||
/* Node *access_tree; */
|
||||
|
||||
Bool HasTH;
|
||||
Bool HasValidFor;
|
||||
Bool HasValidId;
|
||||
Bool HasValidRowHeaders;
|
||||
Bool HasValidColumnHeaders;
|
||||
Bool HasInvalidRowHeader;
|
||||
Bool HasInvalidColumnHeader;
|
||||
int ForID;
|
||||
|
||||
};
|
||||
|
||||
|
||||
void TY_(AccessibilityChecks)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
#endif /* __ACCESS_H__ */
|
130
third_party/tidy/alloc.c
vendored
Normal file
130
third_party/tidy/alloc.c
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* clang-format off */
|
||||
/* alloc.c -- Default memory allocation routines.
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
/* #define DEBUG_MEMORY very NOISY extra DEBUG of memory allocation, reallocation and free */
|
||||
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/assert.h"
|
||||
#include "third_party/tidy/sprtf.h"
|
||||
|
||||
static TidyMalloc g_malloc = NULL;
|
||||
static TidyRealloc g_realloc = NULL;
|
||||
static TidyFree g_free = NULL;
|
||||
static TidyPanic g_panic = NULL;
|
||||
|
||||
#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
|
||||
static int alloccnt = 0;
|
||||
static int realloccnt = 0;
|
||||
static int freecnt = 0;
|
||||
#endif
|
||||
|
||||
|
||||
/* Installs fmalloc as the allocation hook stored in g_malloc.
 *
 * defaultAlloc calls the hook instead of malloc() whenever it is non-NULL,
 * so passing NULL switches back to malloc(). Always returns yes.
 */
Bool tidySetMallocCall( TidyMalloc fmalloc )
{
    g_malloc = fmalloc;
    return yes;
}
|
||||
/* Installs frealloc as the reallocation hook stored in g_realloc.
 *
 * defaultRealloc calls the hook instead of realloc() whenever it is
 * non-NULL, so passing NULL switches back to realloc(). Always returns yes.
 */
Bool tidySetReallocCall( TidyRealloc frealloc )
{
    g_realloc = frealloc;
    return yes;
}
|
||||
/* Installs ffree as the deallocation hook stored in g_free.
 *
 * defaultFree calls the hook instead of free() whenever it is non-NULL,
 * so passing NULL switches back to free(). Always returns yes.
 */
Bool tidySetFreeCall( TidyFree ffree )
{
    g_free = ffree;
    return yes;
}
|
||||
/* Installs fpanic as the fatal-error hook stored in g_panic.
 *
 * defaultPanic hands its message to the hook when it is non-NULL; otherwise
 * it prints the message to stderr and exits. Always returns yes.
 */
Bool tidySetPanicCall( TidyPanic fpanic )
{
    g_panic = fpanic;
    return yes;
}
|
||||
|
||||
/* Fatal-error handler of the default allocator.
 *
 * When a panic hook has been registered through tidySetPanicCall, msg is
 * forwarded to it and this function returns once the hook returns.
 * Otherwise msg is written to stderr and the process exits. The allocator
 * argument is not used.
 */
static void defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbstr msg )
{
    if ( g_panic != NULL )
    {
        g_panic( msg );
        return;
    }

    fprintf( stderr, "Fatal error: %s\n", msg );
#ifdef _DEBUG
    assert(0);
#endif
    /* exit status 2 signifies a serious error */
    exit(2);
}
|
||||
|
||||
/* Allocates size bytes through the registered malloc hook, or through
 * malloc() when no hook is set.
 *
 * A NULL result is reported via defaultPanic with "Out of memory!"; if the
 * panic handler returns, NULL is handed back to the caller.
 * NOTE(review): the NULL check does not look at size, so a NULL returned
 * for a zero-byte request is reported as out of memory as well.
 */
static void* defaultAlloc( TidyAllocator* allocator, size_t size )
{
    void *block;

    if ( g_malloc )
        block = g_malloc( size );
    else
        block = malloc( size );

    if ( block == NULL )
        defaultPanic( allocator,"Out of memory!");
#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    alloccnt++;
    SPRTF("%d: alloc MEM %p, size %d\n", alloccnt, block, (int)size );
    if (size == 0) {
        SPRTF("NOTE: An allocation of ZERO bytes!!!!!!\n");
    }
#endif
    return block;
}
|
||||
|
||||
/* Resizes mem to newsize bytes through the registered realloc hook, or
 * through realloc() when no hook is set.
 *
 * A NULL mem is treated as a fresh allocation and delegated to
 * defaultAlloc. A NULL result from the resize is reported via defaultPanic
 * with "Out of memory!"; if the panic handler returns, NULL is handed back
 * to the caller.
 */
static void* defaultRealloc( TidyAllocator* allocator, void* mem, size_t newsize )
{
    void *resized;

    if ( !mem )
        return defaultAlloc( allocator, newsize );

    if ( g_realloc )
        resized = g_realloc( mem, newsize );
    else
        resized = realloc( mem, newsize );

    if ( resized == NULL )
        defaultPanic( allocator, "Out of memory!");
#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    realloccnt++;
    SPRTF("%d: realloc MEM %p, size %d\n", realloccnt, resized, (int)newsize );
#endif
    return resized;
}
|
||||
|
||||
/* Releases mem through the registered free hook, or through free() when no
 * hook is set.
 *
 * A NULL mem is ignored: neither the hook nor the debug counter is touched.
 * The allocator argument is not used.
 */
static void defaultFree( TidyAllocator* ARG_UNUSED(allocator), void* mem )
{
    if ( mem == NULL )
        return;

#if defined(ENABLE_DEBUG_LOG) && defined(DEBUG_MEMORY)
    freecnt++;
    SPRTF("%d: free MEM %p\n", freecnt, mem );
#endif
    if ( g_free )
        g_free( mem );
    else
        free( mem );
}
|
||||
|
||||
static const TidyAllocatorVtbl defaultVtbl = {
|
||||
defaultAlloc,
|
||||
defaultRealloc,
|
||||
defaultFree,
|
||||
defaultPanic
|
||||
};
|
||||
|
||||
TidyAllocator TY_(g_default_allocator) = {
|
||||
&defaultVtbl
|
||||
};
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
3670
third_party/tidy/attrdict.c
vendored
Normal file
3670
third_party/tidy/attrdict.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
160
third_party/tidy/attrdict.h
vendored
Normal file
160
third_party/tidy/attrdict.h
vendored
Normal file
|
@ -0,0 +1,160 @@
|
|||
#ifndef __ATTRDICT_H__
|
||||
#define __ATTRDICT_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* attrdict.h -- extended attribute information
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
|
||||
typedef struct _AttrVersion
|
||||
{
|
||||
TidyAttrId attribute;
|
||||
uint versions;
|
||||
} AttrVersion;
|
||||
|
||||
extern const AttrVersion TY_(W3CAttrsFor_A)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ABBR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ACRONYM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ADDRESS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_APPLET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_AREA)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_B)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BASE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BASEFONT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BDO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BIG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BLOCKQUOTE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BODY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BUTTON)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CAPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CENTER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CITE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CODE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COLGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DEL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DFN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIV)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_EM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIELDSET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FONT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FORM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FRAME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FRAMESET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H1)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H2)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H3)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H4)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H5)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H6)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HEAD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HTML)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_I)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_IFRAME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_IMG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_INPUT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_INS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ISINDEX)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_KBD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LABEL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LEGEND)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LI)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LINK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LISTING)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MAP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MATHML)[]; /* [i_a]2 */
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MENU)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_META)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NOFRAMES)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NOSCRIPT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OBJECT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OPTGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_P)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PARAM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PICTURE)[]; /* Issue #151 - html5 */
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PLAINTEXT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PRE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_Q)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RB)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RBC)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RTC)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RUBY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_S)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SAMP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SCRIPT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SELECT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SMALL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SPAN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STRIKE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SVG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TEXTAREA)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TFOOT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TH)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_THEAD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TITLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_U)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_UL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_VAR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_XMP)[];
|
||||
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TRACK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUMMARY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIGCAPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BDI)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIALOG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MAIN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MENUITEM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SLOT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TEMPLATE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DATA)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DATALIST)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_EMBED)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_KEYGEN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_WBR)[];
|
||||
|
||||
#endif /* __ATTRDICT_H__ */
|
2780
third_party/tidy/attrs.c
vendored
Normal file
2780
third_party/tidy/attrs.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
487
third_party/tidy/attrs.h
vendored
Normal file
487
third_party/tidy/attrs.h
vendored
Normal file
|
@ -0,0 +1,487 @@
|
|||
#ifndef __ATTRS_H__
|
||||
#define __ATTRS_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* attrs.h -- recognize HTML attributes
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/* declaration for methods that check attribute values */
|
||||
typedef void (AttrCheck)(TidyDocImpl* doc, Node *node, AttVal *attval);
|
||||
|
||||
struct _Attribute
|
||||
{
|
||||
TidyAttrId id;
|
||||
tmbstr name;
|
||||
AttrCheck* attrchk;
|
||||
|
||||
struct _Attribute* next;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Anchor/Node linked list
|
||||
*/
|
||||
|
||||
struct _Anchor
|
||||
{
|
||||
struct _Anchor *next;
|
||||
Node *node;
|
||||
char *name;
|
||||
};
|
||||
|
||||
typedef struct _Anchor Anchor;
|
||||
|
||||
/* Number of slots in the `hashtab` array of struct _TidyAttribImpl. */
enum { ATTRIBUTE_HASH_SIZE = 178u };
|
||||
|
||||
struct _AttrHash
|
||||
{
|
||||
Attribute const* attr;
|
||||
struct _AttrHash* next;
|
||||
};
|
||||
|
||||
typedef struct _AttrHash AttrHash;
|
||||
|
||||
/* Number of slots in the `anchor_hash` array of struct _TidyAttribImpl. */
enum { ANCHOR_HASH_SIZE = 1021u };
|
||||
|
||||
/* Keeps a list of attributes that are sorted ahead of the others. */
|
||||
typedef struct _priorityAttribs {
|
||||
tmbstr* list;
|
||||
uint count;
|
||||
uint capacity;
|
||||
} PriorityAttribs;
|
||||
|
||||
struct _TidyAttribImpl
|
||||
{
|
||||
/* anchor/node lookup */
|
||||
Anchor* anchor_hash[ANCHOR_HASH_SIZE];
|
||||
|
||||
/* Declared literal attributes */
|
||||
Attribute* declared_attr_list;
|
||||
|
||||
/* Prioritized list of attributes to write */
|
||||
PriorityAttribs priorityAttribs;
|
||||
|
||||
AttrHash* hashtab[ATTRIBUTE_HASH_SIZE];
|
||||
};
|
||||
|
||||
typedef struct _TidyAttribImpl TidyAttribImpl;
|
||||
|
||||
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
|
||||
|
||||
AttrCheck TY_(CheckUrl);
|
||||
|
||||
/* public method for finding attribute definition by name */
|
||||
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval );
|
||||
|
||||
const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
|
||||
|
||||
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
||||
|
||||
void TY_(DropAttrByName)( TidyDocImpl* doc, Node *node, ctmbstr name );
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value );
|
||||
|
||||
AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value);
|
||||
|
||||
/* Add an item to the list of priority attributes to write first. */
|
||||
void TY_(DefinePriorityAttribute)(TidyDocImpl* doc, ctmbstr name);
|
||||
|
||||
/* Start an iterator for priority attributes. */
|
||||
TidyIterator TY_(getPriorityAttrList)( TidyDocImpl* doc );
|
||||
|
||||
/* Get the next priority attribute. */
|
||||
ctmbstr TY_(getNextPriorityAttr)( TidyDocImpl* doc, TidyIterator* iter );
|
||||
|
||||
Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname );
|
||||
|
||||
/* Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ); */
|
||||
|
||||
Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname );
|
||||
|
||||
/* may id or name serve as anchor? */
|
||||
Bool TY_(IsAnchorElement)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
In CSS1, selectors can contain only the characters A-Z, 0-9, and
|
||||
Unicode characters 161-255, plus dash (-); they cannot start with
|
||||
a dash or a digit; they can also contain escaped characters and any
|
||||
Unicode character as a numeric code (see next item).
|
||||
|
||||
The backslash followed by at most four hexadecimal digits (0..9A..F)
|
||||
stands for the Unicode character with that number.
|
||||
|
||||
Any character except a hexadecimal digit can be escaped to remove its
|
||||
special meaning, by putting a backslash in front.
|
||||
|
||||
#508936 - CSS class naming for -clean option
|
||||
*/
|
||||
Bool TY_(IsCSS1Selector)( ctmbstr buf );
|
||||
|
||||
Bool TY_(IsValidHTMLID)(ctmbstr id);
|
||||
Bool TY_(IsValidXMLID)(ctmbstr id);
|
||||
|
||||
/* removes anchor for specific node */
|
||||
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, ctmbstr name, Node *node );
|
||||
|
||||
/* free all anchors */
|
||||
void TY_(FreeAnchors)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/* public methods for initializing/freeing attribute dictionary */
|
||||
void TY_(InitAttrs)( TidyDocImpl* doc );
|
||||
void TY_(FreeAttrTable)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(FreeAttrPriorityList)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname );
|
||||
/*
|
||||
the same attribute name can't be used
|
||||
more than once in each element
|
||||
*/
|
||||
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node* node, Bool isXml );
|
||||
void TY_(SortAttributes)(TidyDocImpl* doc, Node* node, TidyAttrSortStrategy strat);
|
||||
|
||||
Bool TY_(IsBoolAttribute)( AttVal* attval );
|
||||
Bool TY_(attrIsEvent)( AttVal* attval );
|
||||
|
||||
AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
|
||||
|
||||
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
|
||||
|
||||
Bool TY_(AttributeIsProprietary)(Node* node, AttVal* attval);
|
||||
Bool TY_(AttributeIsMismatched)(Node* node, AttVal* attval, TidyDocImpl* doc);
|
||||
|
||||
|
||||
/* 0 == TidyAttr_UNKNOWN */
|
||||
/* AttrId(av): id of the attribute's dictionary entry, or TidyAttr_UNKNOWN
 * when av is NULL or has no dictionary entry.
 */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)

/* AttrIsId(av, atid): non-zero when av is non-NULL, has a dictionary entry
 * and that entry's id equals atid. The atid argument is parenthesized so
 * that expression arguments (e.g. a conditional) compare as a whole.
 */
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == (atid)))
|
||||
|
||||
#define AttrHasValue(attr) ((attr) && (attr)->value)
|
||||
#define AttrValueIs(attr, val) (AttrHasValue(attr) && \
|
||||
TY_(tmbstrcasecmp)((attr)->value, val) == 0)
|
||||
#define AttrContains(attr, val) (AttrHasValue(attr) && \
|
||||
TY_(tmbsubstr)((attr)->value, val) != NULL)
|
||||
#define AttrVersions(attr) ((attr) && (attr)->dict ? (attr)->dict->versions : VERS_PROPRIETARY)
|
||||
|
||||
/* AttrsHaveSameId(a, b): non-zero when both attributes are non-NULL, both
 * have dictionary entries with non-zero ids, and those ids are equal.
 * Every use of a and b is parenthesized so that arguments such as `&x` or
 * `p + 1` expand correctly.
 */
#define AttrsHaveSameId(a, b) ((a) && (b) && (a)->dict && (b)->dict && \
                               (a)->dict->id && (b)->dict->id &&       \
                               (a)->dict->id == (b)->dict->id)
|
||||
|
||||
#define attrIsABBR(av) AttrIsId( av, TidyAttr_ABBR )
|
||||
#define attrIsACCEPT(av) AttrIsId( av, TidyAttr_ACCEPT )
|
||||
#define attrIsACCEPT_CHARSET(av) AttrIsId( av, TidyAttr_ACCEPT_CHARSET )
|
||||
#define attrIsACCESSKEY(av) AttrIsId( av, TidyAttr_ACCESSKEY )
|
||||
#define attrIsACTION(av) AttrIsId( av, TidyAttr_ACTION )
|
||||
#define attrIsADD_DATE(av) AttrIsId( av, TidyAttr_ADD_DATE )
|
||||
#define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN )
|
||||
#define attrIsALINK(av) AttrIsId( av, TidyAttr_ALINK )
|
||||
#define attrIsALT(av) AttrIsId( av, TidyAttr_ALT )
|
||||
#define attrIsARCHIVE(av) AttrIsId( av, TidyAttr_ARCHIVE )
|
||||
#define attrIsAXIS(av) AttrIsId( av, TidyAttr_AXIS )
|
||||
#define attrIsBACKGROUND(av) AttrIsId( av, TidyAttr_BACKGROUND )
|
||||
#define attrIsBGCOLOR(av) AttrIsId( av, TidyAttr_BGCOLOR )
|
||||
#define attrIsBGPROPERTIES(av) AttrIsId( av, TidyAttr_BGPROPERTIES )
|
||||
#define attrIsBORDER(av) AttrIsId( av, TidyAttr_BORDER )
|
||||
#define attrIsBORDERCOLOR(av) AttrIsId( av, TidyAttr_BORDERCOLOR )
|
||||
#define attrIsBOTTOMMARGIN(av) AttrIsId( av, TidyAttr_BOTTOMMARGIN )
|
||||
#define attrIsCELLPADDING(av) AttrIsId( av, TidyAttr_CELLPADDING )
|
||||
#define attrIsCELLSPACING(av) AttrIsId( av, TidyAttr_CELLSPACING )
|
||||
/* attrIsCHARSET was defined twice in this run of macros; the redundant
 * second definition has been dropped.
 */
#define attrIsCHAR(av)              AttrIsId( av, TidyAttr_CHAR  )
#define attrIsCHAROFF(av)           AttrIsId( av, TidyAttr_CHAROFF  )
#define attrIsCHARSET(av)           AttrIsId( av, TidyAttr_CHARSET  )
#define attrIsCHECKED(av)           AttrIsId( av, TidyAttr_CHECKED  )
|
||||
#define attrIsCITE(av) AttrIsId( av, TidyAttr_CITE )
|
||||
#define attrIsCLASS(av) AttrIsId( av, TidyAttr_CLASS )
|
||||
#define attrIsCLASSID(av) AttrIsId( av, TidyAttr_CLASSID )
|
||||
#define attrIsCLEAR(av) AttrIsId( av, TidyAttr_CLEAR )
|
||||
#define attrIsCODE(av) AttrIsId( av, TidyAttr_CODE )
|
||||
#define attrIsCODEBASE(av) AttrIsId( av, TidyAttr_CODEBASE )
|
||||
#define attrIsCODETYPE(av) AttrIsId( av, TidyAttr_CODETYPE )
|
||||
#define attrIsCOLOR(av) AttrIsId( av, TidyAttr_COLOR )
|
||||
#define attrIsCOLS(av) AttrIsId( av, TidyAttr_COLS )
|
||||
#define attrIsCOLSPAN(av) AttrIsId( av, TidyAttr_COLSPAN )
|
||||
#define attrIsCOMPACT(av) AttrIsId( av, TidyAttr_COMPACT )
|
||||
#define attrIsCONTENT(av) AttrIsId( av, TidyAttr_CONTENT )
|
||||
#define attrIsCOORDS(av) AttrIsId( av, TidyAttr_COORDS )
|
||||
#define attrIsDATA(av) AttrIsId( av, TidyAttr_DATA )
|
||||
#define attrIsDATAFLD(av) AttrIsId( av, TidyAttr_DATAFLD )
|
||||
#define attrIsDATAFORMATAS(av) AttrIsId( av, TidyAttr_DATAFORMATAS )
|
||||
#define attrIsDATAPAGESIZE(av) AttrIsId( av, TidyAttr_DATAPAGESIZE )
|
||||
#define attrIsDATASRC(av) AttrIsId( av, TidyAttr_DATASRC )
|
||||
#define attrIsDATETIME(av) AttrIsId( av, TidyAttr_DATETIME )
|
||||
#define attrIsDECLARE(av) AttrIsId( av, TidyAttr_DECLARE )
|
||||
#define attrIsDEFER(av) AttrIsId( av, TidyAttr_DEFER )
|
||||
#define attrIsDIR(av) AttrIsId( av, TidyAttr_DIR )
|
||||
#define attrIsDISABLED(av) AttrIsId( av, TidyAttr_DISABLED )
|
||||
#define attrIsENCODING(av) AttrIsId( av, TidyAttr_ENCODING )
|
||||
#define attrIsENCTYPE(av) AttrIsId( av, TidyAttr_ENCTYPE )
|
||||
#define attrIsFACE(av) AttrIsId( av, TidyAttr_FACE )
|
||||
#define attrIsFOR(av) AttrIsId( av, TidyAttr_FOR )
|
||||
#define attrIsFRAME(av) AttrIsId( av, TidyAttr_FRAME )
|
||||
#define attrIsFRAMEBORDER(av) AttrIsId( av, TidyAttr_FRAMEBORDER )
|
||||
#define attrIsFRAMESPACING(av) AttrIsId( av, TidyAttr_FRAMESPACING )
|
||||
#define attrIsGRIDX(av) AttrIsId( av, TidyAttr_GRIDX )
|
||||
#define attrIsGRIDY(av) AttrIsId( av, TidyAttr_GRIDY )
|
||||
#define attrIsHEADERS(av) AttrIsId( av, TidyAttr_HEADERS )
|
||||
#define attrIsHEIGHT(av) AttrIsId( av, TidyAttr_HEIGHT )
|
||||
#define attrIsHREF(av) AttrIsId( av, TidyAttr_HREF )
|
||||
#define attrIsHREFLANG(av) AttrIsId( av, TidyAttr_HREFLANG )
|
||||
#define attrIsHSPACE(av) AttrIsId( av, TidyAttr_HSPACE )
|
||||
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
|
||||
#define attrIsID(av) AttrIsId( av, TidyAttr_ID )
|
||||
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
|
||||
#define attrIsITEMID(av) AttrIsId( av, TidyAttr_ITEMID )
|
||||
#define attrIsITEMPROP(av) AttrIsId( av, TidyAttr_ITEMPROP )
|
||||
#define attrIsITEMREF(av) AttrIsId( av, TidyAttr_ITEMREF )
|
||||
#define attrIsITEMSCOPE(av) AttrIsId( av, TidyAttr_ITEMSCOPE )
|
||||
#define attrIsITEMTYPE(av) AttrIsId( av, TidyAttr_ITEMTYPE )
|
||||
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
|
||||
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
|
||||
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )
|
||||
#define attrIsLAST_MODIFIED(av) AttrIsId( av, TidyAttr_LAST_MODIFIED )
|
||||
#define attrIsLAST_VISIT(av) AttrIsId( av, TidyAttr_LAST_VISIT )
|
||||
#define attrIsLEFTMARGIN(av) AttrIsId( av, TidyAttr_LEFTMARGIN )
|
||||
#define attrIsLINK(av) AttrIsId( av, TidyAttr_LINK )
|
||||
#define attrIsLONGDESC(av) AttrIsId( av, TidyAttr_LONGDESC )
|
||||
#define attrIsLOWSRC(av) AttrIsId( av, TidyAttr_LOWSRC )
|
||||
#define attrIsMARGINHEIGHT(av) AttrIsId( av, TidyAttr_MARGINHEIGHT )
|
||||
#define attrIsMARGINWIDTH(av) AttrIsId( av, TidyAttr_MARGINWIDTH )
|
||||
#define attrIsMAXLENGTH(av) AttrIsId( av, TidyAttr_MAXLENGTH )
|
||||
#define attrIsMEDIA(av) AttrIsId( av, TidyAttr_MEDIA )
|
||||
#define attrIsMETHOD(av) AttrIsId( av, TidyAttr_METHOD )
|
||||
#define attrIsMULTIPLE(av) AttrIsId( av, TidyAttr_MULTIPLE )
|
||||
#define attrIsNAME(av) AttrIsId( av, TidyAttr_NAME )
|
||||
#define attrIsNOHREF(av) AttrIsId( av, TidyAttr_NOHREF )
|
||||
#define attrIsNORESIZE(av) AttrIsId( av, TidyAttr_NORESIZE )
|
||||
#define attrIsNOSHADE(av) AttrIsId( av, TidyAttr_NOSHADE )
|
||||
#define attrIsNOWRAP(av) AttrIsId( av, TidyAttr_NOWRAP )
|
||||
#define attrIsOBJECT(av) AttrIsId( av, TidyAttr_OBJECT )
|
||||
#define attrIsOnAFTERUPDATE(av) AttrIsId( av, TidyAttr_OnAFTERUPDATE )
|
||||
#define attrIsOnBEFOREUNLOAD(av) AttrIsId( av, TidyAttr_OnBEFOREUNLOAD )
|
||||
#define attrIsOnBEFOREUPDATE(av) AttrIsId( av, TidyAttr_OnBEFOREUPDATE )
|
||||
#define attrIsOnBLUR(av) AttrIsId( av, TidyAttr_OnBLUR )
|
||||
#define attrIsOnCHANGE(av) AttrIsId( av, TidyAttr_OnCHANGE )
|
||||
#define attrIsOnCLICK(av) AttrIsId( av, TidyAttr_OnCLICK )
|
||||
#define attrIsOnDATAAVAILABLE(av) AttrIsId( av, TidyAttr_OnDATAAVAILABLE )
|
||||
#define attrIsOnDATASETCHANGED(av) AttrIsId( av, TidyAttr_OnDATASETCHANGED )
|
||||
#define attrIsOnDATASETCOMPLETE(av) AttrIsId( av, TidyAttr_OnDATASETCOMPLETE )
|
||||
#define attrIsOnDBLCLICK(av) AttrIsId( av, TidyAttr_OnDBLCLICK )
|
||||
#define attrIsOnERRORUPDATE(av) AttrIsId( av, TidyAttr_OnERRORUPDATE )
|
||||
#define attrIsOnFOCUS(av) AttrIsId( av, TidyAttr_OnFOCUS )
|
||||
#define attrIsOnKEYDOWN(av) AttrIsId( av, TidyAttr_OnKEYDOWN )
|
||||
#define attrIsOnKEYPRESS(av) AttrIsId( av, TidyAttr_OnKEYPRESS )
|
||||
#define attrIsOnKEYUP(av) AttrIsId( av, TidyAttr_OnKEYUP )
|
||||
#define attrIsOnLOAD(av) AttrIsId( av, TidyAttr_OnLOAD )
|
||||
#define attrIsOnMOUSEDOWN(av) AttrIsId( av, TidyAttr_OnMOUSEDOWN )
|
||||
#define attrIsOnMOUSEMOVE(av) AttrIsId( av, TidyAttr_OnMOUSEMOVE )
|
||||
#define attrIsOnMOUSEOUT(av) AttrIsId( av, TidyAttr_OnMOUSEOUT )
|
||||
#define attrIsOnMOUSEOVER(av) AttrIsId( av, TidyAttr_OnMOUSEOVER )
|
||||
#define attrIsOnMOUSEUP(av) AttrIsId( av, TidyAttr_OnMOUSEUP )
|
||||
#define attrIsOnRESET(av) AttrIsId( av, TidyAttr_OnRESET )
|
||||
#define attrIsOnROWENTER(av) AttrIsId( av, TidyAttr_OnROWENTER )
|
||||
#define attrIsOnROWEXIT(av) AttrIsId( av, TidyAttr_OnROWEXIT )
|
||||
#define attrIsOnSELECT(av) AttrIsId( av, TidyAttr_OnSELECT )
|
||||
#define attrIsOnSUBMIT(av) AttrIsId( av, TidyAttr_OnSUBMIT )
|
||||
#define attrIsOnUNLOAD(av) AttrIsId( av, TidyAttr_OnUNLOAD )
|
||||
#define attrIsPROFILE(av) AttrIsId( av, TidyAttr_PROFILE )
|
||||
#define attrIsPROMPT(av) AttrIsId( av, TidyAttr_PROMPT )
|
||||
#define attrIsRBSPAN(av) AttrIsId( av, TidyAttr_RBSPAN )
|
||||
#define attrIsREADONLY(av) AttrIsId( av, TidyAttr_READONLY )
|
||||
#define attrIsREL(av) AttrIsId( av, TidyAttr_REL )
|
||||
#define attrIsREV(av) AttrIsId( av, TidyAttr_REV )
|
||||
#define attrIsRIGHTMARGIN(av) AttrIsId( av, TidyAttr_RIGHTMARGIN )
|
||||
#define attrIsROLE(av) AttrIsId( av, TidyAttr_ROLE )
|
||||
#define attrIsROWS(av) AttrIsId( av, TidyAttr_ROWS )
|
||||
#define attrIsROWSPAN(av) AttrIsId( av, TidyAttr_ROWSPAN )
|
||||
#define attrIsRULES(av) AttrIsId( av, TidyAttr_RULES )
|
||||
#define attrIsSCHEME(av) AttrIsId( av, TidyAttr_SCHEME )
|
||||
#define attrIsSCOPE(av) AttrIsId( av, TidyAttr_SCOPE )
|
||||
#define attrIsSCROLLING(av) AttrIsId( av, TidyAttr_SCROLLING )
|
||||
#define attrIsSELECTED(av) AttrIsId( av, TidyAttr_SELECTED )
|
||||
#define attrIsSHAPE(av) AttrIsId( av, TidyAttr_SHAPE )
|
||||
#define attrIsSHOWGRID(av) AttrIsId( av, TidyAttr_SHOWGRID )
|
||||
#define attrIsSHOWGRIDX(av) AttrIsId( av, TidyAttr_SHOWGRIDX )
|
||||
#define attrIsSHOWGRIDY(av) AttrIsId( av, TidyAttr_SHOWGRIDY )
|
||||
#define attrIsSIZE(av) AttrIsId( av, TidyAttr_SIZE )
|
||||
#define attrIsSLOT(av) AttrIsId( av, TidyAttr_SLOT )
|
||||
#define attrIsSPAN(av) AttrIsId( av, TidyAttr_SPAN )
|
||||
#define attrIsSRC(av) AttrIsId( av, TidyAttr_SRC )
|
||||
#define attrIsSTANDBY(av) AttrIsId( av, TidyAttr_STANDBY )
|
||||
#define attrIsSTART(av) AttrIsId( av, TidyAttr_START )
|
||||
#define attrIsSTYLE(av) AttrIsId( av, TidyAttr_STYLE )
|
||||
#define attrIsSUMMARY(av) AttrIsId( av, TidyAttr_SUMMARY )
|
||||
#define attrIsTABINDEX(av) AttrIsId( av, TidyAttr_TABINDEX )
|
||||
#define attrIsTARGET(av) AttrIsId( av, TidyAttr_TARGET )
|
||||
#define attrIsTEXT(av) AttrIsId( av, TidyAttr_TEXT )
|
||||
#define attrIsTITLE(av) AttrIsId( av, TidyAttr_TITLE )
|
||||
#define attrIsTOPMARGIN(av) AttrIsId( av, TidyAttr_TOPMARGIN )
|
||||
#define attrIsTYPE(av) AttrIsId( av, TidyAttr_TYPE )
|
||||
#define attrIsUSEMAP(av) AttrIsId( av, TidyAttr_USEMAP )
|
||||
#define attrIsVALIGN(av) AttrIsId( av, TidyAttr_VALIGN )
|
||||
#define attrIsVALUE(av) AttrIsId( av, TidyAttr_VALUE )
|
||||
#define attrIsVALUETYPE(av) AttrIsId( av, TidyAttr_VALUETYPE )
|
||||
#define attrIsVERSION(av) AttrIsId( av, TidyAttr_VERSION )
|
||||
#define attrIsVLINK(av) AttrIsId( av, TidyAttr_VLINK )
|
||||
#define attrIsVSPACE(av) AttrIsId( av, TidyAttr_VSPACE )
|
||||
#define attrIsWIDTH(av) AttrIsId( av, TidyAttr_WIDTH )
|
||||
#define attrIsWRAP(av) AttrIsId( av, TidyAttr_WRAP )
|
||||
#define attrIsXMLNS(av) AttrIsId( av, TidyAttr_XMLNS )
|
||||
#define attrIsXML_LANG(av) AttrIsId( av, TidyAttr_XML_LANG )
|
||||
#define attrIsXML_SPACE(av) AttrIsId( av, TidyAttr_XML_SPACE )
|
||||
#define attrIsARIA_ACTIVEDESCENDANT(av) AttrIsId( av, TidyAttr_ARIA_ACTIVEDESCENDANT )
|
||||
#define attrIsARIA_ATOMIC(av) AttrIsId( av, TidyAttr_ARIA_ATOMIC )
|
||||
#define attrIsARIA_AUTOCOMPLETE(av) AttrIsId( av, TidyAttr_ARIA_AUTOCOMPLETE )
|
||||
#define attrIsARIA_BUSY(av) AttrIsId( av, TidyAttr_ARIA_BUSY )
|
||||
#define attrIsARIA_CHECKED(av) AttrIsId( av, TidyAttr_ARIA_CHECKED )
|
||||
#define attrIsARIA_CONTROLS(av) AttrIsId( av, TidyAttr_ARIA_CONTROLS )
|
||||
#define attrIsARIA_DESCRIBEDBY(av) AttrIsId( av, TidyAttr_ARIA_DESCRIBEDBY )
|
||||
#define attrIsARIA_DISABLED(av) AttrIsId( av, TidyAttr_ARIA_DISABLED )
|
||||
#define attrIsARIA_DROPEFFECT(av) AttrIsId( av, TidyAttr_ARIA_DROPEFFECT )
|
||||
#define attrIsARIA_EXPANDED(av) AttrIsId( av, TidyAttr_ARIA_EXPANDED )
|
||||
#define attrIsARIA_FLOWTO(av) AttrIsId( av, TidyAttr_ARIA_FLOWTO )
|
||||
#define attrIsARIA_GRABBED(av) AttrIsId( av, TidyAttr_ARIA_GRABBED )
|
||||
#define attrIsARIA_HASPOPUP(av) AttrIsId( av, TidyAttr_ARIA_HASPOPUP )
|
||||
#define attrIsARIA_HIDDEN(av) AttrIsId( av, TidyAttr_ARIA_HIDDEN )
|
||||
#define attrIsARIA_INVALID(av) AttrIsId( av, TidyAttr_ARIA_INVALID )
|
||||
#define attrIsARIA_LABEL(av) AttrIsId( av, TidyAttr_ARIA_LABEL )
|
||||
#define attrIsARIA_LABELLEDBY(av) AttrIsId( av, TidyAttr_ARIA_LABELLEDBY )
|
||||
#define attrIsARIA_LEVEL(av) AttrIsId( av, TidyAttr_ARIA_LEVEL )
|
||||
#define attrIsARIA_LIVE(av) AttrIsId( av, TidyAttr_ARIA_LIVE )
|
||||
#define attrIsARIA_MULTILINE(av) AttrIsId( av, TidyAttr_ARIA_MULTILINE )
|
||||
#define attrIsARIA_MULTISELECTABLE(av) AttrIsId( av, TidyAttr_ARIA_MULTISELECTABLE )
|
||||
#define attrIsARIA_ORIENTATION(av) AttrIsId( av, TidyAttr_ARIA_ORIENTATION )
|
||||
#define attrIsARIA_OWNS(av) AttrIsId( av, TidyAttr_ARIA_OWNS )
|
||||
#define attrIsARIA_POSINSET(av) AttrIsId( av, TidyAttr_ARIA_POSINSET )
|
||||
#define attrIsARIA_PRESSED(av) AttrIsId( av, TidyAttr_ARIA_PRESSED )
|
||||
#define attrIsARIA_READONLY(av) AttrIsId( av, TidyAttr_ARIA_READONLY )
|
||||
#define attrIsARIA_RELEVANT(av) AttrIsId( av, TidyAttr_ARIA_RELEVANT )
|
||||
#define attrIsARIA_REQUIRED(av) AttrIsId( av, TidyAttr_ARIA_REQUIRED )
|
||||
#define attrIsARIA_SELECTED(av) AttrIsId( av, TidyAttr_ARIA_SELECTED )
|
||||
#define attrIsARIA_SETSIZE(av) AttrIsId( av, TidyAttr_ARIA_SETSIZE )
|
||||
#define attrIsARIA_SORT(av) AttrIsId( av, TidyAttr_ARIA_SORT )
|
||||
#define attrIsARIA_VALUEMAX(av) AttrIsId( av, TidyAttr_ARIA_VALUEMAX )
|
||||
#define attrIsARIA_VALUEMIN(av) AttrIsId( av, TidyAttr_ARIA_VALUEMIN )
|
||||
#define attrIsARIA_VALUENOW(av) AttrIsId( av, TidyAttr_ARIA_VALUENOW )
|
||||
#define attrIsARIA_VALUETEXT(av) AttrIsId( av, TidyAttr_ARIA_VALUETEXT )
|
||||
#define attrIsSVG_FILL(av) AttrIsId( av, TidyAttr_FILL )
|
||||
#define attrIsSVG_FILLRULE(av) AttrIsId( av, TidyAttr_FILLRULE )
|
||||
#define attrIsSVG_STROKE(av) AttrIsId( av, TidyAttr_STROKE )
|
||||
#define attrIsSVG_STROKEDASHARRAY(av) AttrIsId( av, TidyAttr_STROKEDASHARRAY )
|
||||
#define attrIsSVG_STROKEDASHOFFSET(av) AttrIsId( av, TidyAttr_STROKEDASHOFFSET )
|
||||
#define attrIsSVG_STROKELINECAP(av) AttrIsId( av, TidyAttr_STROKELINECAP )
|
||||
#define attrIsSVG_STROKELINEJOIN(av) AttrIsId( av, TidyAttr_STROKELINEJOIN )
|
||||
#define attrIsSVG_STROKEMITERLIMIT(av) AttrIsId( av, TidyAttr_STROKEMITERLIMIT )
|
||||
#define attrIsSVG_STROKEWIDTH(av) AttrIsId( av, TidyAttr_STROKEWIDTH )
|
||||
#define attrIsSVG_COLORINTERPOLATION(a) AttrIsId( a, TidyAttr_COLORINTERPOLATION )
|
||||
#define attrIsSVG_COLORRENDERING(av) AttrIsId( av, TidyAttr_COLORRENDERING )
|
||||
#define attrIsSVG_OPACITY(av) AttrIsId( av, TidyAttr_OPACITY )
|
||||
#define attrIsSVG_STROKEOPACITY(av) AttrIsId( av, TidyAttr_STROKEOPACITY )
|
||||
#define attrIsSVG_FILLOPACITY(av) AttrIsId( av, TidyAttr_FILLOPACITY )
|
||||
|
||||
/* Attribute Retrieval macros
|
||||
*/
|
||||
#define attrGetHREF( nod ) TY_(AttrGetById)( nod, TidyAttr_HREF )
|
||||
#define attrGetSRC( nod ) TY_(AttrGetById)( nod, TidyAttr_SRC )
|
||||
#define attrGetID( nod ) TY_(AttrGetById)( nod, TidyAttr_ID )
|
||||
#define attrGetNAME( nod ) TY_(AttrGetById)( nod, TidyAttr_NAME )
|
||||
#define attrGetSUMMARY( nod ) TY_(AttrGetById)( nod, TidyAttr_SUMMARY )
|
||||
#define attrGetALT( nod ) TY_(AttrGetById)( nod, TidyAttr_ALT )
|
||||
#define attrGetLONGDESC( nod ) TY_(AttrGetById)( nod, TidyAttr_LONGDESC )
|
||||
#define attrGetUSEMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_USEMAP )
|
||||
#define attrGetISMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_ISMAP )
|
||||
#define attrGetLANGUAGE( nod ) TY_(AttrGetById)( nod, TidyAttr_LANGUAGE )
|
||||
#define attrGetTYPE( nod ) TY_(AttrGetById)( nod, TidyAttr_TYPE )
|
||||
#define attrGetVALUE( nod ) TY_(AttrGetById)( nod, TidyAttr_VALUE )
|
||||
#define attrGetCONTENT( nod ) TY_(AttrGetById)( nod, TidyAttr_CONTENT )
|
||||
#define attrGetTITLE( nod ) TY_(AttrGetById)( nod, TidyAttr_TITLE )
|
||||
#define attrGetXMLNS( nod ) TY_(AttrGetById)( nod, TidyAttr_XMLNS )
|
||||
#define attrGetDATAFLD( nod ) TY_(AttrGetById)( nod, TidyAttr_DATAFLD )
|
||||
#define attrGetWIDTH( nod ) TY_(AttrGetById)( nod, TidyAttr_WIDTH )
|
||||
#define attrGetHEIGHT( nod ) TY_(AttrGetById)( nod, TidyAttr_HEIGHT )
|
||||
#define attrGetFOR( nod ) TY_(AttrGetById)( nod, TidyAttr_FOR )
|
||||
#define attrGetSELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_SELECTED )
|
||||
#define attrGetCHARSET( nod ) TY_(AttrGetById)( nod, TidyAttr_CHARSET )
|
||||
#define attrGetCHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_CHECKED )
|
||||
#define attrGetLANG( nod ) TY_(AttrGetById)( nod, TidyAttr_LANG )
|
||||
#define attrGetTARGET( nod ) TY_(AttrGetById)( nod, TidyAttr_TARGET )
|
||||
#define attrGetHTTP_EQUIV( nod ) TY_(AttrGetById)( nod, TidyAttr_HTTP_EQUIV )
|
||||
#define attrGetREL( nod ) TY_(AttrGetById)( nod, TidyAttr_REL )
|
||||
|
||||
#define attrGetOnMOUSEMOVE( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEMOVE )
|
||||
#define attrGetOnMOUSEDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEDOWN )
|
||||
#define attrGetOnMOUSEUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEUP )
|
||||
#define attrGetOnCLICK( nod ) TY_(AttrGetById)( nod, TidyAttr_OnCLICK )
|
||||
#define attrGetOnMOUSEOVER( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOVER )
|
||||
#define attrGetOnMOUSEOUT( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOUT )
|
||||
#define attrGetOnKEYDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYDOWN )
|
||||
#define attrGetOnKEYUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYUP )
|
||||
#define attrGetOnKEYPRESS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYPRESS )
|
||||
#define attrGetOnFOCUS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnFOCUS )
|
||||
#define attrGetOnBLUR( nod ) TY_(AttrGetById)( nod, TidyAttr_OnBLUR )
|
||||
|
||||
#define attrGetBGCOLOR( nod ) TY_(AttrGetById)( nod, TidyAttr_BGCOLOR )
|
||||
|
||||
#define attrGetLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_LINK )
|
||||
#define attrGetALINK( nod ) TY_(AttrGetById)( nod, TidyAttr_ALINK )
|
||||
#define attrGetVLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_VLINK )
|
||||
|
||||
#define attrGetTEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_TEXT )
|
||||
#define attrGetSTYLE( nod ) TY_(AttrGetById)( nod, TidyAttr_STYLE )
|
||||
#define attrGetABBR( nod ) TY_(AttrGetById)( nod, TidyAttr_ABBR )
|
||||
#define attrGetCOLSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_COLSPAN )
|
||||
#define attrGetFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_FONT )
|
||||
#define attrGetBASEFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_BASEFONT )
|
||||
#define attrGetROWSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_ROWSPAN )
|
||||
|
||||
#define attrGetROLE( nod ) TY_(AttrGetById)( nod, TidyAttr_ROLE )
|
||||
|
||||
#define attrGetARIA_ACTIVEDESCENDANT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ACTIVEDESCENDANT )
|
||||
#define attrGetARIA_ATOMIC( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ATOMIC )
|
||||
#define attrGetARIA_AUTOCOMPLETE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_AUTOCOMPLETE )
|
||||
#define attrGetARIA_BUSY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_BUSY )
|
||||
#define attrGetARIA_CHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_CHECKED )
|
||||
#define attrGetARIA_CONTROLS( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_CONTROLS )
|
||||
#define attrGetARIA_DESCRIBEDBY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DESCRIBEDBY )
|
||||
#define attrGetARIA_DISABLED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DISABLED )
|
||||
#define attrGetARIA_DROPEFFECT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_DROPEFFECT )
|
||||
#define attrGetARIA_EXPANDED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_EXPANDED )
|
||||
#define attrGetARIA_FLOWTO( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_FLOWTO )
|
||||
#define attrGetARIA_GRABBED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_GRABBED )
|
||||
#define attrGetARIA_HASPOPUP( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_HASPOPUP )
|
||||
#define attrGetARIA_HIDDEN( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_HIDDEN )
|
||||
#define attrGetARIA_INVALID( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_INVALID )
|
||||
#define attrGetARIA_LABEL( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LABEL )
|
||||
#define attrGetARIA_LABELLEDBY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LABELLEDBY )
|
||||
#define attrGetARIA_LEVEL( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LEVEL )
|
||||
#define attrGetARIA_LIVE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_LIVE )
|
||||
#define attrGetARIA_MULTILINE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_MULTILINE )
|
||||
#define attrGetARIA_MULTISELECTABLE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_MULTISELECTABLE )
|
||||
#define attrGetARIA_ORIENTATION( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_ORIENTATION )
|
||||
#define attrGetARIA_OWNS( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_OWNS )
|
||||
#define attrGetARIA_POSINSET( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_POSINSET )
|
||||
#define attrGetARIA_PRESSED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_PRESSED )
|
||||
#define attrGetARIA_READONLY( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_READONLY )
|
||||
#define attrGetARIA_RELEVANT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_RELEVANT )
|
||||
#define attrGetARIA_REQUIRED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_REQUIRED )
|
||||
#define attrGetARIA_SELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SELECTED )
|
||||
#define attrGetARIA_SETSIZE( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SETSIZE )
|
||||
#define attrGetARIA_SORT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_SORT )
|
||||
#define attrGetARIA_VALUEMAX( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUEMAX )
|
||||
#define attrGetARIA_VALUEMIN( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUEMIN )
|
||||
#define attrGetARIA_VALUENOW( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUENOW )
|
||||
#define attrGetARIA_VALUETEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_ARIA_VALUETEXT )
|
||||
|
||||
#endif /* __ATTRS_H__ */
|
230
third_party/tidy/buffio.c
vendored
Normal file
230
third_party/tidy/buffio.c
vendored
Normal file
|
@ -0,0 +1,230 @@
|
|||
/* clang-format off */
|
||||
/* buffio.c -- Treat buffer as an I/O stream.
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Requires buffer to automatically grow as bytes are added.
|
||||
Must keep track of current read and write points.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/tidybuffio.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/assert.h"
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/**************
|
||||
TIDY
|
||||
**************/
|
||||
|
||||
static int insrc_getByte( void* appData )
|
||||
{
|
||||
TidyBuffer* buf = (TidyBuffer*) appData;
|
||||
return tidyBufGetByte( buf );
|
||||
}
|
||||
static Bool insrc_eof( void* appData )
|
||||
{
|
||||
TidyBuffer* buf = (TidyBuffer*) appData;
|
||||
return tidyBufEndOfInput( buf );
|
||||
}
|
||||
/* ungetByte callback installed by tidyInitInputBuffer(): treats appData as a
** TidyBuffer and forwards the byte to tidyBufUngetByte().
*/
static void insrc_ungetByte( void* appData, byte bv )
{
    tidyBufUngetByte( (TidyBuffer*) appData, bv );
}
|
||||
|
||||
/* Wire an input source to read from a TidyBuffer.
**
** The buffer is stored as the source's data pointer and the three static
** adapters above are installed as its callbacks. The adapters cast their
** appData argument back to TidyBuffer*, so they are presumably invoked
** with inp->sourceData (the call site is outside this file).
*/
void tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf )
{
    inp->sourceData = buf;

    inp->getByte    = insrc_getByte;
    inp->ungetByte  = insrc_ungetByte;
    inp->eof        = insrc_eof;
}
|
||||
|
||||
/* putByte callback installed by tidyInitOutputBuffer(): treats appData as a
** TidyBuffer and appends the byte with tidyBufPutByte().
*/
static void outsink_putByte( void* appData, byte bv )
{
    tidyBufPutByte( (TidyBuffer*) appData, bv );
}
|
||||
|
||||
/* Wire an output sink to write into a TidyBuffer: the buffer becomes the
** sink's data pointer and outsink_putByte() its putByte callback.
*/
void tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf )
{
    outp->sinkData = buf;
    outp->putByte  = outsink_putByte;
}
|
||||
|
||||
|
||||
/* Initialize a buffer without choosing an allocator.
**
** Passing NULL makes tidyBufInitWithAllocator() fall back to the default
** allocator. buf must not be NULL.
*/
void tidyBufInit( TidyBuffer* buf )
{
    assert( buf != NULL );

    tidyBufInitWithAllocator( buf, NULL );
}
|
||||
|
||||
/* Initialize a buffer and reserve room for allocSize bytes.
**
** No allocator is chosen here: NULL is handed to
** tidyBufAllocWithAllocator(), which does the actual work.
*/
void tidyBufAlloc( TidyBuffer* buf, uint allocSize )
{
    TidyAllocator *noAllocator = NULL;

    tidyBufAllocWithAllocator( buf, noAllocator, allocSize );
}
|
||||
|
||||
void tidyBufInitWithAllocator( TidyBuffer* buf,
|
||||
TidyAllocator *allocator )
|
||||
{
|
||||
assert( buf != NULL );
|
||||
TidyClearMemory( buf, sizeof(TidyBuffer) );
|
||||
buf->allocator = allocator ? allocator : &TY_(g_default_allocator);
|
||||
}
|
||||
|
||||
void tidyBufAllocWithAllocator( TidyBuffer* buf,
|
||||
TidyAllocator *allocator,
|
||||
uint allocSize )
|
||||
{
|
||||
tidyBufInitWithAllocator( buf, allocator );
|
||||
tidyBufCheckAlloc( buf, allocSize, 0 );
|
||||
buf->next = 0;
|
||||
}
|
||||
|
||||
/* Release the buffer's memory and return the buffer to its freshly
** initialized state, keeping the same allocator.
**
** A buffer that was never passed through tidyBufInit()/tidyBufAlloc() may
** still have a NULL allocator (the other entry points in this file
** tolerate that by installing the default allocator on demand). Do the
** same here so the allocator handed to TidyFree() is never NULL.
** NOTE(review): TidyFree() is presumably dispatched through the allocator;
** confirm against its definition in tidy.h.
**
** buf must not be NULL.
*/
void tidyBufFree( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( !buf->allocator )
        buf->allocator = &TY_(g_default_allocator);

    TidyFree( buf->allocator, buf->bp );

    /* Re-initialize in place; the allocator argument is read before the
       structure is cleared, so the buffer keeps its allocator. */
    tidyBufInitWithAllocator( buf, buf->allocator );
}
|
||||
|
||||
/* Empty the buffer without releasing its memory.
**
** When storage is present, the whole allocated region is zeroed and the
** content length is reset. The read position is rewound in every case.
** buf must not be NULL.
*/
void tidyBufClear( TidyBuffer* buf )
{
    assert( buf != NULL );

    buf->next = 0;

    if ( buf->bp != NULL )
    {
        buf->size = 0;
        TidyClearMemory( buf->bp, buf->allocated );
    }
}
|
||||
|
||||
/* Many users do not call tidyBufInit() or tidyBufAlloc() or their allocator
|
||||
counterparts. So by default, set the default allocator.
|
||||
*/
|
||||
static void setDefaultAllocator( TidyBuffer* buf )
|
||||
{
|
||||
buf->allocator = &TY_(g_default_allocator);
|
||||
}
|
||||
|
||||
/* Avoid thrashing memory by doubling buffer size
|
||||
** until larger than requested size.
|
||||
buf->allocated is bigger than allocSize+1 so that a trailing null byte is
|
||||
always available.
|
||||
*/
|
||||
void tidyBufCheckAlloc( TidyBuffer* buf, uint allocSize, uint chunkSize )
|
||||
{
|
||||
assert( buf != NULL );
|
||||
|
||||
if ( !buf->allocator )
|
||||
setDefaultAllocator( buf );
|
||||
|
||||
if ( 0 == chunkSize )
|
||||
chunkSize = 256;
|
||||
if ( allocSize+1 > buf->allocated )
|
||||
{
|
||||
byte* bp;
|
||||
uint allocAmt = chunkSize;
|
||||
if ( buf->allocated > 0 )
|
||||
allocAmt = buf->allocated;
|
||||
while ( allocAmt < allocSize+1 )
|
||||
allocAmt *= 2;
|
||||
|
||||
bp = (byte*)TidyRealloc( buf->allocator, buf->bp, allocAmt );
|
||||
if ( bp != NULL )
|
||||
{
|
||||
TidyClearMemory( bp + buf->allocated, allocAmt - buf->allocated );
|
||||
buf->bp = bp;
|
||||
buf->allocated = allocAmt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Attach buffer to a chunk O' memory w/out allocation */
|
||||
void tidyBufAttach( TidyBuffer* buf, byte* bp, uint size )
|
||||
{
|
||||
assert( buf != NULL );
|
||||
buf->bp = bp;
|
||||
buf->size = buf->allocated = size;
|
||||
buf->next = 0;
|
||||
if ( !buf->allocator )
|
||||
setDefaultAllocator( buf );
|
||||
}
|
||||
|
||||
/* Clear pointer to memory w/out deallocation */
|
||||
void tidyBufDetach( TidyBuffer* buf )
|
||||
{
|
||||
tidyBufInitWithAllocator( buf, buf->allocator );
|
||||
}
|
||||
|
||||
|
||||
/**************
|
||||
OUTPUT
|
||||
**************/
|
||||
|
||||
/* Append size bytes from vp to the end of the buffer's content, growing the
** buffer first if necessary.
**
** A NULL source or a zero size makes the call a no-op.
** buf must not be NULL.
*/
void tidyBufAppend( TidyBuffer* buf, void* vp, uint size )
{
    assert( buf != NULL );

    if ( vp == NULL || size == 0 )
        return;

    tidyBufCheckAlloc( buf, buf->size + size, 0 );
    memcpy( buf->bp + buf->size, vp, size );
    buf->size += size;
}
|
||||
|
||||
/* Append a single byte to the end of the buffer's content, growing the
** buffer first if necessary. buf must not be NULL.
*/
void tidyBufPutByte( TidyBuffer* buf, byte bv )
{
    assert( buf != NULL );

    tidyBufCheckAlloc( buf, buf->size + 1, 0 );

    buf->bp[ buf->size ] = bv;
    buf->size++;
}
|
||||
|
||||
|
||||
/* Remove the last byte of the buffer's content and return it.
**
** Returns EOF when the buffer holds no content. buf must not be NULL.
*/
int tidyBufPopByte( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( buf->size > 0 )
    {
        buf->size -= 1;
        return buf->bp[ buf->size ];
    }

    return EOF;
}
|
||||
|
||||
/**************
|
||||
INPUT
|
||||
**************/
|
||||
|
||||
/* Read the byte at the current read position and advance past it.
**
** Returns EOF once tidyBufEndOfInput() reports that the read position has
** reached the end of the content.
*/
int tidyBufGetByte( TidyBuffer* buf )
{
    if ( tidyBufEndOfInput(buf) )
        return EOF;

    return buf->bp[ buf->next++ ];
}
|
||||
|
||||
/* Report whether the read position has reached (or passed) the end of the
** buffer's content.
*/
Bool tidyBufEndOfInput( TidyBuffer* buf )
{
    return ( buf->size <= buf->next );
}
|
||||
|
||||
/* Step the read position back by one byte.
**
** Nothing is written to the buffer: bv is only compared, in an assert,
** against the byte now at the read position. The call is a no-op when the
** read position is already at the start.
*/
void tidyBufUngetByte( TidyBuffer* buf, byte bv )
{
    if ( buf->next > 0 )
    {
        buf->next -= 1;
        assert( bv == buf->bp[ buf->next ] );
    }
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1032
third_party/tidy/charsets.c
vendored
Normal file
1032
third_party/tidy/charsets.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
22
third_party/tidy/charsets.h
vendored
Normal file
22
third_party/tidy/charsets.h
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
#ifndef __CHARSETS_H__
|
||||
#define __CHARSETS_H__
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
/* clang-format off */
|
||||
|
||||
/* charsets.h -- character set information and mappings
|
||||
|
||||
(c) 1998-2021 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
uint TY_(GetEncodingIdFromName)(ctmbstr name);
|
||||
uint TY_(GetEncodingIdFromCodePage)(uint cp);
|
||||
uint TY_(GetEncodingCodePageFromName)(ctmbstr name);
|
||||
uint TY_(GetEncodingCodePageFromId)(uint id);
|
||||
ctmbstr TY_(GetEncodingNameFromId)(uint id);
|
||||
ctmbstr TY_(GetEncodingNameFromCodePage)(uint cp);
|
||||
|
||||
#endif /* __CHARSETS_H__ */
|
||||
|
2861
third_party/tidy/clean.c
vendored
Normal file
2861
third_party/tidy/clean.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
83
third_party/tidy/clean.h
vendored
Normal file
83
third_party/tidy/clean.h
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
#ifndef __CLEAN_H__
|
||||
#define __CLEAN_H__
|
||||
#include "third_party/tidy/forward.h"
|
||||
/* clang-format off */
|
||||
|
||||
/* clean.h -- clean up misuse of presentation markup
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
void TY_(FixNodeLinks)(Node *node);
|
||||
|
||||
void TY_(FreeStyles)( TidyDocImpl* doc );
|
||||
|
||||
/* Add class="foo" to node
|
||||
*/
|
||||
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue );
|
||||
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property );
|
||||
|
||||
void TY_(CleanDocument)( TidyDocImpl* doc );
|
||||
|
||||
/* simplifies <b><b> ... </b> ...</b> etc. */
|
||||
void TY_(NestedEmphasis)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/* replace i by em and b by strong */
|
||||
void TY_(EmFromI)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
Some people use dir or ul without an li
|
||||
to indent the content. The pattern to
|
||||
look for is a list with a single implicit
|
||||
li. This is recursively replaced by an
|
||||
implicit blockquote.
|
||||
*/
|
||||
void TY_(List2BQ)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
Replace implicit blockquote by div with an indent
|
||||
taking care to reduce nested blockquotes to a single
|
||||
div with the indent set to match the nesting depth
|
||||
*/
|
||||
void TY_(BQ2Div)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
void TY_(DropSections)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
/*
|
||||
This is a major clean up to strip out all the extra stuff you get
|
||||
when you save as web page from Word 2000. It doesn't yet know what
|
||||
to do with VML tags, but these will appear as errors unless you
|
||||
declare them as new tags, such as o:p which needs to be declared
|
||||
as inline.
|
||||
*/
|
||||
void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
|
||||
|
||||
Bool TY_(IsWord2000)( TidyDocImpl* doc );
|
||||
|
||||
/* where appropriate move object elements from head to body */
|
||||
void TY_(BumpObject)( TidyDocImpl* doc, Node *html );
|
||||
|
||||
Bool TY_(TidyMetaCharset)(TidyDocImpl* doc);
|
||||
|
||||
void TY_(DropComments)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(DropFontElements)(TidyDocImpl* doc, Node* node, Node **pnode);
|
||||
void TY_(WbrToSpace)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(DowngradeTypography)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(ReplacePreformattedSpaces)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(NormalizeSpaces)(Lexer *lexer, Node *node);
|
||||
void TY_(ConvertCDATANodes)(TidyDocImpl* doc, Node* node);
|
||||
|
||||
void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId);
|
||||
void TY_(FixXhtmlNamespace)(TidyDocImpl* doc, Bool wantXmlns);
|
||||
void TY_(FixLanguageInformation)(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang);
|
||||
|
||||
/* Issue #567 - move style elements from body to head */
|
||||
void TY_(CleanStyle)(TidyDocImpl* doc, Node *html);
|
||||
/* Issue #692 - discard multiple titles */
|
||||
void TY_(CleanHead)(TidyDocImpl* doc);
|
||||
|
||||
#endif /* __CLEAN_H__ */
|
2009
third_party/tidy/config.c
vendored
Normal file
2009
third_party/tidy/config.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
434
third_party/tidy/config.h
vendored
Normal file
434
third_party/tidy/config.h
vendored
Normal file
|
@ -0,0 +1,434 @@
|
|||
#ifndef __CONFIG_H__
|
||||
#define __CONFIG_H__
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Read configuration files and manage configuration properties.
|
||||
*
|
||||
* Config files associate a property name with a value.
|
||||
*
|
||||
* // comments can start at the beginning of a line
|
||||
* # comments can start at the beginning of a line
|
||||
* name: short values fit onto one line
|
||||
* name: a really long value that
|
||||
* continues on the next line
|
||||
*
|
||||
* Property names are case insensitive and should be less than 60 characters
|
||||
* in length, and must start at the beginning of the line, as whitespace at
|
||||
* the start of a line signifies a line continuation.
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University) and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/streamio.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup configuration_options Configuration Options
|
||||
**
|
||||
** This module organizes all of Tidy's configuration options, including
|
||||
** picklist management, option setting and retrieval, option file utilities,
|
||||
** and so on.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/** Determines the maximum number of items in an option's picklist. PickLists
|
||||
** may have up to 16 items. For some reason, this limit has been hard-coded
|
||||
** into Tidy for some time. Feel free to increase this as needed.
|
||||
*/
|
||||
#define TIDY_PL_SIZE 16
|
||||
|
||||
|
||||
/** Structs of this type contain information needed in order to present
|
||||
** picklists, relate picklist entries to public enum values, and parse
|
||||
** strings that are accepted in order to assign the value.
|
||||
*/
|
||||
typedef struct PickListItem {
|
||||
ctmbstr label; /**< PickList label for this item. */
|
||||
const int value; /**< The option value represented by this label. */
|
||||
ctmbstr inputs[10]; /**< String values that can select this value. */
|
||||
} PickListItem;
|
||||
|
||||
|
||||
/** An array of PickListItems, fixed in size for in-code declarations.
|
||||
** Arrays must be populated in 0 to 10 order, as the option value is assigned
|
||||
** based on this index and *not* on the structures' value field. It remains
|
||||
** a best practice, however, to assign a public enum value with the proper
|
||||
** index value.
|
||||
*/
|
||||
typedef const PickListItem PickListItems[TIDY_PL_SIZE];
|
||||
|
||||
|
||||
struct _tidy_option; /* forward */
|
||||
|
||||
/** The TidyOptionImpl type implements the `_tidy_option` structure.
|
||||
*/
|
||||
typedef struct _tidy_option TidyOptionImpl;
|
||||
|
||||
|
||||
/** This typedef describes a function that is used for parsing the input
|
||||
** given for a particular Tidy option.
|
||||
*/
|
||||
typedef Bool (ParseProperty)( TidyDocImpl* doc, const TidyOptionImpl* opt );
|
||||
|
||||
|
||||
/** This structure defines the internal representation of a Tidy option.
|
||||
*/
|
||||
struct _tidy_option
|
||||
{
|
||||
TidyOptionId id; /**< The unique identifier for this option. */
|
||||
TidyConfigCategory category; /**< The category of the option. */
|
||||
ctmbstr name; /**< The name of the option. */
|
||||
TidyOptionType type; /**< The date type for the option. */
|
||||
ulong dflt; /**< Default value for TidyInteger and TidyBoolean */
|
||||
ParseProperty* parser; /**< Function to parse input; read-only if NULL. */
|
||||
PickListItems* pickList; /**< The picklist of possible values for this option. */
|
||||
ctmbstr pdflt; /**< Default value for TidyString. */
|
||||
};
|
||||
|
||||
|
||||
/** Stored option values can be one of two internal types.
|
||||
*/
|
||||
typedef union
|
||||
{
|
||||
ulong v; /**< Value for TidyInteger and TidyBoolean */
|
||||
char *p; /**< Value for TidyString */
|
||||
} TidyOptionValue;
|
||||
|
||||
|
||||
/** This type is used to define a structure for keeping track of the values
|
||||
** for each option.
|
||||
*/
|
||||
typedef struct _tidy_config
|
||||
{
|
||||
TidyOptionValue value[ N_TIDY_OPTIONS + 1 ]; /**< Current config values. */
|
||||
TidyOptionValue snapshot[ N_TIDY_OPTIONS + 1 ]; /**< Snapshot of values to be restored later. */
|
||||
uint defined_tags; /**< Tracks user-defined tags. */
|
||||
uint c; /**< Current char in input stream for reading options. */
|
||||
StreamIn* cfgIn; /**< Current input source for reading options.*/
|
||||
} TidyConfigImpl;
|
||||
|
||||
|
||||
/** Used to build a table of documentation cross-references.
|
||||
*/
|
||||
typedef struct {
|
||||
TidyOptionId opt; /**< Identifier. */
|
||||
TidyOptionId const *links; /**< Cross references. Last element must be 'TidyUnknownOption'. */
|
||||
} TidyOptionDoc;
|
||||
|
||||
|
||||
/** Given an option name, return an instance of an option.
|
||||
** @param optnam The option name to retrieve.
|
||||
** @returns The instance of the requested option.
|
||||
*/
|
||||
const TidyOptionImpl* TY_(lookupOption)( ctmbstr optnam );
|
||||
|
||||
|
||||
/** Given an option ID, return an instance of an option.
|
||||
** @param optId The option ID to retrieve.
|
||||
** @returns The instance of the requested option.
|
||||
*/
|
||||
const TidyOptionImpl* TY_(getOption)( TidyOptionId optId );
|
||||
|
||||
/** Given an option ID, indicates whether or not the option is a list.
|
||||
** @param optId The option ID to check.
|
||||
** @returns Returns yes if the option value is a list.
|
||||
*/
|
||||
const Bool TY_(getOptionIsList)( TidyOptionId optId );
|
||||
|
||||
/** Initiates an iterator to cycle through all of the available options.
|
||||
** @param doc The Tidy document to get options.
|
||||
** @returns An iterator token to be used with TY_(getNextOption)().
|
||||
*/
|
||||
TidyIterator TY_(getOptionList)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Gets the next option provided by the iterator.
|
||||
** @param doc The Tidy document to get options.
|
||||
** @param iter The iterator token initialized by TY_(getOptionList)().
|
||||
** @returns The instance of the next option.
|
||||
*/
|
||||
const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* doc, TidyIterator* iter );
|
||||
|
||||
|
||||
/** Initiates an iterator to cycle through all of the available picklist
|
||||
** possibilities.
|
||||
** @param option An instance of an option for which to iterate a picklist.
|
||||
** @returns An iterator token to be used with TY_(getNextOptionPick)().
|
||||
*/
|
||||
TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option );
|
||||
|
||||
|
||||
/** Gets the next picklist possibility provided by the iterator.
|
||||
** @param option The instance of the option for which to iterate a picklist.
|
||||
** @param iter The iterator token initialized by TY_(getOptionPickList)().
|
||||
** @returns The next picklist entry.
|
||||
*/
|
||||
ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option, TidyIterator* iter );
|
||||
|
||||
|
||||
#if SUPPORT_CONSOLE_APP
|
||||
/** Returns the cross-reference information structure for optID, which is
|
||||
** used for generating documentation.
|
||||
** @param optId The option ID to get cross-reference information for.
|
||||
** @returns Cross reference information.
|
||||
*/
|
||||
const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId );
|
||||
#endif /* SUPPORT_CONSOLE_APP */
|
||||
|
||||
|
||||
/** Initialize the configuration for the given Tidy document.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(InitConfig)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Frees the configuration memory for the given Tidy document.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(FreeConfig)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Gets the picklist label for a given value.
|
||||
** @param optId the option id having a picklist to check.
|
||||
** @param pick the picklist item to retrieve.
|
||||
** @returns The label for the pick.
|
||||
*/
|
||||
ctmbstr TY_(GetPickListLabelForPick)( TidyOptionId optId, uint pick );
|
||||
|
||||
|
||||
/** Sets the integer value for the given option Id.
|
||||
** @param doc The Tidy document.
|
||||
** @param optId The option ID to set.
|
||||
** @param val The value to set.
|
||||
** @returns Success or failure.
|
||||
*/
|
||||
Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val );
|
||||
|
||||
|
||||
/** Sets the bool value for the given option Id.
|
||||
** @param doc The Tidy document.
|
||||
** @param optId The option ID to set.
|
||||
** @param val The value to set.
|
||||
** @returns Success or failure.
|
||||
*/
|
||||
Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val );
|
||||
|
||||
|
||||
/** Resets the given option to its default value.
|
||||
** @param doc The Tidy document.
|
||||
** @param optId The option ID to set.
|
||||
** @returns Success or failure.
|
||||
*/
|
||||
Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
|
||||
|
||||
/** Resets all options in the document to their default values.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(ResetConfigToDefault)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Stores a snapshot of all of the configuration values that can be
|
||||
** restored later.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(TakeConfigSnapshot)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Restores all of the configuration values to their snapshotted values.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Copies the configuration from one document to another.
|
||||
** @param docTo The destination Tidy document.
|
||||
** @param docFrom The source Tidy document.
|
||||
*/
|
||||
void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom );
|
||||
|
||||
|
||||
/** Attempts to parse the given config file into the document.
|
||||
** @param doc The Tidy document.
|
||||
** @param cfgfil The file to load.
|
||||
** @returns a file system error code.
|
||||
*/
|
||||
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
|
||||
|
||||
|
||||
/** Attempts to parse the given config file into the document, using
|
||||
** the provided encoding.
|
||||
** @param doc The Tidy document.
|
||||
** @param cfgfil The file to load.
|
||||
** @param charenc The name of the encoding to use for reading the file.
|
||||
** @returns a file system error code.
|
||||
*/
|
||||
int TY_(ParseConfigFileEnc)( TidyDocImpl* doc,
|
||||
ctmbstr cfgfil, ctmbstr charenc );
|
||||
|
||||
|
||||
/** Saves the current configuration for options not having default values
|
||||
** into the specified file.
|
||||
** @param doc The Tidy document.
|
||||
** @param cfgfil The file to save.
|
||||
** @returns a file system error code.
|
||||
*/
|
||||
int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
|
||||
|
||||
|
||||
/** Writes the current configuration for options not having default values
|
||||
** into the specified sink.
|
||||
** @param doc The Tidy document.
|
||||
** @param sink The sink to save into.
|
||||
** @returns a file system error code.
|
||||
*/
|
||||
int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink );
|
||||
|
||||
|
||||
/** Attempts to parse the provided value for the given option name. Returns
|
||||
** false if unknown option, missing parameter, or the option doesn't
|
||||
** use the parameter.
|
||||
** @param doc The Tidy document.
|
||||
** @param optnam The name of the option to be set.
|
||||
** @param optVal The string value to attempt to parse.
|
||||
** @returns Success or failure.
|
||||
*/
|
||||
Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optVal );
|
||||
|
||||
|
||||
/** Attempts to parse the provided value for the given option id. Returns
|
||||
** false if unknown option, missing parameter, or the option doesn't
|
||||
** use the parameter.
|
||||
** @param doc The Tidy document.
|
||||
** @param optId The ID of the option to be set.
|
||||
** @param optVal The string value to attempt to parse.
|
||||
** @returns Success or failure.
|
||||
*/
|
||||
Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optVal );
|
||||
|
||||
|
||||
/** Ensure that char encodings are self consistent.
|
||||
** @param doc The Tidy document to adjust.
|
||||
** @param encoding The encoding being applied.
|
||||
** @returns A bool indicating success or failure.
|
||||
*/
|
||||
Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding );
|
||||
|
||||
|
||||
/** Ensure that the configuration options are self consistent.
|
||||
** THIS PROCESS IS DESTRUCTIVE TO THE USER STATE. It examines
|
||||
** certain user-specified options and changes other options
|
||||
** as a result. This means that documented API functions such
|
||||
** as tidyOptGetValue() won't return the user-set values after
|
||||
** this is used. As a result, *don't* just use this function
|
||||
** at every opportunity, but only where needed, which is ONLY
|
||||
** prior to parsing a stream, and again prior to saving a
|
||||
** stream (because we reset after parsing.)
|
||||
** @param doc The Tidy document to adjust.
|
||||
*/
|
||||
void TY_(AdjustConfig)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Indicates whether or not the current configuration differs from the defaults.
|
||||
** @param doc The Tidy document.
|
||||
** @returns The result.
|
||||
*/
|
||||
Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Indicates whether or not the current configuration is different from the
|
||||
** stored snapshot.
|
||||
** @param doc The Tidy document.
|
||||
** @returns The result.
|
||||
*/
|
||||
Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Returns the character encoding ID for the given character encoding
|
||||
** string.
|
||||
** @param doc The Tidy document.
|
||||
** @param charenc The name of the character encoding.
|
||||
** @returns The Id of the character encoding.
|
||||
*/
|
||||
int TY_(CharEncodingId)( TidyDocImpl* doc, ctmbstr charenc );
|
||||
|
||||
|
||||
/** Returns the full name of the encoding for the given ID.
|
||||
** @param encoding The Id of the encoding.
|
||||
** @returns The name of the character encoding.
|
||||
*/
|
||||
ctmbstr TY_(CharEncodingName)( int encoding );
|
||||
|
||||
|
||||
/** Returns the Tidy command line option name of the encoding for the given ID.
|
||||
** @param encoding The Id of the encoding.
|
||||
** @returns The Tidy command line option representing the encoding.
|
||||
*/
|
||||
ctmbstr TY_(CharEncodingOptName)( int encoding );
|
||||
|
||||
|
||||
/** Coordinates Config update and list data.
|
||||
** @param doc The Tidy document.
|
||||
** @param opt The option the list item is intended for.
|
||||
** @param name The name of the new list item.
|
||||
*/
|
||||
void TY_(DeclareListItem)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
/* Debug lookup functions will be type-safe and assert option type match */
|
||||
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
|
||||
#define cfg(doc, id) TY_(_cfgGet)( (doc), (id) )
|
||||
#define cfgBool(doc, id) TY_(_cfgGetBool)( (doc), (id) )
|
||||
#define cfgAutoBool(doc, id) TY_(_cfgGetAutoBool)( (doc), (id) )
|
||||
#define cfgStr(doc, id) TY_(_cfgGetString)( (doc), (id) )
|
||||
|
||||
#else
|
||||
|
||||
/* Release build macros for speed */
|
||||
|
||||
/** Access the raw, non-string ulong value of the given option ID. */
|
||||
#define cfg(doc, id) ((doc)->config.value[ (id) ].v)
|
||||
|
||||
/** Access the Bool value of the given option ID. */
|
||||
#define cfgBool(doc, id) ((Bool) cfg(doc, id))
|
||||
|
||||
/** Access the TidyTriState value of the given option ID. */
|
||||
#define cfgAutoBool(doc, id) ((TidyTriState) cfg(doc, id))
|
||||
|
||||
/** Access the string value of the given option ID. */
|
||||
#define cfgStr(doc, id) ((ctmbstr) (doc)->config.value[ (id) ].p)
|
||||
|
||||
#endif /* _DEBUG */
|
||||
|
||||
|
||||
/** @} configuration_options group */
|
||||
/** @} internal_api addtogroup */
|
||||
|
||||
|
||||
#endif /* __CONFIG_H__ */
|
2197
third_party/tidy/entities.c
vendored
Normal file
2197
third_party/tidy/entities.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
19
third_party/tidy/entities.h
vendored
Normal file
19
third_party/tidy/entities.h
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef __ENTITIES_H__
|
||||
#define __ENTITIES_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* entities.h -- recognize character entities
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/* entity starting with "&" returns zero on error */
|
||||
/* uint EntityCode( ctmbstr name, uint versions ); */
|
||||
ctmbstr TY_(EntityName)( uint charCode, uint versions );
|
||||
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions );
|
||||
|
||||
#endif /* __ENTITIES_H__ */
|
115
third_party/tidy/fileio.c
vendored
Normal file
115
third_party/tidy/fileio.c
vendored
Normal file
|
@ -0,0 +1,115 @@
|
|||
/* clang-format off */
|
||||
/* fileio.c -- does standard I/O
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Default implementations of Tidy input sources
|
||||
and output sinks based on standard C FILE*.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/fileio.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/sprtf.h"
|
||||
|
||||
typedef struct _fp_input_source
|
||||
{
|
||||
FILE* fp;
|
||||
TidyBuffer unget;
|
||||
} FileSource;
|
||||
|
||||
static int filesrc_getByte( void* sourceData )
|
||||
{
|
||||
FileSource* fin = (FileSource*) sourceData;
|
||||
int bv;
|
||||
if ( fin->unget.size > 0 )
|
||||
bv = tidyBufPopByte( &fin->unget );
|
||||
else
|
||||
bv = fgetc( fin->fp );
|
||||
return bv;
|
||||
}
|
||||
|
||||
static Bool filesrc_eof( void* sourceData )
|
||||
{
|
||||
FileSource* fin = (FileSource*) sourceData;
|
||||
Bool isEOF = ( fin->unget.size == 0 );
|
||||
if ( isEOF )
|
||||
isEOF = feof( fin->fp ) != 0;
|
||||
return isEOF;
|
||||
}
|
||||
|
||||
/* Push a byte back so that the next getByte call returns it. */
static void filesrc_ungetByte( void* sourceData, byte bv )
{
    TidyBuffer* pushback = &((FileSource*) sourceData)->unget;
    tidyBufPutByte( pushback, bv );
}
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
# define initFileSource initStdIOFileSource
|
||||
# define freeFileSource freeStdIOFileSource
|
||||
#endif
|
||||
/* Allocate a FileSource for "fp" using "allocator" and wire it into "inp".
** Returns 0 on success, or -1 if the allocation fails (in which case
** "inp" is left untouched).
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* src = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );
    if ( src == NULL )
        return -1;

    /* Start from an all-zero state, then fill in the fields we need. */
    TidyClearMemory( src, sizeof(FileSource) );
    src->fp = fp;
    src->unget.allocator = allocator;

    inp->sourceData = src;
    inp->getByte    = filesrc_getByte;
    inp->eof        = filesrc_eof;
    inp->ungetByte  = filesrc_ungetByte;

    return 0;
}
|
||||
|
||||
/* Release the FileSource attached to "inp".
**
** If "closeIt" is set and a stream is attached, the stream is closed.
** The push-back buffer is freed, then the FileSource itself is released
** through the allocator recorded in that buffer.
**
** A NULL sourceData is tolerated and is a no-op. The previous version
** passed &fin->unget to tidyBufFree() before checking fin for NULL.
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
|
||||
|
||||
/* Write one byte to the FILE* sink. When ENABLE_DEBUG_LOG is defined the
** byte is also echoed through SPRTF, unless the sink is file descriptor 2.
*/
void TY_(filesink_putByte)( void* sinkData, byte bv )
{
    FILE* out = (FILE*) sinkData;

    fputc( bv, out );
#if defined(ENABLE_DEBUG_LOG)
    /*\
     * avoid duplicate newline - SPRTF will translate an 0x0d to CRLF,
     * and do the same with the following 0x0a, so 0x0d is not echoed
    \*/
    if ( fileno(out) != 2 && bv != 0x0d )
    {
        SPRTF("%c",bv);
    }
#endif
}
|
||||
|
||||
/* Point the output sink at "fp", with filesink_putByte as its writer. */
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
44
third_party/tidy/fileio.h
vendored
Normal file
44
third_party/tidy/fileio.h
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
#ifndef __FILEIO_H__
|
||||
#define __FILEIO_H__
|
||||
/* clang-format off */
|
||||
|
||||
/** @file fileio.h - does standard C I/O
|
||||
|
||||
Implementation of a FILE* based TidyInputSource and
|
||||
TidyOutputSink.
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/tidybuffio.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Allocate and initialize file input source */
|
||||
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
||||
|
||||
/** Free file input source */
|
||||
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt );
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
/** Allocate and initialize file input source using Standard C I/O */
|
||||
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
||||
|
||||
/** Free file input source using Standard C I/O */
|
||||
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt );
|
||||
#endif
|
||||
|
||||
/** Initialize file output sink */
|
||||
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp );
|
||||
|
||||
/* Needed for internal declarations */
|
||||
void TY_(filesink_putByte)( void* sinkData, byte bv );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* __FILEIO_H__ */
|
74
third_party/tidy/forward.h
vendored
Normal file
74
third_party/tidy/forward.h
vendored
Normal file
|
@ -0,0 +1,74 @@
|
|||
#ifndef __FORWARD_H__
|
||||
#define __FORWARD_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* forward.h -- Forward declarations for major Tidy structures
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Avoids many include file circular dependencies.
|
||||
|
||||
Try to keep this file down to the minimum to avoid
|
||||
cross-talk between modules.
|
||||
|
||||
Header files include this file. C files include tidy-int.h.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
|
||||
/* Internal symbols are prefixed to avoid clashes with other libraries */
|
||||
#define TYDYAPPEND(str1,str2) str1##str2
|
||||
#define TY_(str) TYDYAPPEND(prvTidy,str)
|
||||
|
||||
/* Internal symbols are prefixed with 'hidden' attr, to avoid exporting */
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define TY_PRIVATE
|
||||
#else
|
||||
#define TY_PRIVATE __attribute__((__visibility__("hidden")))
|
||||
#endif
|
||||
|
||||
struct _StreamIn;
|
||||
typedef struct _StreamIn StreamIn;
|
||||
|
||||
struct _StreamOut;
|
||||
typedef struct _StreamOut StreamOut;
|
||||
|
||||
struct _TidyDocImpl;
|
||||
typedef struct _TidyDocImpl TidyDocImpl;
|
||||
|
||||
struct _TidyMessageImpl;
|
||||
typedef struct _TidyMessageImpl TidyMessageImpl;
|
||||
|
||||
/* @todo: this name isn't very instructive! */
|
||||
struct _Dict;
|
||||
typedef struct _Dict Dict;
|
||||
|
||||
struct _Attribute;
|
||||
typedef struct _Attribute Attribute;
|
||||
|
||||
struct _AttVal;
|
||||
typedef struct _AttVal AttVal;
|
||||
|
||||
struct _Node;
|
||||
typedef struct _Node Node;
|
||||
|
||||
struct _IStack;
|
||||
typedef struct _IStack IStack;
|
||||
|
||||
struct _Lexer;
|
||||
typedef struct _Lexer Lexer;
|
||||
|
||||
extern TidyAllocator TY_(g_default_allocator);
|
||||
|
||||
/** Wrappers for easy memory allocation using an allocator */
|
||||
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size)))
|
||||
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size)))
|
||||
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block)))
|
||||
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg)))
|
||||
#define TidyClearMemory(block, size) memset((block), 0, (size))
|
||||
|
||||
|
||||
#endif /* __FORWARD_H__ */
|
182
third_party/tidy/gdoc.c
vendored
Normal file
182
third_party/tidy/gdoc.c
vendored
Normal file
|
@ -0,0 +1,182 @@
|
|||
/* clang-format off */
|
||||
/*
|
||||
gdoc.c -- clean up html exported by Google Docs
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Filters from other formats such as Microsoft Word
|
||||
often make excessive use of presentation markup such
|
||||
as font tags, B, I, and the align attribute. By applying
|
||||
a set of production rules, it is straight forward to
|
||||
transform this to use CSS.
|
||||
|
||||
Some rules replace some of the children of an element by
|
||||
style properties on the element, e.g.
|
||||
|
||||
<p><b>...</b></p> -> <p style="font-weight: bold">...</p>
|
||||
|
||||
Such rules are applied to the element's content and then
|
||||
to the element itself until no more of the rules apply.
|
||||
Having applied all the rules to an element, it will have
|
||||
a style attribute with one or more properties.
|
||||
|
||||
Other rules strip the element they apply to, replacing
|
||||
it by style properties on the contents, e.g.
|
||||
|
||||
<dir><li><p>...</li></dir> -> <p style="margin-left: 1em">...
|
||||
|
||||
These rules are applied to an element before processing
|
||||
its content and replace the current element by the first
|
||||
element in the exposed content.
|
||||
|
||||
After applying both sets of rules, you can replace the
|
||||
style attribute by a class value and style rule in the
|
||||
document head. To support this, an association of styles
|
||||
and class names is built.
|
||||
|
||||
A naive approach is to rely on string matching to test
|
||||
when two property lists are the same. A better approach
|
||||
would be to first sort the properties before matching.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
#include "third_party/tidy/tidy-int.h"
|
||||
#include "third_party/tidy/gdoc.h"
|
||||
#include "third_party/tidy/lexer.h"
|
||||
#include "third_party/tidy/parser.h"
|
||||
#include "third_party/tidy/tags.h"
|
||||
#include "third_party/tidy/attrs.h"
|
||||
#include "third_party/tidy/message.h"
|
||||
#include "third_party/tidy/tmbstr.h"
|
||||
#include "third_party/tidy/utf8.h"
|
||||
|
||||
/*
|
||||
Extricate "element", replace it by its content and delete it.
|
||||
*/
|
||||
/* Remove "element" from the tree while keeping its children in its place.
** On return *pnode is the first former child, or, when the element had
** no content, whatever TY_(DiscardElement) returns for it.
*/
static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode)
{
    Node *first = element->content;
    Node *parent, *last, *kid;

    /* Nothing to hoist: plain removal. */
    if (first == NULL)
    {
        *pnode = TY_(DiscardElement)(doc, element);
        return;
    }

    parent = element->parent;
    last = element->last;

    /* Link the last child to whatever followed the element. */
    last->next = element->next;
    if (element->next)
        element->next->prev = last;
    else
        parent->last = last;

    /* Link the first child to whatever preceded the element. */
    if (element->prev)
    {
        first->prev = element->prev;
        element->prev->next = first;
    }
    else
        parent->content = first;

    /* Re-parent the hoisted children (the walk continues along the
    ** now-joined sibling chain). */
    for (kid = first; kid; kid = kid->next)
        kid->parent = parent;

    *pnode = first;

    /* Detach before freeing so the hoisted nodes are not freed too. */
    element->next = element->content = NULL;
    TY_(FreeNode)(doc, element);
}
|
||||
|
||||
/* Iteratively walk the descendants of "node" and apply the Google Docs
** clean-up rules. An explicit stack of pending siblings replaces recursion.
**
**  - <style> elements are discarded
**  - empty <p> elements are discarded
**  - <span> elements are replaced by their content
**  - empty <a> elements are discarded, after their "name" (or, failing
**    that, "id") value is set as the "id" attribute of the parent
**  - any other element loses its "class" attribute and has its children
**    visited
**
** "node" itself is never discarded.
**
** Fixes relative to the previous version:
**  - a discarded <style> child is no longer inspected afterwards (the
**    rule chain is now a single if/else-if chain, so a freed node is
**    never read or descended into);
**  - the stack is only allocated when there is content to walk, so it is
**    always freed.
*/
static void CleanNode( TidyDocImpl* doc, Node *node )
{
    Stack *stack;
    Node *child = node->content;
    Node *next;

    if (!child)
        return;

    stack = TY_(newStack)(doc, 16);

    while (child)
    {
        /* Remember the sibling now: child may be freed below. */
        next = child->next;

        if (TY_(nodeIsElement)(child))
        {
            if (nodeIsSTYLE(child))
                TY_(DiscardElement)(doc, child);
            else if (nodeIsP(child) && !child->content)
                TY_(DiscardElement)(doc, child);
            else if (nodeIsSPAN(child))
                DiscardContainer( doc, child, &next);
            else if (nodeIsA(child) && !child->content)
            {
                AttVal *id = TY_(GetAttrByName)( child, "name" );
                /* Recent Google Docs is using "id" instead of "name" in
                ** the exported html.
                */
                if (!id)
                    id = TY_(GetAttrByName)( child, "id" );

                if (id)
                    TY_(RepairAttrValue)( doc, child->parent, "id", id->value );

                TY_(DiscardElement)(doc, child);
            }
            else
            {
                if (child->attributes)
                    TY_(DropAttrByName)( doc, child, "class" );

                /* Descend; resume with the sibling once the subtree is done. */
                TY_(push)(stack,next);
                child = child->content;
                continue;
            }
        }

        child = next ? next : TY_(pop)(stack);
    }

    TY_(freeStack)(stack);
}
|
||||
|
||||
/* insert meta element to force browser to recognize doc as UTF8 */
|
||||
/* Prepend <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
** to the document head. Does nothing when no head element is found.
*/
static void SetUTF8( TidyDocImpl* doc )
{
    Node *meta;
    Node *head = TY_(FindHEAD)( doc );

    if (!head)
        return;

    meta = TY_(InferredTag)(doc, TidyTag_META);
    TY_(AddAttribute)( doc, meta, "http-equiv", "Content-Type" );
    TY_(AddAttribute)( doc, meta, "content", "text/html; charset=UTF-8" );
    TY_(InsertNodeAtStart)( head, meta );
}
|
||||
|
||||
/* clean html exported by Google Docs
|
||||
|
||||
- strip the style element, as the style sheet is a mess
|
||||
- strip class attributes
|
||||
- strip span elements, leaving their content in place
|
||||
- replace <a name=...></a> by id on parent element
|
||||
- strip empty <p> elements
|
||||
*/
|
||||
void TY_(CleanGoogleDocument)( TidyDocImpl* doc )
{
    /* Start at the document root: CleanNode() only ever removes
    ** descendants of the node it is given, so the root itself is safe.
    */
    CleanNode( doc, &doc->root );

    /* Then add the UTF-8 content-type meta element to the head. */
    SetUTF8( doc );
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
21
third_party/tidy/gdoc.h
vendored
Normal file
21
third_party/tidy/gdoc.h
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
#ifndef __GDOC_H__
|
||||
#define __GDOC_H__
|
||||
#include "third_party/tidy/forward.h"
|
||||
/* clang-format off */
|
||||
|
||||
/* gdoc.h -- clean up html exported by Google Docs
|
||||
|
||||
(c) 2012 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
- strip the style element, as the style sheet is a mess
|
||||
- strip class attributes
|
||||
- strip span elements, leaving their content in place
|
||||
- replace <a name=...></a> by id on parent element
|
||||
- strip empty <p> elements
|
||||
|
||||
*/
|
||||
|
||||
void TY_(CleanGoogleDocument)( TidyDocImpl* doc );
|
||||
|
||||
#endif /* __GDOC_H__ */
|
378
third_party/tidy/istack.c
vendored
Normal file
378
third_party/tidy/istack.c
vendored
Normal file
|
@ -0,0 +1,378 @@
|
|||
/* clang-format off */
|
||||
/* istack.c -- inline stack for compatibility with Mosaic
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy-int.h"
#include "third_party/tidy/lexer.h"
#include "third_party/tidy/attrs.h"
#include "third_party/tidy/streamio.h"
#include "third_party/tidy/tmbstr.h"
#include "third_party/tidy/sprtf.h"
|
||||
|
||||
/* duplicate attributes */
|
||||
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs)
|
||||
{
|
||||
AttVal *newattrs;
|
||||
|
||||
if (attrs == NULL)
|
||||
return attrs;
|
||||
|
||||
newattrs = TY_(NewAttribute)(doc);
|
||||
*newattrs = *attrs;
|
||||
newattrs->next = TY_(DupAttrs)( doc, attrs->next );
|
||||
newattrs->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute);
|
||||
newattrs->value = TY_(tmbstrdup)(doc->allocator, attrs->value);
|
||||
newattrs->dict = TY_(FindAttribute)(doc, newattrs);
|
||||
newattrs->asp = attrs->asp ? TY_(CloneNode)(doc, attrs->asp) : NULL;
|
||||
newattrs->php = attrs->php ? TY_(CloneNode)(doc, attrs->php) : NULL;
|
||||
return newattrs;
|
||||
}
|
||||
|
||||
/* Decide whether a node may be placed on the inline stack.
**
** A node qualifies only when it has a tag whose content model includes
** CM_INLINE but not CM_OBJECT, and it is neither INS nor DEL.
*/
static Bool IsNodePushable( Node *node )
{
    if ( node->tag == NULL
         || !(node->tag->model & CM_INLINE)
         || (node->tag->model & CM_OBJECT) )
    {
        return no;
    }

    /*\ Issue #92: OLD problem of ins and del which are marked as both
     *  inline and block, thus should NOT ever be 'inserted'
    \*/
    return ( nodeIsINS(node) || nodeIsDEL(node) ) ? no : yes;
}
|
||||
|
||||
/*
|
||||
push a copy of an inline node onto stack
|
||||
but don't push if implicit or OBJECT or APPLET
|
||||
(implicit tags are ones generated from the istack)
|
||||
|
||||
One issue arises with pushing inlines when
|
||||
the tag is already pushed. For instance:
|
||||
|
||||
<p><em>text
|
||||
<p><em>more text
|
||||
|
||||
Shouldn't be mapped to
|
||||
|
||||
<p><em>text</em></p>
|
||||
<p><em><em>more text</em></em>
|
||||
*/
|
||||
void TY_(PushInline)( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
IStack *istack;
|
||||
|
||||
if (node->implicit)
|
||||
return;
|
||||
|
||||
if ( !IsNodePushable(node) )
|
||||
return;
|
||||
|
||||
if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) )
|
||||
return;
|
||||
|
||||
/* make sure there is enough space for the stack */
|
||||
if (lexer->istacksize + 1 > lexer->istacklength)
|
||||
{
|
||||
if (lexer->istacklength == 0)
|
||||
lexer->istacklength = 6; /* this is perhaps excessive */
|
||||
|
||||
lexer->istacklength = lexer->istacklength * 2;
|
||||
lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack,
|
||||
sizeof(IStack)*(lexer->istacklength));
|
||||
}
|
||||
|
||||
istack = &(lexer->istack[lexer->istacksize]);
|
||||
istack->tag = node->tag;
|
||||
|
||||
istack->element = TY_(tmbstrdup)(doc->allocator, node->element);
|
||||
istack->attributes = TY_(DupAttrs)( doc, node->attributes );
|
||||
++(lexer->istacksize);
|
||||
}
|
||||
|
||||
/* Remove the top entry of the inline stack.
**
** Decrements istacksize, frees every attribute of the removed entry and
** its element name, and resets the element pointer to NULL. The entry's
** tag pointer is left untouched. No emptiness check is made here; the
** callers in this file test istacksize > 0 first.
*/
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;
    AttVal *attr;
    AttVal *following;

    --(lexer->istacksize);
    top = &(lexer->istack[lexer->istacksize]);

    for ( attr = top->attributes; attr != NULL; attr = following )
    {
        following = attr->next;
        top->attributes = following;  /* unlink before freeing */
        TY_(FreeAttribute)( doc, attr );
    }

    TidyDocFree( doc, top->element );
    top->element = NULL; /* remove the freed element */
}
|
||||
|
||||
/* Pop entries off the inline stack until an entry whose tag id equals
** `tid` has been popped, or until the stack is empty.
*/
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;

    while ( lexer->istacksize > 0 )
    {
        PopIStack( doc );

        /* istacksize now indexes the entry that was just popped */
        if ( lexer->istack[lexer->istacksize].tag->id == tid )
            return;
    }
}
|
||||
|
||||
/* pop inline stack */
|
||||
void TY_(PopInline)( TidyDocImpl* doc, Node *node )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
|
||||
if (node)
|
||||
{
|
||||
if ( !IsNodePushable(node) )
|
||||
return;
|
||||
|
||||
/* if node is </a> then pop until we find an <a> */
|
||||
if ( nodeIsA(node) )
|
||||
{
|
||||
PopIStackUntil( doc, TidyTag_A );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (lexer->istacksize > 0)
|
||||
{
|
||||
PopIStack( doc );
|
||||
|
||||
/* #427822 - fix by Randy Waki 7 Aug 00 */
|
||||
if (lexer->insert >= lexer->istack + lexer->istacksize)
|
||||
lexer->insert = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Report whether any entry on the inline stack has the same tag as
** `node`. The stack is scanned from the top entry down to index 0.
*/
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize - 1;

    while ( idx >= 0 )
    {
        if ( lexer->istack[idx].tag == node->tag )
            return yes;
        --idx;
    }

    return no;
}
|
||||
|
||||
/*
|
||||
Test whether the last element on the stack has the same type than "node".
|
||||
*/
|
||||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
|
||||
if ( element && !IsNodePushable(element) )
|
||||
return no;
|
||||
|
||||
if (lexer->istacksize > 0) {
|
||||
if (lexer->istack[lexer->istacksize - 1].tag == node->tag) {
|
||||
return yes;
|
||||
}
|
||||
}
|
||||
|
||||
return no;
|
||||
}
|
||||
|
||||
/*
|
||||
This has the effect of inserting "missing" inline
|
||||
elements around the contents of blocklevel elements
|
||||
such as P, TD, TH, DIV, PRE etc. This procedure is
|
||||
called at the start of ParseBlock. when the inline
|
||||
stack is not empty, as will be the case in:
|
||||
|
||||
<i><h1>italic heading</h1></i>
|
||||
|
||||
which is then treated as equivalent to
|
||||
|
||||
<h1><i>italic heading</i></h1>
|
||||
|
||||
This is implemented by setting the lexer into a mode
|
||||
where it gets tokens from the inline stack rather than
|
||||
from the input stream.
|
||||
*/
|
||||
int TY_(InlineDup)( TidyDocImpl* doc, Node* node )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
int n;
|
||||
|
||||
if ((n = lexer->istacksize - lexer->istackbase) > 0)
|
||||
{
|
||||
lexer->insert = &(lexer->istack[lexer->istackbase]);
|
||||
lexer->inode = node;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
defer duplicates when entering a table or other
|
||||
element where the inlines shouldn't be duplicated
|
||||
*/
|
||||
void TY_(DeferDup)( TidyDocImpl* doc )
|
||||
{
|
||||
doc->lexer->insert = NULL;
|
||||
doc->lexer->inode = NULL;
|
||||
}
|
||||
|
||||
/* Return the next token while the lexer is in "insert" mode.
**
** When lexer->insert is NULL the saved lexer->inode is handed back (and
** cleared). Otherwise a new implicit StartTag node is built from the
** inline stack entry that lexer->insert points at: the element name and
** attributes are duplicated and the tag pointer is shared. The insert
** pointer then advances to the next stack entry, or becomes NULL once
** the end of the stack has been reached.
*/
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *node;
    IStack *istack;
    uint n;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        node = lexer->inode;
        lexer->inode = NULL;
        return node;
    }

    /*
      If this is the "latest" node then update
      the position, otherwise use current values
    */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    node = TY_(NewNode)(doc->allocator, lexer);
    node->type = StartTag;
    node->implicit = yes;
    node->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    node->end = lexer->txtend; /* was : lexer->txtstart; */
    istack = lexer->insert;

    /* The guard used to read "#if definedENABLE_DEBUG_LOG": a single
    ** undefined identifier, which evaluates to 0 and so compiled the
    ** check out unconditionally.
    */
#if defined(ENABLE_DEBUG_LOG)
    if ( lexer->istacksize == 0 )
    {
        SPRTF( "WARNING: ZERO sized istack!\n" );
    }
#endif

    node->element = TY_(tmbstrdup)(doc->allocator, istack->element);
    node->tag = istack->tag;
    node->attributes = TY_(DupAttrs)( doc, istack->attributes );

    /* advance lexer to next item on the stack */
    n = (uint)(lexer->insert - &(lexer->istack[0]));

    /* and recover state if we have reached the end */
    if (++n < lexer->istacksize)
        lexer->insert = &(lexer->istack[n]);
    else
        lexer->insert = NULL;

    return node;
}
|
||||
|
||||
|
||||
/*
|
||||
We have two CM_INLINE elements pushed ... the first is closing,
|
||||
but, like the browser, the second should be retained ...
|
||||
Like <b>bold <i>bold and italics</b> italics only</i>
|
||||
This function switches the tag positions on the stack,
|
||||
returning 'yes' if both were found in the expected order.
|
||||
*/
|
||||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
if ( lexer
|
||||
&& element && element->tag
|
||||
&& node && node->tag
|
||||
&& TY_(IsPushed)( doc, element )
|
||||
&& TY_(IsPushed)( doc, node )
|
||||
&& ((lexer->istacksize - lexer->istackbase) >= 2) )
|
||||
{
|
||||
/* we have a chance of succeeding ... */
|
||||
int i;
|
||||
for (i = (lexer->istacksize - lexer->istackbase - 1); i >= 0; --i)
|
||||
{
|
||||
if (lexer->istack[i].tag == element->tag) {
|
||||
/* found the element tag - phew */
|
||||
IStack *istack1 = &lexer->istack[i];
|
||||
IStack *istack2 = NULL;
|
||||
--i; /* back one more, and continue */
|
||||
for ( ; i >= 0; --i)
|
||||
{
|
||||
if (lexer->istack[i].tag == node->tag)
|
||||
{
|
||||
/* found the element tag - phew */
|
||||
istack2 = &lexer->istack[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( istack2 )
|
||||
{
|
||||
/* perform the swap */
|
||||
IStack tmp_istack = *istack2;
|
||||
*istack2 = *istack1;
|
||||
*istack1 = tmp_istack;
|
||||
return yes;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return no;
|
||||
}
|
||||
|
||||
/*
|
||||
We want to push a specific a specific element on the stack,
|
||||
but it may not be the last element, which InlineDup()
|
||||
would handle. Return yes, if found and inserted.
|
||||
*/
|
||||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element )
|
||||
{
|
||||
Lexer* lexer = doc->lexer;
|
||||
int n, i;
|
||||
if ( element
|
||||
&& (element->tag != NULL)
|
||||
&& ((n = lexer->istacksize - lexer->istackbase) > 0) )
|
||||
{
|
||||
for ( i = n - 1; i >=0; --i ) {
|
||||
if (lexer->istack[i].tag == element->tag) {
|
||||
/* found our element tag - insert it */
|
||||
lexer->insert = &(lexer->istack[i]);
|
||||
lexer->inode = node;
|
||||
return yes;
|
||||
}
|
||||
}
|
||||
}
|
||||
return no;
|
||||
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
666
third_party/tidy/language.c
vendored
Normal file
666
third_party/tidy/language.c
vendored
Normal file
|
@ -0,0 +1,666 @@
|
|||
/* clang-format off */
|
||||
/* language.c -- localization support for HTML Tidy.
|
||||
|
||||
Copyright 2015 HTACG
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/language.h"
|
||||
#include "third_party/tidy/tmbstr.h"
|
||||
#include "libc/assert.h"
|
||||
|
||||
#include "third_party/tidy/language_en.inc"
|
||||
|
||||
|
||||
/**
|
||||
* This structure type provides universal access to all of Tidy's strings.
|
||||
* Note the limit of 8 languages (the 9th slot holds the NULL terminator), to be changed as more are added...
|
||||
*/
|
||||
typedef struct {
|
||||
Bool manually_set;
|
||||
languageDefinition *currentLanguage;
|
||||
languageDefinition *fallbackLanguage;
|
||||
languageDefinition *languages[9];
|
||||
} tidyLanguagesType;
|
||||
|
||||
|
||||
/**
|
||||
* This single structure contains all localizations. Note that we preset
|
||||
* `.currentLanguage` to language_en, which is Tidy's default language.
|
||||
*/
|
||||
static tidyLanguagesType tidyLanguages = {
|
||||
no, /* library language was NOT manually set */
|
||||
&language_en, /* current language */
|
||||
&language_en, /* first fallback language */
|
||||
{
|
||||
/* Required localization! */
|
||||
&language_en,
|
||||
#if SUPPORT_LOCALIZATIONS
|
||||
/* These additional languages are installed. */
|
||||
&language_en_gb,
|
||||
&language_es,
|
||||
&language_es_mx,
|
||||
&language_pt_br,
|
||||
&language_zh_cn,
|
||||
&language_fr,
|
||||
&language_de,
|
||||
#endif
|
||||
NULL /* This array MUST be null terminated. */
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* This structure maps old-fashioned Windows strings
|
||||
* to proper POSIX names (modern Windows already uses
|
||||
* POSIX names).
|
||||
*/
|
||||
static const tidyLocaleMapItemImpl localeMappings[] = {
|
||||
{ "america", "en_us" },
|
||||
{ "american english", "en_us" },
|
||||
{ "american-english", "en_us" },
|
||||
{ "american", "en_us" },
|
||||
{ "aus", "en_au" },
|
||||
{ "australia", "en_au" },
|
||||
{ "australian", "en_au" },
|
||||
{ "austria", "de_at" },
|
||||
{ "aut", "de_at" },
|
||||
{ "bel", "nl_be" },
|
||||
{ "belgian", "nl_be" },
|
||||
{ "belgium", "nl_be" },
|
||||
{ "bra", "pt-br" },
|
||||
{ "brazil", "pt-br" },
|
||||
{ "britain", "en_gb" },
|
||||
{ "can", "en_ca" },
|
||||
{ "canada", "en_ca" },
|
||||
{ "canadian", "en_ca" },
|
||||
{ "che", "de_ch" },
|
||||
{ "china", "zh_cn" },
|
||||
{ "chinese-simplified", "zh" },
|
||||
{ "chinese-traditional", "zh_tw" },
|
||||
{ "chinese", "zh" },
|
||||
{ "chn", "zh_cn" },
|
||||
{ "chs", "zh" },
|
||||
{ "cht", "zh_tw" },
|
||||
{ "csy", "cs" },
|
||||
{ "cze", "cs_cz" },
|
||||
{ "czech", "cs_cz" },
|
||||
{ "dan", "da" },
|
||||
{ "danish", "da" },
|
||||
{ "dea", "de_at" },
|
||||
{ "denmark", "da_dk" },
|
||||
{ "des", "de_ch" },
|
||||
{ "deu", "de" },
|
||||
{ "dnk", "da_dk" },
|
||||
{ "dutch-belgian", "nl_be" },
|
||||
{ "dutch", "nl" },
|
||||
{ "ell", "el" },
|
||||
{ "ena", "en_au" },
|
||||
{ "enc", "en_ca" },
|
||||
{ "eng", "eb_gb" },
|
||||
{ "england", "en_gb" },
|
||||
{ "english-american", "en_us" },
|
||||
{ "english-aus", "en_au" },
|
||||
{ "english-can", "en_ca" },
|
||||
{ "english-nz", "en_nz" },
|
||||
{ "english-uk", "eb_gb" },
|
||||
{ "english-us", "en_us" },
|
||||
{ "english-usa", "en_us" },
|
||||
{ "english", "en" },
|
||||
{ "enu", "en_us" },
|
||||
{ "enz", "en_nz" },
|
||||
{ "esm", "es-mx" },
|
||||
{ "esn", "es" },
|
||||
{ "esp", "es" },
|
||||
{ "fin", "fi" },
|
||||
{ "finland", "fi_fi" },
|
||||
{ "finnish", "fi" },
|
||||
{ "fra", "fr" },
|
||||
{ "france", "fr_fr" },
|
||||
{ "frb", "fr_be" },
|
||||
{ "frc", "fr_ca" },
|
||||
{ "french-belgian", "fr_be" },
|
||||
{ "french-canadian", "fr_ca" },
|
||||
{ "french-swiss", "fr_ch" },
|
||||
{ "french", "fr" },
|
||||
{ "frs", "fr_ch" },
|
||||
{ "gbr", "en_gb" },
|
||||
{ "german-austrian", "de_at" },
|
||||
{ "german-swiss", "de_ch" },
|
||||
{ "german", "de" },
|
||||
{ "germany", "de_de" },
|
||||
{ "grc", "el_gr" },
|
||||
{ "great britain", "en_gb" },
|
||||
{ "greece", "el_gr" },
|
||||
{ "greek", "el" },
|
||||
{ "hkg", "zh_hk" },
|
||||
{ "holland", "nl_nl" },
|
||||
{ "hong kong", "zh_hk" },
|
||||
{ "hong-kong", "zh_hk" },
|
||||
{ "hun", "hu" },
|
||||
{ "hungarian", "hu" },
|
||||
{ "hungary", "hu_hu" },
|
||||
{ "iceland", "is_is" },
|
||||
{ "icelandic", "is" },
|
||||
{ "ireland", "en_ie" },
|
||||
{ "irl", "en_ie" },
|
||||
{ "isl", "is" },
|
||||
{ "ita", "it" },
|
||||
{ "ita", "it_it" },
|
||||
{ "italian-swiss", "it_ch" },
|
||||
{ "italian", "it" },
|
||||
{ "italy", "it_it" },
|
||||
{ "its", "it_ch" },
|
||||
{ "japan", "ja_jp" },
|
||||
{ "japanese", "ja" },
|
||||
{ "jpn", "ja" },
|
||||
{ "kor", "ko" },
|
||||
{ "korea", "ko_kr" },
|
||||
{ "korean", "ko" },
|
||||
{ "mex", "es-mx" },
|
||||
{ "mexico", "es-mx" },
|
||||
{ "netherlands", "nl_nl" },
|
||||
{ "new zealand", "en_nz" },
|
||||
{ "new-zealand", "en_nz" },
|
||||
{ "nlb", "nl_be" },
|
||||
{ "nld", "nl" },
|
||||
{ "non", "nn" },
|
||||
{ "nor", "nb" },
|
||||
{ "norway", "no" },
|
||||
{ "norwegian-bokmal", "nb" },
|
||||
{ "norwegian-nynorsk", "nn" },
|
||||
{ "norwegian", "no" },
|
||||
{ "nz", "en_nz" },
|
||||
{ "nzl", "en_nz" },
|
||||
{ "plk", "pl" },
|
||||
{ "pol", "pl-pl" },
|
||||
{ "poland", "pl-pl" },
|
||||
{ "polish", "pl" },
|
||||
{ "portugal", "pt-pt" },
|
||||
{ "portuguese-brazil", "pt-br" },
|
||||
{ "portuguese", "pt" },
|
||||
{ "pr china", "zh_cn" },
|
||||
{ "pr-china", "zh_cn" },
|
||||
{ "prt", "pt-pt" },
|
||||
{ "ptb", "pt-br" },
|
||||
{ "ptg", "pt" },
|
||||
{ "rus", "ru" },
|
||||
{ "russia", "ru-ru" },
|
||||
{ "russian", "ru" },
|
||||
{ "sgp", "zh_sg" },
|
||||
{ "singapore", "zh_sg" },
|
||||
{ "sky", "sk" },
|
||||
{ "slovak", "sk" },
|
||||
{ "spain", "es-es" },
|
||||
{ "spanish-mexican", "es-mx" },
|
||||
{ "spanish-modern", "es" },
|
||||
{ "spanish", "es" },
|
||||
{ "sve", "sv" },
|
||||
{ "svk", "sk-sk" },
|
||||
{ "swe", "sv-se" },
|
||||
{ "sweden", "sv-se" },
|
||||
{ "swedish", "sv" },
|
||||
{ "swiss", "de_ch" },
|
||||
{ "switzerland", "de_ch" },
|
||||
{ "taiwan", "zh_tw" },
|
||||
{ "trk", "tr" },
|
||||
{ "tur", "tr-tr" },
|
||||
{ "turkey", "tr-tr" },
|
||||
{ "turkish", "tr" },
|
||||
{ "twn", "zh_tw" },
|
||||
{ "uk", "en_gb" },
|
||||
{ "united kingdom", "en_gb" },
|
||||
{ "united states", "en_us" },
|
||||
{ "united-kingdom", "en_gb" },
|
||||
{ "united-states", "en_us" },
|
||||
{ "us", "en_us" },
|
||||
{ "usa", "en_us" },
|
||||
|
||||
/* MUST be last. */
|
||||
{ NULL, NULL }
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* The real string lookup function.
|
||||
*/
|
||||
static ctmbstr tidyLocalizedStringImpl( uint messageType, languageDefinition *definition, uint plural )
|
||||
{
|
||||
int i;
|
||||
languageDictionary *dictionary = &definition->messages;
|
||||
uint pluralForm = definition->whichPluralForm(plural);
|
||||
|
||||
for (i = 0; (*dictionary)[i].value; ++i)
|
||||
{
|
||||
if ( (*dictionary)[i].key == messageType && (*dictionary)[i].pluralForm == pluralForm )
|
||||
{
|
||||
return (*dictionary)[i].value;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the current
|
||||
* localization, returning the correct plural form given
|
||||
* `quantity`.
|
||||
*
|
||||
* This isn't currently highly optimized; rewriting some
|
||||
* of infrastructure to use hash lookups is a preferred
|
||||
* future optimization.
|
||||
*/
|
||||
ctmbstr TY_(tidyLocalizedStringN)( uint messageType, uint quantity )
|
||||
{
|
||||
ctmbstr result;
|
||||
|
||||
result = tidyLocalizedStringImpl( messageType, tidyLanguages.currentLanguage, quantity);
|
||||
|
||||
if (!result && tidyLanguages.fallbackLanguage )
|
||||
{
|
||||
result = tidyLocalizedStringImpl( messageType, tidyLanguages.fallbackLanguage, quantity);
|
||||
}
|
||||
|
||||
if (!result)
|
||||
{
|
||||
/* Fallback to en which is built in. */
|
||||
result = tidyLocalizedStringImpl( messageType, &language_en, quantity);
|
||||
}
|
||||
|
||||
if (!result)
|
||||
{
|
||||
/* Last resort: Fallback to en singular which is built in. */
|
||||
result = tidyLocalizedStringImpl( messageType, &language_en, 1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the current
|
||||
* localization, in the non-plural form.
|
||||
*
|
||||
* This isn't currently highly optimized; rewriting some
|
||||
* of infrastructure to use hash lookups is a preferred
|
||||
* future optimization.
|
||||
*/
|
||||
ctmbstr TY_(tidyLocalizedString)( uint messageType )
|
||||
{
|
||||
return TY_(tidyLocalizedStringN)( messageType, 1 );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the POSIX name for a string. Result is a static char so please
|
||||
* don't try to free it. If the name looks like a cc_ll identifier, we will
|
||||
* return it if there's no other match.
|
||||
* @note this routine uses default allocator, see tidySetMallocCall.
|
||||
*/
|
||||
static tmbstr TY_(tidyNormalizedLocaleName)( ctmbstr locale )
|
||||
{
|
||||
uint i;
|
||||
uint len;
|
||||
static char result[6] = "xx_yy";
|
||||
TidyAllocator * allocator = &TY_(g_default_allocator);
|
||||
|
||||
tmbstr search = TY_(tmbstrdup)( allocator, locale );
|
||||
search = TY_(tmbstrtolower)(search);
|
||||
|
||||
/* See if our string matches a Windows name. */
|
||||
for (i = 0; localeMappings[i].winName; ++i)
|
||||
{
|
||||
if ( strcmp( localeMappings[i].winName, search ) == 0 )
|
||||
{
|
||||
TidyFree( allocator, search );
|
||||
search = TY_(tmbstrdup)( allocator, localeMappings[i].POSIXName );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We're going to be stupid about this and trust the user, and
|
||||
return just the first two characters if they exist and the
|
||||
4th and 5th if they exist. The worst that can happen is a
|
||||
junk language that doesn't exist and won't be set. */
|
||||
|
||||
len = strlen( search );
|
||||
len = ( len <= 5 ? len : 5 );
|
||||
|
||||
for ( i = 0; i < len; i++ )
|
||||
{
|
||||
if ( i == 2 )
|
||||
{
|
||||
/* Either terminate the string or ensure there's an underscore */
|
||||
if (len == 5) {
|
||||
result[i] = '_';
|
||||
}
|
||||
else {
|
||||
result[i] = '\0';
|
||||
break; /* no need to copy after null */
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
result[i] = tolower( search[i] );
|
||||
}
|
||||
}
|
||||
|
||||
TidyFree( allocator, search );
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the languageDefinition if the languageCode is installed in Tidy,
|
||||
* otherwise return NULL
|
||||
*/
|
||||
static languageDefinition *TY_(tidyTestLanguage)( ctmbstr languageCode )
|
||||
{
|
||||
uint i;
|
||||
languageDefinition *testLang;
|
||||
languageDictionary *testDict;
|
||||
ctmbstr testCode;
|
||||
|
||||
for (i = 0; tidyLanguages.languages[i]; ++i)
|
||||
{
|
||||
testLang = tidyLanguages.languages[i];
|
||||
testDict = &testLang->messages;
|
||||
testCode = (*testDict)[0].value;
|
||||
|
||||
if ( strcmp(testCode, languageCode) == 0 )
|
||||
return testLang;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tells Tidy to use a different language for output.
|
||||
* @param languageCode A Windows or POSIX language code, and must match
|
||||
* a TIDY_LANGUAGE for an installed language.
|
||||
* @result Indicates that a setting was applied, but not necessarily the
|
||||
* specific request, i.e., true indicates a language and/or region
|
||||
* was applied. If es_mx is requested but not installed, and es is
|
||||
* installed, then es will be selected and this function will return
|
||||
* true. However the opposite is not true; if es is requested but
|
||||
* not present, Tidy will not try to select from the es_XX variants.
|
||||
*/
|
||||
Bool TY_(tidySetLanguage)( ctmbstr languageCode )
|
||||
{
|
||||
languageDefinition *dict1 = NULL;
|
||||
languageDefinition *dict2 = NULL;
|
||||
tmbstr wantCode = NULL;
|
||||
char lang[3] = "";
|
||||
|
||||
if ( !languageCode || !(wantCode = TY_(tidyNormalizedLocaleName)( languageCode )) )
|
||||
{
|
||||
return no;
|
||||
}
|
||||
|
||||
/* We want to use the specified language as the currentLanguage, and set
|
||||
fallback language as necessary. We have either a two or five digit code,
|
||||
either or both of which might be installed. Let's test both of them:
|
||||
*/
|
||||
|
||||
dict1 = TY_(tidyTestLanguage( wantCode )); /* WANTED language */
|
||||
|
||||
if ( strlen( wantCode ) > 2 )
|
||||
{
|
||||
strncpy(lang, wantCode, 2);
|
||||
lang[2] = '\0';
|
||||
dict2 = TY_(tidyTestLanguage( lang ) ); /* BACKUP language? */
|
||||
}
|
||||
|
||||
if ( dict1 && dict2 )
|
||||
{
|
||||
tidyLanguages.currentLanguage = dict1;
|
||||
tidyLanguages.fallbackLanguage = dict2;
|
||||
}
|
||||
if ( dict1 && !dict2 )
|
||||
{
|
||||
tidyLanguages.currentLanguage = dict1;
|
||||
tidyLanguages.fallbackLanguage = NULL;
|
||||
}
|
||||
if ( !dict1 && dict2 )
|
||||
{
|
||||
tidyLanguages.currentLanguage = dict2;
|
||||
tidyLanguages.fallbackLanguage = NULL;
|
||||
}
|
||||
if ( !dict1 && !dict2 )
|
||||
{
|
||||
/* No change. */
|
||||
}
|
||||
|
||||
return dict1 || dict2;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the current language used by Tidy.
|
||||
*/
|
||||
ctmbstr TY_(tidyGetLanguage)()
|
||||
{
|
||||
languageDefinition *langDef = tidyLanguages.currentLanguage;
|
||||
languageDictionary *langDict = &langDef->messages;
|
||||
return (*langDict)[0].value;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not the current language was set by a
|
||||
* LibTidy user (yes) or internally by the library (no).
|
||||
*/
|
||||
Bool TY_(tidyGetLanguageSetByUser)()
|
||||
{
|
||||
return tidyLanguages.manually_set;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Specifies to LibTidy that the user (rather than the library)
|
||||
* selected the current language.
|
||||
*/
|
||||
void TY_(tidySetLanguageSetByUser)( void )
|
||||
{
|
||||
tidyLanguages.manually_set = yes;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the default
|
||||
* localization (which is `en`), for the given quantity.
|
||||
*/
|
||||
ctmbstr TY_(tidyDefaultStringN)( uint messageType, uint quantity )
|
||||
{
|
||||
return tidyLocalizedStringImpl( messageType, &language_en, quantity);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the default
|
||||
* localization (which is `en`), for single plural form.
|
||||
*/
|
||||
ctmbstr TY_(tidyDefaultString)( uint messageType )
|
||||
{
|
||||
return tidyLocalizedStringImpl( messageType, &language_en, 1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines the true size of the `language_en` array indicating the
|
||||
* number of items in the array, _not_ the highest index.
|
||||
*/
|
||||
static const uint tidyStringKeyListSize()
|
||||
{
|
||||
static uint array_size = 0;
|
||||
|
||||
if ( array_size == 0 )
|
||||
{
|
||||
while ( language_en.messages[array_size].value != NULL ) {
|
||||
array_size++;
|
||||
}
|
||||
}
|
||||
|
||||
return array_size;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's list of localization string keys. Note that
|
||||
* these are provided for documentation generation purposes
|
||||
* and probably aren't useful for LibTidy implementors.
|
||||
*/
|
||||
TidyIterator TY_(getStringKeyList)()
|
||||
{
|
||||
return (TidyIterator)(size_t)1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Provides the next key value in Tidy's list of localized
|
||||
* strings. Note that these are provided for documentation
|
||||
* generation purposes and probably aren't useful to
|
||||
* libtidy implementors.
|
||||
*/
|
||||
uint TY_(getNextStringKey)( TidyIterator* iter )
|
||||
{
|
||||
uint item = 0;
|
||||
size_t itemIndex;
|
||||
assert( iter != NULL );
|
||||
|
||||
itemIndex = (size_t)*iter;
|
||||
|
||||
if ( itemIndex > 0 && itemIndex <= tidyStringKeyListSize() )
|
||||
{
|
||||
item = language_en.messages[ itemIndex - 1 ].key;
|
||||
itemIndex++;
|
||||
}
|
||||
|
||||
*iter = (TidyIterator)( itemIndex <= tidyStringKeyListSize() ? itemIndex : (size_t)0 );
|
||||
return item;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines the true size of the `localeMappings` array indicating the
|
||||
* number of items in the array, _not_ the highest index.
|
||||
*/
|
||||
static const uint tidyLanguageListSize()
|
||||
{
|
||||
static uint array_size = 0;
|
||||
|
||||
if ( array_size == 0 )
|
||||
{
|
||||
while ( localeMappings[array_size].winName ) {
|
||||
array_size++;
|
||||
}
|
||||
}
|
||||
|
||||
return array_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's structure of Windows<->POSIX local mapping.
|
||||
* Items can be retrieved with getNextWindowsLanguage();
|
||||
*/
|
||||
TidyIterator TY_(getWindowsLanguageList)()
|
||||
{
|
||||
return (TidyIterator)(size_t)1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next record of type `localeMapItem` in
|
||||
* Tidy's structure of Windows<->POSIX local mapping.
|
||||
*/
|
||||
const tidyLocaleMapItemImpl *TY_(getNextWindowsLanguage)( TidyIterator *iter )
|
||||
{
|
||||
const tidyLocaleMapItemImpl *item = NULL;
|
||||
size_t itemIndex;
|
||||
assert( iter != NULL );
|
||||
|
||||
itemIndex = (size_t)*iter;
|
||||
|
||||
if ( itemIndex > 0 && itemIndex <= tidyLanguageListSize() )
|
||||
{
|
||||
item = &localeMappings[ itemIndex -1 ];
|
||||
itemIndex++;
|
||||
}
|
||||
|
||||
*iter = (TidyIterator)( itemIndex <= tidyLanguageListSize() ? itemIndex : (size_t)0 );
|
||||
return item;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a `tidyLocaleMapItemImpl`, return the Windows name.
|
||||
*/
|
||||
ctmbstr TY_(TidyLangWindowsName)( const tidyLocaleMapItemImpl *item )
|
||||
{
|
||||
return item->winName;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a `tidyLocaleMapItemImpl`, return the POSIX name.
|
||||
*/
|
||||
ctmbstr TY_(TidyLangPosixName)( const tidyLocaleMapItemImpl *item )
|
||||
{
|
||||
return item->POSIXName;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines the number of languages installed in Tidy.
|
||||
*/
|
||||
static const uint tidyInstalledLanguageListSize()
|
||||
{
|
||||
static uint array_size = 0;
|
||||
|
||||
if ( array_size == 0 )
|
||||
{
|
||||
while ( tidyLanguages.languages[array_size] ) {
|
||||
array_size++;
|
||||
}
|
||||
}
|
||||
|
||||
return array_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's list of installed language codes.
|
||||
* Items can be retrieved with getNextInstalledLanguage();
|
||||
*/
|
||||
TidyIterator TY_(getInstalledLanguageList)()
|
||||
{
|
||||
return (TidyIterator)(size_t)1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next installed language.
|
||||
*/
|
||||
ctmbstr TY_(getNextInstalledLanguage)( TidyIterator* iter )
|
||||
{
|
||||
ctmbstr item = NULL;
|
||||
size_t itemIndex;
|
||||
assert( iter != NULL );
|
||||
|
||||
itemIndex = (size_t)*iter;
|
||||
|
||||
if ( itemIndex > 0 && itemIndex <= tidyInstalledLanguageListSize() )
|
||||
{
|
||||
item = tidyLanguages.languages[itemIndex - 1]->messages[0].value;
|
||||
itemIndex++;
|
||||
}
|
||||
|
||||
*iter = (TidyIterator)( itemIndex <= tidyInstalledLanguageListSize() ? itemIndex : (size_t)0 );
|
||||
return item;
|
||||
}
|
||||
|
||||
/*
|
||||
* end:
|
||||
*/
|
||||
|
223
third_party/tidy/language.h
vendored
Normal file
223
third_party/tidy/language.h
vendored
Normal file
|
@ -0,0 +1,223 @@
|
|||
#ifndef language_h
|
||||
#define language_h
|
||||
/* clang-format off */
|
||||
|
||||
/*********************************************************************
|
||||
* Localization support for HTML Tidy.
|
||||
*
|
||||
* This header provides the public (within libtidy) interface to
|
||||
* basic localization support. To add your own localization, create
|
||||
* a new `language_xx.h` file and add it to the struct in
|
||||
* `language.c`.
|
||||
*
|
||||
* (c) 2015 HTACG
|
||||
* See `tidy.h` for the copyright notice.
|
||||
*********************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
|
||||
/** @name Exposed Data Structures */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
* These enumerations are used within instances of `languageDefinition`
|
||||
* structures to provide additional metadata, and are localizable
|
||||
* therein.
|
||||
*/
|
||||
typedef enum {
|
||||
|
||||
/* Specifies the language code for a particular language. */
|
||||
TIDY_LANGUAGE = 400,
|
||||
|
||||
/* Marker for the last key in the structure. */
|
||||
TIDY_MESSAGE_TYPE_LAST
|
||||
|
||||
} tidyLanguage;
|
||||
|
||||
|
||||
/**
|
||||
* Describes a record for a localization string.
|
||||
* - key must correspond with one of Tidy's enums (see `tidyMessageTypes`
|
||||
* below)
|
||||
* - pluralForm corresponds to gettext plural forms case (not singularity).
|
||||
* Most entries should be case 0, representing the single case. See:
|
||||
* https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
|
||||
*/
|
||||
typedef struct languageDictionaryEntry {
|
||||
uint key;
|
||||
uint pluralForm;
|
||||
ctmbstr value;
|
||||
} languageDictionaryEntry;
|
||||
|
||||
|
||||
/**
|
||||
* For now we'll just use an array to hold all of the dictionary
|
||||
* entries. In the future we can convert this to a hash structure
|
||||
* which will make looking up strings faster.
|
||||
*/
|
||||
typedef languageDictionaryEntry const languageDictionary[600];
|
||||
|
||||
|
||||
/**
|
||||
* Finally, a complete language definition. The item `whichPluralForm`
|
||||
* is a function pointer that will provide the correct plural
|
||||
* form given the value `n`. The actual function is present in
|
||||
* each language header and is language dependent.
|
||||
*/
|
||||
typedef struct languageDefinition {
|
||||
uint (*whichPluralForm)(uint n);
|
||||
languageDictionary messages;
|
||||
} languageDefinition;
|
||||
|
||||
|
||||
/**
|
||||
* The function getNextWindowsLanguage() returns pointers to this type;
|
||||
* it gives LibTidy implementors the ability to determine how Windows
|
||||
* locale names are mapped to POSIX language codes.
|
||||
*/
|
||||
typedef struct tidyLocaleMapItemImpl {
|
||||
ctmbstr winName;
|
||||
ctmbstr POSIXName;
|
||||
} tidyLocaleMapItemImpl;
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Localization Related Functions */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
** Determines the current locale without affecting the C locale.
|
||||
** Tidy has always used the default C locale, and at this point
|
||||
** in its development we're not going to tamper with that.
|
||||
** @param result The buffer to use to return the string.
|
||||
** Returns NULL on failure.
|
||||
** @return The same buffer for convenience.
|
||||
*/
|
||||
tmbstr TY_(tidySystemLocale)(tmbstr result);
|
||||
|
||||
/**
|
||||
* Tells Tidy to use a different language for output.
|
||||
* @param languageCode A Windows or POSIX language code, and must match
|
||||
* a TIDY_LANGUAGE for an installed language.
|
||||
* @result Indicates that a setting was applied, but not necessarily the
|
||||
* specific request, i.e., true indicates a language and/or region
|
||||
* was applied. If es_mx is requested but not installed, and es is
|
||||
* installed, then es will be selected and this function will return
|
||||
* true. However the opposite is not true; if es is requested but
|
||||
* not present, Tidy will not try to select from the es_XX variants.
|
||||
*/
|
||||
Bool TY_(tidySetLanguage)( ctmbstr languageCode );
|
||||
|
||||
/**
|
||||
* Gets the current language used by Tidy.
|
||||
*/
|
||||
ctmbstr TY_(tidyGetLanguage)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not the current language was set by a
|
||||
* LibTidy user or internally by the library. This flag prevents
|
||||
* subsequently created instances of TidyDocument from changing the
|
||||
* user's language.
|
||||
* @returns Returns yes to indicate that the current language was
|
||||
* specified by an API user.
|
||||
*/
|
||||
Bool TY_(tidyGetLanguageSetByUser)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Specifies to LibTidy that the user (rather than the library)
|
||||
* selected the current language. This flag prevents subsequently
|
||||
* created instances of TidyDocument from changing the user's language.
|
||||
*/
|
||||
void TY_(tidySetLanguageSetByUser)( void );
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the current
|
||||
* localization for `quantity`.
|
||||
*/
|
||||
ctmbstr TY_(tidyLocalizedStringN)( uint messageType, uint quantity );
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the current
|
||||
* localization for the single case.
|
||||
*/
|
||||
ctmbstr TY_(tidyLocalizedString)( uint messageType );
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Documentation Generation */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the default
|
||||
* localization (which is `en`), for the given quantity.
|
||||
*/
|
||||
ctmbstr TY_(tidyDefaultStringN)( uint messageType, uint quantity );
|
||||
|
||||
/**
|
||||
* Provides a string given `messageType` in the default
|
||||
* localization (which is `en`).
|
||||
*/
|
||||
ctmbstr TY_(tidyDefaultString)( uint messageType );
|
||||
|
||||
/*
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's list of localization string keys. Note that
|
||||
* these are provided for documentation generation purposes
|
||||
* and probably aren't useful for LibTidy implementors.
|
||||
*/
|
||||
TidyIterator TY_(getStringKeyList)(void);
|
||||
|
||||
/*
|
||||
* Provides the next key value in Tidy's list of localized
|
||||
* strings. Note that these are provided for documentation
|
||||
* generation purposes and probably aren't useful to
|
||||
* libtidy implementors.
|
||||
*/
|
||||
uint TY_(getNextStringKey)( TidyIterator* iter );
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's structure of Windows<->POSIX locale mapping.
|
||||
* Items can be retrieved with getNextWindowsLanguage();
|
||||
*/
|
||||
TidyIterator TY_(getWindowsLanguageList)(void);
|
||||
|
||||
/**
|
||||
* Returns the next record of type `tidyLocaleMapItemImpl` in
|
||||
* Tidy's structure of Windows<->POSIX locale mapping.
|
||||
*/
|
||||
const tidyLocaleMapItemImpl *TY_(getNextWindowsLanguage)( TidyIterator* iter );
|
||||
|
||||
/**
|
||||
* Given a `tidyLocaleMapItemImpl`, return the Windows name.
|
||||
*/
|
||||
ctmbstr TY_(TidyLangWindowsName)( const tidyLocaleMapItemImpl *item );
|
||||
|
||||
/**
|
||||
* Given a `tidyLocaleMapItemImpl`, return the POSIX name.
|
||||
*/
|
||||
ctmbstr TY_(TidyLangPosixName)( const tidyLocaleMapItemImpl *item );
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
* in Tidy's list of installed language codes.
|
||||
* Items can be retrieved with getNextInstalledLanguage();
|
||||
*/
|
||||
TidyIterator TY_(getInstalledLanguageList)(void);
|
||||
|
||||
/**
|
||||
* Returns the next installed language.
|
||||
*/
|
||||
ctmbstr TY_(getNextInstalledLanguage)( TidyIterator* iter );
|
||||
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif /* language_h */
|
2507
third_party/tidy/language_en.inc
vendored
Normal file
2507
third_party/tidy/language_en.inc
vendored
Normal file
File diff suppressed because it is too large
Load diff
4551
third_party/tidy/lexer.c
vendored
Normal file
4551
third_party/tidy/lexer.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
750
third_party/tidy/lexer.h
vendored
Normal file
750
third_party/tidy/lexer.h
vendored
Normal file
|
@ -0,0 +1,750 @@
|
|||
#ifndef __LEXER_H__
|
||||
#define __LEXER_H__
|
||||
/* clang-format off */
|
||||
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Lexer for HTML and XML Parsers.
|
||||
*
|
||||
* Given an input source, it returns a sequence of tokens.
|
||||
*
|
||||
* GetToken(source) gets the next token
|
||||
* UngetToken(source) provides one level undo
|
||||
*
|
||||
* The tags include an attribute list:
|
||||
*
|
||||
* - linked list of attribute/value nodes
|
||||
* - each node has 2 NULL-terminated strings.
|
||||
* - entities are replaced in attribute values
|
||||
*
|
||||
* white space is compacted if not in preformatted mode
|
||||
* If not in preformatted mode then leading white space
|
||||
* is discarded and subsequent white space sequences
|
||||
* compacted to single space characters.
|
||||
*
|
||||
* If XmlTags is no then Tag names are folded to upper
|
||||
* case and attribute names to lower case.
|
||||
*
|
||||
* Not yet done:
|
||||
* - Doctype subset and marked sections
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* (c) 1998-2021 (W3C) MIT, ERCIM, Keio University, and HTACG.
|
||||
* See tidy.h for the copyright notice.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup lexer_h HTML and XML Lexing
|
||||
**
|
||||
** These functions and structures form the internal API for document
|
||||
** lexing.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Lexer character types.
|
||||
*/
|
||||
#define digit 1u
|
||||
#define letter 2u
|
||||
#define namechar 4u
|
||||
#define white 8u
|
||||
#define newline 16u
|
||||
#define lowercase 32u
|
||||
#define uppercase 64u
|
||||
#define digithex 128u
|
||||
|
||||
|
||||
/**
|
||||
* node->type is one of these values
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RootNode,
|
||||
DocTypeTag,
|
||||
CommentTag,
|
||||
ProcInsTag,
|
||||
TextNode,
|
||||
StartTag,
|
||||
EndTag,
|
||||
StartEndTag,
|
||||
CDATATag,
|
||||
SectionTag,
|
||||
AspTag,
|
||||
JsteTag,
|
||||
PhpTag,
|
||||
XmlDecl
|
||||
} NodeType;
|
||||
|
||||
|
||||
/**
|
||||
* Lexer GetToken() states.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
LEX_CONTENT,
|
||||
LEX_GT,
|
||||
LEX_ENDTAG,
|
||||
LEX_STARTTAG,
|
||||
LEX_COMMENT,
|
||||
LEX_DOCTYPE,
|
||||
LEX_PROCINSTR,
|
||||
LEX_CDATA,
|
||||
LEX_SECTION,
|
||||
LEX_ASP,
|
||||
LEX_JSTE,
|
||||
LEX_PHP,
|
||||
LEX_XMLDECL
|
||||
} LexerState;
|
||||
|
||||
|
||||
/**
|
||||
* ParseDocTypeDecl state constants.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
DT_INTERMEDIATE,
|
||||
DT_DOCTYPENAME,
|
||||
DT_PUBLICSYSTEM,
|
||||
DT_QUOTEDSTRING,
|
||||
DT_INTSUBSET
|
||||
} ParseDocTypeDeclState;
|
||||
|
||||
|
||||
/**
|
||||
* Content model shortcut encoding.
|
||||
* Descriptions are tentative.
|
||||
*/
|
||||
#define CM_UNKNOWN 0
|
||||
#define CM_EMPTY (1 << 0) /**< Elements with no content. Map to HTML specification. */
|
||||
#define CM_HTML (1 << 1) /**< Elements that appear outside of "BODY". */
|
||||
#define CM_HEAD (1 << 2) /**< Elements that can appear within HEAD. */
|
||||
#define CM_BLOCK (1 << 3) /**< HTML "block" elements. */
|
||||
#define CM_INLINE (1 << 4) /**< HTML "inline" elements. */
|
||||
#define CM_LIST (1 << 5) /**< Elements that mark list item ("LI"). */
|
||||
#define CM_DEFLIST (1 << 6) /**< Elements that mark definition list item ("DL", "DT"). */
|
||||
#define CM_TABLE (1 << 7) /**< Elements that can appear inside TABLE. */
|
||||
#define CM_ROWGRP (1 << 8) /**< Used for "THEAD", "TFOOT" or "TBODY". */
|
||||
#define CM_ROW (1 << 9) /**< Used for "TD", "TH" */
|
||||
#define CM_FIELD (1 << 10) /**< Elements whose content must be protected against white space movement. Includes some elements that can found in forms. */
|
||||
#define CM_OBJECT (1 << 11) /**< Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET. */
|
||||
#define CM_PARAM (1 << 12) /**< Elements that allows "PARAM". */
|
||||
#define CM_FRAMES (1 << 13) /**< "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
|
||||
#define CM_HEADING (1 << 14) /**< Heading elements (h1, h2, ...). */
|
||||
#define CM_OPT (1 << 15) /**< Elements with an optional end tag. */
|
||||
#define CM_IMG (1 << 16) /**< Elements that use "align" attribute for vertical position. */
|
||||
#define CM_MIXED (1 << 17) /**< Elements with inline and block model. Used to avoid calling InlineDup. */
|
||||
#define CM_NO_INDENT (1 << 18) /**< Elements whose content needs to be indented only if containing one CM_BLOCK element. */
|
||||
#define CM_OBSOLETE (1 << 19) /**< Elements that are obsolete (such as "dir", "menu"). */
|
||||
#define CM_NEW (1 << 20) /**< User defined elements. Used to determine how attributes without value should be printed. */
|
||||
#define CM_OMITST (1 << 21) /**< Elements that cannot be omitted. */
|
||||
#define CM_VOID (1 << 22) /**< Elements that are void per https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements. */
|
||||
|
||||
|
||||
/**
|
||||
* If the document uses just HTML 2.0 tags and attributes described
|
||||
* it is HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
|
||||
* If there are proprietary tags and attributes then describe it as
|
||||
* HTML Proprietary. If it includes the xml-lang or xmlns attributes
|
||||
* but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
|
||||
* flavors of Voyager (strict, loose or frameset).
|
||||
*/
|
||||
|
||||
/* unknown */
|
||||
#define xxxx 0u
|
||||
|
||||
/* W3C defined HTML/XHTML family document types */
|
||||
#define HT20 1u
|
||||
#define HT32 2u
|
||||
#define H40S 4u
|
||||
#define H40T 8u
|
||||
#define H40F 16u
|
||||
#define H41S 32u
|
||||
#define H41T 64u
|
||||
#define H41F 128u
|
||||
#define X10S 256u
|
||||
#define X10T 512u
|
||||
#define X10F 1024u
|
||||
#define XH11 2048u
|
||||
#define XB10 4096u
|
||||
|
||||
/* proprietary stuff */
|
||||
#define VERS_SUN 8192u
|
||||
#define VERS_NETSCAPE 16384u
|
||||
#define VERS_MICROSOFT 32768u
|
||||
|
||||
/* special flag */
|
||||
#define VERS_XML 65536u
|
||||
|
||||
/* HTML5 */
|
||||
#define HT50 131072u
|
||||
#define XH50 262144u
|
||||
|
||||
/* compatibility symbols */
|
||||
#define VERS_UNKNOWN (xxxx)
|
||||
#define VERS_HTML20 (HT20)
|
||||
#define VERS_HTML32 (HT32)
|
||||
#define VERS_HTML40_STRICT (H40S|H41S|X10S)
|
||||
#define VERS_HTML40_LOOSE (H40T|H41T|X10T)
|
||||
#define VERS_FRAMESET (H40F|H41F|X10F)
|
||||
#define VERS_XHTML11 (XH11)
|
||||
#define VERS_BASIC (XB10)
|
||||
/* HTML5 */
|
||||
#define VERS_HTML5 (HT50|XH50)
|
||||
|
||||
/* meta symbols */
|
||||
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
|
||||
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET)
|
||||
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME)
|
||||
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11)
|
||||
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40|HT50)
|
||||
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC|VERS_HTML5)
|
||||
#define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50)
|
||||
|
||||
/* strict */
|
||||
#define VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT)
|
||||
|
||||
/* all W3C defined document types */
|
||||
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
|
||||
|
||||
/* all proprietary types */
|
||||
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
|
||||
|
||||
|
||||
/**
|
||||
* Linked list of class names and styles
|
||||
*/
|
||||
struct _Style;
|
||||
typedef struct _Style TagStyle;
|
||||
|
||||
struct _Style
|
||||
{
|
||||
tmbstr tag;
|
||||
tmbstr tag_class;
|
||||
tmbstr properties;
|
||||
TagStyle *next;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Linked list of style properties
|
||||
*/
|
||||
struct _StyleProp;
|
||||
typedef struct _StyleProp StyleProp;
|
||||
|
||||
struct _StyleProp
|
||||
{
|
||||
tmbstr name;
|
||||
tmbstr value;
|
||||
StyleProp *next;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Attribute/Value linked list node
|
||||
*/
|
||||
struct _AttVal
|
||||
{
|
||||
AttVal* next;
|
||||
const Attribute* dict;
|
||||
Node* asp;
|
||||
Node* php;
|
||||
int delim;
|
||||
tmbstr attribute;
|
||||
tmbstr value;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Mosaic handles inlines via a separate stack from other elements
|
||||
* We duplicate this to recover from inline markup errors such as:
|
||||
* ~~~
|
||||
* <i>italic text
|
||||
* <p>more italic text</b> normal text
|
||||
* ~~~
|
||||
* which for compatibility with Mosaic is mapped to:
|
||||
* ~~~
|
||||
* <i>italic text</i>
|
||||
* <p><i>more italic text</i> normal text
|
||||
* ~~~
|
||||
* Note that any inline end tag pops the effect of the current
|
||||
* inline start tag, so that `</b>` pops `<i>` in the above example.
|
||||
*/
|
||||
struct _IStack
|
||||
{
|
||||
IStack* next;
|
||||
const Dict* tag; /**< tag's dictionary definition */
|
||||
tmbstr element; /**< name (NULL for text nodes) */
|
||||
AttVal* attributes;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
|
||||
*/
|
||||
struct _Node
|
||||
{
|
||||
Node* parent; /**< tree structure */
|
||||
Node* prev;
|
||||
Node* next;
|
||||
Node* content;
|
||||
Node* last;
|
||||
|
||||
AttVal* attributes;
|
||||
const Dict* was; /**< old tag when it was changed */
|
||||
const Dict* tag; /**< tag's dictionary definition */
|
||||
|
||||
tmbstr element; /**< name (NULL for text nodes) */
|
||||
|
||||
uint start; /**< start of span onto text array */
|
||||
uint end; /**< end of span onto text array */
|
||||
NodeType type; /**< TextNode, StartTag, EndTag etc. */
|
||||
|
||||
uint line; /**< current line of document */
|
||||
uint column; /**< current column of document */
|
||||
|
||||
int idx; /**< general purpose register */
|
||||
|
||||
Bool closed; /**< true if closed by explicit end tag */
|
||||
Bool implicit; /**< true if inferred */
|
||||
Bool linebreak; /**< true if followed by a line break */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* The following are private to the lexer.
|
||||
* Use `NewLexer()` to create a lexer, and `FreeLexer()` to free it.
|
||||
*/
|
||||
struct _Lexer
|
||||
{
|
||||
uint lines; /**< lines seen */
|
||||
uint columns; /**< at start of current token */
|
||||
Bool waswhite; /**< used to collapse contiguous white space */
|
||||
Bool pushed; /**< true after token has been pushed back */
|
||||
Bool insertspace; /**< when space is moved after end tag */
|
||||
Bool excludeBlocks; /**< Netscape compatibility */
|
||||
Bool exiled; /**< true if moved out of table */
|
||||
Bool isvoyager; /**< true if xmlns attribute on html element (i.e., "Voyager" was the W3C codename for XHTML). */
|
||||
uint versions; /**< bit vector of HTML versions */
|
||||
uint doctype; /**< version as given by doctype (if any) */
|
||||
uint versionEmitted; /**< version of doctype emitted */
|
||||
Bool bad_doctype; /**< e.g. if html or PUBLIC is missing */
|
||||
uint txtstart; /**< start of current node */
|
||||
uint txtend; /**< end of current node */
|
||||
LexerState state; /**< state of lexer's finite state machine */
|
||||
|
||||
Node* token; /**< last token returned by GetToken() */
|
||||
Node* itoken; /**< last duplicate inline returned by GetToken() */
|
||||
Node* root; /**< remember root node of the document */
|
||||
Node* parent; /**< remember parent node for CDATA elements */
|
||||
|
||||
Bool seenEndBody; /**< true if a `</body>` tag has been encountered */
|
||||
Bool seenEndHtml; /**< true if a `</html>` tag has been encountered */
|
||||
|
||||
/*
|
||||
Lexer character buffer
|
||||
|
||||
Parse tree nodes span onto this buffer
|
||||
which contains the concatenated text
|
||||
contents of all of the elements.
|
||||
|
||||
lexsize must be reset for each file.
|
||||
*/
|
||||
tmbstr lexbuf; /**< MB character buffer */
|
||||
uint lexlength; /**< allocated */
|
||||
uint lexsize; /**< used */
|
||||
|
||||
/* Inline stack for compatibility with Mosaic */
|
||||
Node* inode; /**< for deferring text node */
|
||||
IStack* insert; /**< for inferring inline tags */
|
||||
IStack* istack;
|
||||
uint istacklength; /**< allocated */
|
||||
uint istacksize; /**< used */
|
||||
uint istackbase; /**< start of frame */
|
||||
|
||||
TagStyle *styles; /**< used for cleaning up presentation markup */
|
||||
|
||||
TidyAllocator* allocator; /**< allocator */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* modes for GetToken()
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
IgnoreWhitespace, /**< */
|
||||
MixedContent, /**< for elements which don't accept PCDATA */
|
||||
Preformatted, /**< white space preserved as is */
|
||||
IgnoreMarkup, /**< for CDATA elements such as script, style */
|
||||
OtherNamespace, /**< */
|
||||
CdataContent /**< */
|
||||
} GetTokenMode;
|
||||
|
||||
|
||||
/** @name Lexer Functions
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Choose what version to use for new doctype
|
||||
*/
|
||||
int TY_(HTMLVersion)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Everything is allowed in proprietary version of HTML.
|
||||
* This is handled here rather than in the tag/attr dicts
|
||||
*/
|
||||
void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
|
||||
|
||||
Bool TY_(IsWhite)(uint c);
|
||||
Bool TY_(IsDigit)(uint c);
|
||||
Bool TY_(IsLetter)(uint c);
|
||||
Bool TY_(IsHTMLSpace)(uint c);
|
||||
Bool TY_(IsNewline)(uint c);
|
||||
Bool TY_(IsNamechar)(uint c);
|
||||
Bool TY_(IsXMLLetter)(uint c);
|
||||
Bool TY_(IsXMLNamechar)(uint c);
|
||||
|
||||
Bool TY_(IsUpper)(uint c);
|
||||
uint TY_(ToLower)(uint c);
|
||||
uint TY_(ToUpper)(uint c);
|
||||
|
||||
Lexer* TY_(NewLexer)( TidyDocImpl* doc );
|
||||
void TY_(FreeLexer)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Store character c as UTF-8 encoded byte stream
|
||||
*/
|
||||
void TY_(AddCharToLexer)( Lexer *lexer, uint c );
|
||||
|
||||
|
||||
/**
|
||||
* Used for elements and text nodes.
|
||||
* - Element name is NULL for text nodes.
|
||||
* - start and end are offsets into lexbuf,
|
||||
* which contains the textual content of
|
||||
* all elements in the parse tree.
|
||||
* - parent and content allow traversal
|
||||
* of the parse tree in any direction.
|
||||
* - attributes are represented as a linked
|
||||
* list of AttVal nodes which hold the
|
||||
* strings for attribute/value pairs.
|
||||
*/
|
||||
Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
|
||||
|
||||
|
||||
/**
|
||||
* Used to clone heading nodes when split by an `<HR>`
|
||||
*/
|
||||
Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
|
||||
|
||||
|
||||
/**
|
||||
* Free node's attributes
|
||||
*/
|
||||
void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
/**
|
||||
* Doesn't repair attribute list linkage
|
||||
*/
|
||||
void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
|
||||
|
||||
|
||||
/**
|
||||
* Detach attribute from node
|
||||
*/
|
||||
void TY_(DetachAttribute)( Node *node, AttVal *attr );
|
||||
|
||||
|
||||
/**
|
||||
* Detach attribute from node then free it.
|
||||
*/
|
||||
void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
|
||||
|
||||
|
||||
/**
|
||||
* Free document nodes by iterating through peers and recursing
|
||||
* through children. Set `next` to `NULL` before calling `FreeNode()`
|
||||
* to avoid freeing peer nodes. Doesn't patch up prev/next links.
|
||||
*/
|
||||
void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
Node* TY_(TextToken)( Lexer *lexer );
|
||||
|
||||
|
||||
/**
|
||||
* Used for creating preformatted text from Word2000.
|
||||
*/
|
||||
Node* TY_(NewLineNode)( Lexer *lexer );
|
||||
|
||||
|
||||
/**
|
||||
* Used for adding a `&nbsp;` for Word2000.
|
||||
*/
|
||||
Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
|
||||
|
||||
|
||||
void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
|
||||
Node* TY_(FindDocType)( TidyDocImpl* doc );
|
||||
Node* TY_(FindHTML)( TidyDocImpl* doc );
|
||||
Node* TY_(FindHEAD)( TidyDocImpl* doc );
|
||||
Node* TY_(FindTITLE)(TidyDocImpl* doc);
|
||||
Node* TY_(FindBody)( TidyDocImpl* doc );
|
||||
Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
|
||||
|
||||
|
||||
/**
|
||||
* Returns containing block element, if any
|
||||
*/
|
||||
Node* TY_(FindContainer)( Node* node );
|
||||
|
||||
|
||||
/**
|
||||
* Add meta element for Tidy.
|
||||
*/
|
||||
Bool TY_(AddGenerator)( TidyDocImpl* doc );
|
||||
|
||||
uint TY_(ApparentVersion)( TidyDocImpl* doc );
|
||||
|
||||
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml );
|
||||
|
||||
uint TY_(HTMLVersionNumberFromCode)( uint vers );
|
||||
|
||||
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
|
||||
|
||||
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Fixup doctype if missing.
|
||||
*/
|
||||
Bool TY_(FixDocType)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Ensure XML document starts with <?xml version="1.0"?>, and
|
||||
* add encoding attribute if not using ASCII or UTF-8 output.
|
||||
*/
|
||||
Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
|
||||
|
||||
void TY_(UngetToken)( TidyDocImpl* doc );
|
||||
|
||||
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
|
||||
|
||||
void TY_(InitMap)(void);
|
||||
|
||||
|
||||
/**
|
||||
* Create a new attribute.
|
||||
*/
|
||||
AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Create a new attribute with given name and value.
|
||||
*/
|
||||
AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
|
||||
int delim );
|
||||
|
||||
|
||||
/**
|
||||
* Insert attribute at the end of attribute list of a node.
|
||||
*/
|
||||
void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
|
||||
|
||||
/**
|
||||
* Insert attribute at the start of attribute list of a node.
|
||||
*/
|
||||
void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
|
||||
|
||||
|
||||
/** @}
|
||||
* @name Inline Stack Functions
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Duplicate attributes.
|
||||
*/
|
||||
AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
|
||||
|
||||
|
||||
/**
|
||||
* Push a copy of an inline node onto stack, but don't push if
|
||||
* implicit or OBJECT or APPLET (implicit tags are ones generated
|
||||
* from the istack).
|
||||
*
|
||||
* One issue arises with pushing inlines when the tag is already pushed.
|
||||
* For instance:
|
||||
* ~~~
|
||||
* <p><em>text
|
||||
* <p><em>more text
|
||||
* ~~~
|
||||
* Shouldn't be mapped to
|
||||
* ~~~
|
||||
* <p><em>text</em></p>
|
||||
* <p><em><em>more text</em></em>
|
||||
* ~~~
|
||||
*/
|
||||
void TY_(PushInline)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
/**
|
||||
* Pop inline stack.
|
||||
*/
|
||||
void TY_(PopInline)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
|
||||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
|
||||
|
||||
|
||||
/**
|
||||
* This has the effect of inserting "missing" inline elements around the
|
||||
* contents of blocklevel elements such as P, TD, TH, DIV, PRE etc. This
|
||||
* procedure is called at the start of `ParseBlock`, when the inline
|
||||
* stack is not empty, as will be the case in:
|
||||
* ~~~
|
||||
* <i><h1>italic heading</h1></i>
|
||||
* ~~~
|
||||
* which is then treated as equivalent to
|
||||
* ~~~
|
||||
* <h1><i>italic heading</i></h1>
|
||||
* ~~~
|
||||
* This is implemented by setting the lexer into a mode where it gets
|
||||
* tokens from the inline stack rather than from the input stream.
|
||||
*/
|
||||
int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
/**
|
||||
* Defer duplicates when entering a table or other
|
||||
* element where the inlines shouldn't be duplicated.
|
||||
*/
|
||||
void TY_(DeferDup)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
Node* TY_(InsertedToken)( TidyDocImpl* doc );
|
||||
|
||||
/**
|
||||
* Stack manipulation for inline elements
|
||||
*/
|
||||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
|
||||
|
||||
|
||||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
|
||||
|
||||
|
||||
/** @}
|
||||
* @name Generic stack of nodes.
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* This typedef represents a stack of addresses to nodes. Tidy uses these to
|
||||
* try to limit recursion by pushing nodes to a stack when possible instead
|
||||
* of recursing.
|
||||
*/
|
||||
typedef struct _Stack {
|
||||
int top; /**< Current top position. */
|
||||
unsigned capacity; /**< Current capacity. Can be expanded. */
|
||||
Node **firstNode; /**< A pointer to the first pointer to a Node in an array of node addresses. */
|
||||
TidyAllocator* allocator; /**< Tidy's allocator, used at instantiation and expanding. */
|
||||
} Stack;
|
||||
|
||||
|
||||
/**
|
||||
* Create a new stack with a given starting capacity. If memory allocation
|
||||
* fails, then the allocator will panic the program automatically.
|
||||
*/
|
||||
Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity);
|
||||
|
||||
|
||||
/**
|
||||
* Increase the stack size. This will be called automatically when the
|
||||
* current stack is full. If memory allocation fails, then the allocator
|
||||
* will panic the program automatically.
|
||||
*/
|
||||
void TY_(growStack)(Stack *stack);
|
||||
|
||||
|
||||
/**
|
||||
* Stack is full when top is equal to the last index.
|
||||
*/
|
||||
Bool TY_(stackFull)(Stack *stack);
|
||||
|
||||
|
||||
/**
|
||||
* Stack is empty when top is equal to -1
|
||||
*/
|
||||
Bool TY_(stackEmpty)(Stack *stack);
|
||||
|
||||
|
||||
/**
|
||||
* Push an item to the stack.
|
||||
*/
|
||||
void TY_(push)(Stack *stack, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Pop an item from the stack.
|
||||
*/
|
||||
Node* TY_(pop)(Stack *stack);
|
||||
|
||||
|
||||
/**
|
||||
* Peek at the stack.
|
||||
*/
|
||||
Node* TY_(peek)(Stack *stack);
|
||||
|
||||
/**
|
||||
* Frees the stack when done.
|
||||
*/
|
||||
void TY_(freeStack)(Stack *stack);
|
||||
|
||||
|
||||
/** @}
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/** @} end lexer_h group */
|
||||
/** @} end internal_api group */
|
||||
|
||||
#endif /* __LEXER_H__ */
|
333
third_party/tidy/mappedio.c
vendored
Normal file
333
third_party/tidy/mappedio.c
vendored
Normal file
|
@ -0,0 +1,333 @@
|
|||
/* clang-format off */
|
||||
/* Interface to mmap style I/O
|
||||
|
||||
(c) 2006-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Originally contributed by Cory Nelson and Nuno Lopes
|
||||
|
||||
*/
|
||||
|
||||
/* keep these here to keep file non-empty */
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/struct/stat.h"
|
||||
#include "libc/calls/struct/stat.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "third_party/tidy/mappedio.h"
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
|
||||
#include "third_party/tidy/fileio.h"
|
||||
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TidyAllocator *allocator;
|
||||
const byte *base;
|
||||
size_t pos, size;
|
||||
} MappedFileSource;
|
||||
|
||||
static int mapped_getByte( void* sourceData )
|
||||
{
|
||||
MappedFileSource* fin = (MappedFileSource*) sourceData;
|
||||
return fin->base[fin->pos++];
|
||||
}
|
||||
|
||||
static Bool mapped_eof( void* sourceData )
|
||||
{
|
||||
MappedFileSource* fin = (MappedFileSource*) sourceData;
|
||||
return (fin->pos >= fin->size);
|
||||
}
|
||||
|
||||
/* Input-source callback: pushes one byte back by stepping the read position
** back by one.  The byte value itself is ignored, since the data is still
** present in the mapping.
**
** At position 0 there is nothing to push back and the call is a no-op.
*/
static void mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;

    /* pos is a size_t: decrementing it at 0 would wrap to SIZE_MAX */
    if ( fin->pos > 0 )
        fin->pos--;
}
|
||||
|
||||
/* Sets up `inp` to read the contents of `fp` through a read-only mmap().
**
** If the file cannot be stat'ed, is empty, or cannot be mapped, the source
** falls back on the standard I/O implementation and that call's result is
** returned.  When the mapping succeeds, `fp` is closed here because all
** further reads go through the mapping.
**
** Returns 0 on success, -1 if the bookkeeping record cannot be allocated.
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    struct stat info;
    int fd;
    int mapped = 0;
    MappedFileSource* src =
        (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );

    if ( src == NULL )
        return -1;

    fd = fileno(fp);
    if ( fstat(fd, &info) != -1 && info.st_size != 0 )
    {
        src->size = info.st_size;
        src->base = mmap(0, src->size, PROT_READ, MAP_SHARED, fd, 0);
        mapped = ( src->base != MAP_FAILED );
    }

    if ( !mapped )
    {
        TidyFree( allocator, src );
        /* Fallback on standard I/O */
        return TY_(initStdIOFileSource)( allocator, inp, fp );
    }

    src->allocator = allocator;
    src->pos = 0;
    fclose(fp);

    inp->sourceData = src;
    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;

    return 0;
}
|
||||
|
||||
/* Releases an input source created by TY_(initFileSource).
**
** A mapped source is recognised by its getByte callback: its mapping is
** unmapped and its record freed (`closeIt` is not consulted on this path).
** Any other source is handed to the standard I/O cleanup together with
** `closeIt`.
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* src;

    if ( inp->getByte != mapped_getByte )
    {
        TY_(freeStdIOFileSource)( inp, closeIt );
        return;
    }

    src = (MappedFileSource*) inp->sourceData;
    munmap( (void*)src->base, src->size );
    TidyFree( src->allocator, src );
}
|
||||
|
||||
#endif /* SUPPORT_POSIX_MAPPED_FILES */
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
# if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
|
||||
# pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
|
||||
# endif
|
||||
# include "streamio.h"
|
||||
# include "tidy-int.h"
|
||||
# include "message.h"
|
||||
|
||||
typedef struct _fp_input_mapped_source
|
||||
{
|
||||
TidyAllocator *allocator;
|
||||
LONGLONG size, pos;
|
||||
HANDLE file, map;
|
||||
byte *view, *iter, *end;
|
||||
unsigned int gran;
|
||||
} MappedFileSource;
|
||||
|
||||
/* Maps the window of the file that starts at data->pos, replacing any view
** that is currently mapped.  The window is `gran` bytes long, or shorter if
** fewer bytes remain in the file.  On success the read cursor is placed at
** the start of the new view.
**
** Returns 0 on success, -1 if the view could not be mapped.
*/
static int mapped_openView( MappedFileSource *data )
{
    LONGLONG remaining = data->size - data->pos;
    DWORD numb;

    if ( remaining > data->gran )
        numb = data->gran;
    else
        numb = (DWORD) remaining;

    if ( data->view )
    {
        UnmapViewOfFile( data->view );
        data->view = NULL;
    }

    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, numb );
    if ( data->view == NULL )
        return -1;

    data->iter = data->view;
    data->end  = data->view + numb;

    return 0;
}
|
||||
|
||||
static int mapped_getByte( void *sourceData )
|
||||
{
|
||||
MappedFileSource *data = sourceData;
|
||||
|
||||
if ( !data->view || data->iter >= data->end )
|
||||
{
|
||||
data->pos += data->gran;
|
||||
|
||||
if ( data->pos >= data->size || mapped_openView(data) != 0 )
|
||||
return EndOfStream;
|
||||
}
|
||||
|
||||
return *( data->iter++ );
|
||||
}
|
||||
|
||||
static Bool mapped_eof( void *sourceData )
|
||||
{
|
||||
MappedFileSource *data = sourceData;
|
||||
return ( data->pos >= data->size );
|
||||
}
|
||||
|
||||
/* Input-source callback: pushes one byte back by moving the read cursor back
** by one.  The byte value itself is ignored, since the data is still present
** in the mapped file.
**
** If the cursor is already at the start of the current view, the byte being
** pushed back belongs to the previous window: that window is mapped again and
** the cursor is placed on its last byte.  Pushing back at the very start of
** the file is a programming error (asserts) and otherwise does nothing.
*/
static void mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = sourceData;

    /* strictly greater: at iter == view a decrement would leave the view */
    if ( data->iter > data->view )
    {
        --data->iter;
        return;
    }

    if ( data->pos < data->gran )
    {
        assert(0);
        return;
    }

    data->pos -= data->gran;

    /* mapped_openView() leaves the cursor at the start of the window; the
       byte being restored is the last one of that window */
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
|
||||
|
||||
/* Sets up `inp` to read the open file `fp` through a Win32 file mapping.
**
** The callbacks are installed on `inp` before anything that can fail, so they
** remain set even when this function returns an error.  The file size is
** queried with whichever API the toolchain offers, a read-only mapping object
** is created, and the first view is opened at offset 0.
**
** Returns 0 on success, or -1 if allocation, the size query, the mapping or
** the first view fails.  On failure everything acquired here is released and
** `fp` is left open.
*/
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
    MappedFileSource* src = NULL;
    SYSTEM_INFO sysInfo;

    inp->getByte = mapped_getByte;
    inp->eof = mapped_eof;
    inp->ungetByte = mapped_ungetByte;

    src = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( src == NULL )
        return -1;

# if defined(__MINGW32__)
    {
        DWORD highPart;
        DWORD lowPart = GetFileSize(fp, &highPart);
        if ((lowPart == INVALID_FILE_SIZE) && (GetLastError() != NO_ERROR))
            goto fail;
        src->size = ( (LONGLONG) highPart << 32 ) + lowPart;
    }
# else /* NOT a MinGW build */
#  if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */
    {
        LARGE_INTEGER* pli = (LARGE_INTEGER *)&src->size;
        (DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );
        if ( GetLastError() != NO_ERROR || src->size <= 0 )
            goto fail;
    }
#  else
    if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&src->size )
         || src->size <= 0 )
        goto fail;
#  endif
# endif /* MinGW y/n */

    src->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );
    if ( !src->map )
        goto fail;

    /* views are opened one allocation granule at a time */
    GetSystemInfo( &sysInfo );
    src->gran = sysInfo.dwAllocationGranularity;

    src->allocator = allocator;
    src->pos = 0;
    src->view = NULL;
    src->iter = NULL;
    src->end = NULL;

    if ( mapped_openView( src ) != 0 )
    {
        CloseHandle( src->map );
        goto fail;
    }

    src->file = fp;
    inp->sourceData = src;

    return 0;

fail:
    TidyFree( allocator, src );
    return -1;
}
|
||||
|
||||
/* Releases a mapped input source.
**
** The mapped view and the file-mapping object belong to the source record
** and are always released with it.  The file handle is closed only when
** `closeIt` is set.  A source with no record attached is left untouched.
*/
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );

    if ( fin->map )
        CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    TidyFree( fin->allocator, fin );

    /* the record is gone; do not leave a dangling pointer behind */
    inp->sourceData = NULL;
}
|
||||
|
||||
/* Creates an input stream for `doc` that reads the open file `fp` through a
** file mapping, using the given character encoding.
**
** Returns the new stream, or NULL (after freeing the stream) when the mapped
** source cannot be initialised.
*/
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );
    int rc = initMappedFileSource( doc->allocator, &stream->source, fp );

    if ( rc == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    TY_(freeStreamIn)( stream );
    return NULL;
}
|
||||
|
||||
|
||||
/* Opens `filnam` for reading, parses it into `doc` through a mapped-file
** input stream, and releases the stream and file afterwards.
**
** With PRESERVE_FILE_TIMES, the stored file times are cleared first and, if
** the TidyKeepFileTimes option is set and the times can be queried, refilled
** from the file's access and modification times.
**
** Returns the parser's status, -ENOENT (after reporting FILE_CANT_OPEN) when
** the file cannot be opened, or -ENOMEM when the input stream cannot be set
** up.
*/
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
    int status;
    StreamIn* in;
    HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, NULL );

# if PRESERVE_FILE_TIMES
    LONGLONG actime, modtime;
    TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );

    if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
         GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
    {
#  define TY_I64(str) TYDYAPPEND(str,LL)
#  if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */
#   undef TY_I64
#   define TY_I64(str) TYDYAPPEND(str,i64)
#  endif
        /* subtract a fixed offset, then divide the tick count by 10000000 */
        doc->filetimes.actime =
            (time_t)( ( actime - TY_I64(116444736000000000)) / 10000000 );

        doc->filetimes.modtime =
            (time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
    }
# endif /* PRESERVE_FILE_TIMES */

    if ( fin == INVALID_HANDLE_VALUE )
    {
        /* Error message! */
        TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
        return -ENOENT;
    }

    in = MappedFileInput( doc, fin, cfg( doc, TidyInCharEncoding ) );
    if ( !in )
    {
        CloseHandle( fin );
        return -ENOMEM;
    }

    status = TY_(DocParseStream)( doc, in );
    freeMappedFileSource( &in->source, yes );
    TY_(freeStreamIn)( in );

    return status;
}
|
||||
|
||||
#endif /* defined(_WIN32) */
|
||||
|
16
third_party/tidy/mappedio.h
vendored
Normal file
16
third_party/tidy/mappedio.h
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef __TIDY_MAPPED_IO_H__
|
||||
#define __TIDY_MAPPED_IO_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* Interface to mmap style I/O
|
||||
|
||||
(c) 2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#if defined(_WIN32)
|
||||
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam );
|
||||
#endif
|
||||
|
||||
#endif /* __TIDY_MAPPED_IO_H__ */
|
1612
third_party/tidy/message.c
vendored
Normal file
1612
third_party/tidy/message.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
318
third_party/tidy/message.h
vendored
Normal file
318
third_party/tidy/message.h
vendored
Normal file
|
@ -0,0 +1,318 @@
|
|||
#ifndef __MESSAGE_H__
|
||||
#define __MESSAGE_H__
|
||||
/* clang-format off */
|
||||
|
||||
/******************************************************************************
|
||||
* @file
|
||||
* Provides General Message Writing Routines
|
||||
*
|
||||
* This module handles LibTidy's high level output routines, as well as
|
||||
* provides lookup functions and management for keys used for retrieval
|
||||
* of these messages.
|
||||
*
|
||||
* LibTidy emits two general types of output:
|
||||
*
|
||||
* - Reports, which contain data relating to what Tidy discovered in your
|
||||
* source file, and/or what Tidy did to your source file. In some cases
|
||||
* general information about your source file is emitted as well. Reports
|
||||
* are emitted in the current output buffer, but LibTidy users will probably
|
||||
* prefer to hook into a callback in order to take advantage of the data
|
||||
* that are available in a more flexible way.
|
||||
*
|
||||
* - Dialogue, consisting of footnotes related to your source file, and of
|
||||
* general information that's not related to your source file in particular.
|
||||
* This is also written to the current output buffer when appropriate, and
|
||||
* available via callbacks.
|
||||
*
|
||||
* Report information typically takes the form of a warning, an error, info,
|
||||
* etc., and the output routines keep track of the count of these.
|
||||
*
|
||||
* The preferred way of handling Tidy diagnostics output is either
|
||||
* - define a new output sink, or
|
||||
* - use a message filter callback routine.
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University) and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/config.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup message_releaseinfo Tidy Release Information
|
||||
**
|
||||
** These functions return information about the current release version date
|
||||
** and version number. Note that the latest release date or the highest
|
||||
** version number alone do not guarantee the latest Tidy release, as we may
|
||||
** backport important fixes to older releases of Tidy.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* Returns the release date of this instance of HTML Tidy.
|
||||
*/
|
||||
ctmbstr TY_(ReleaseDate)(void);
|
||||
|
||||
/**
|
||||
* Returns the release version of this instance of HTML Tidy.
|
||||
*/
|
||||
ctmbstr TY_(tidyLibraryVersion)(void);
|
||||
|
||||
|
||||
/** @} message_releaseinfo group */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup message_reporting Report and Dialogue Writing Functions
|
||||
**
|
||||
** These simple functions perform the vast majority of Tidy's output, and
|
||||
 **  one of these should be your first choice when adding your own output.
|
||||
**
|
||||
** A report is typically diagnostic output that is generated each time Tidy
|
||||
** detects an issue in your document or makes a change. A dialogue is a piece
|
||||
** of information such as a summary, a footnote, or other non-tabular data.
|
||||
** Some of these functions emit multiple reports or dialogue in order to
|
||||
** effect a summary.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
/** @name General Report Writing
|
||||
** If one of the convenience reporting functions does not fit your required
|
||||
** message signature, then this designated reporting function will fit the
|
||||
** bill. Be sure to see if a message formatter exists that can handle the
|
||||
** variable arguments.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
* The designated report writing function. When a proper formatter exists,
|
||||
* this one function can handle all report output.
|
||||
*/
|
||||
void TY_(Report)(TidyDocImpl* doc, Node *element, Node *node, uint code, ...);
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Convenience Reporting Functions
|
||||
** These convenience reporting functions are able to handle the bulk of Tidy's
|
||||
** necessary reporting, and avoid the danger of using a variadic if you are
|
||||
** unfamiliar with Tidy.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code );
|
||||
void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code);
|
||||
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
|
||||
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
|
||||
void TY_(ReportFileError)( TidyDocImpl* doc, ctmbstr file, uint code );
|
||||
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
|
||||
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
|
||||
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
|
||||
void TY_(ReportSurrogateError)(TidyDocImpl* doc, uint code, uint c1, uint c2);
|
||||
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option );
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name General Dialogue Writing
|
||||
** These functions produce dialogue output such as individual messages, or
|
||||
** several messages in summary form.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
* Emits a single dialogue message, and is capable of accepting a variadic
|
||||
* that is passed to the correct message formatter as needed.
|
||||
*/
|
||||
void TY_(Dialogue)( TidyDocImpl* doc, uint code, ... );
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Output Dialogue Information */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/**
|
||||
* Outputs the footnotes and other dialogue information after document cleanup
|
||||
* is complete. LibTidy users might consider capturing these individually in
|
||||
* the message callback rather than capturing this entire buffer.
|
||||
* Called by `tidyErrorSummary()`, in console.
|
||||
* @todo: This name is a bit misleading and should probably be renamed to
|
||||
* indicate its focus on printing footnotes.
|
||||
*/
|
||||
void TY_(ErrorSummary)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Outputs document HTML version and version-related information as the final
|
||||
* report(s) in the report table.
|
||||
* Called by `tidyRunDiagnostics()`, from console.
|
||||
* Called by `tidyDocReportDoctype()`, currently unused.
|
||||
*/
|
||||
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Reports the number of warnings and errors found in the document as dialogue
|
||||
* information.
|
||||
* Called by `tidyRunDiagnostics()`, from console.
|
||||
*/
|
||||
void TY_(ReportNumWarnings)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @} message_reporting group */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
 ** @defgroup message_muting Message Muting
|
||||
**
|
||||
 ** Message types included in the `mute` option will not be printed in
|
||||
** messageOut().
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
/** Maintains a list of messages not to display. */
|
||||
typedef struct _mutedMessages {
|
||||
tidyStrings* list; /**< A list of messages that won't be output. */
|
||||
uint count; /**< Current count of the list. */
|
||||
uint capacity; /**< Current capacity of the list. */
|
||||
} TidyMutedMessages;
|
||||
|
||||
|
||||
/** Frees the list of muted messages.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(FreeMutedMessageList)( TidyDocImpl* doc );
|
||||
|
||||
/** Adds a new message ID to the list of muted messages.
|
||||
** @param doc The Tidy document.
|
||||
** @param opt The option that is defining the muted message.
|
||||
** @param name The message code as a string.
|
||||
*/
|
||||
void TY_(DefineMutedMessage)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
|
||||
|
||||
/** Start an iterator for muted messages.
|
||||
** @param doc The Tidy document.
|
||||
** @returns Returns an iterator token.
|
||||
*/
|
||||
TidyIterator TY_(getMutedMessageList)( TidyDocImpl* doc );
|
||||
|
||||
/** Get the next muted message.
|
||||
** @param doc The Tidy document.
|
||||
** @param iter The iterator token.
|
||||
 **  @returns The next muted message.
|
||||
*/
|
||||
ctmbstr TY_(getNextMutedMessage)( TidyDocImpl* doc, TidyIterator* iter );
|
||||
|
||||
|
||||
/** @} message_muting group */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup message_keydiscovery Key Discovery
|
||||
**
|
||||
** LibTidy users may want to use `TidyReportCallback` to enable their own
|
||||
** localization lookup features. Because Tidy's report codes are enums the
|
||||
** specific values can change over time. Using these functions provides the
|
||||
** ability for LibTidy users to use LibTidy's enum values as strings for
|
||||
** lookup purposes.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* This function returns a string representing the enum value name that can
|
||||
* be used as a lookup key independent of changing string values.
|
||||
* `TidyReportCallback` will return this general string as the report
|
||||
* message key.
|
||||
*/
|
||||
ctmbstr TY_(tidyErrorCodeAsKey)(uint code);
|
||||
|
||||
/**
|
||||
* Given an error code string, return the integer value of it, or UINT_MAX
|
||||
* as an error flag.
|
||||
*/
|
||||
uint TY_(tidyErrorCodeFromKey)(ctmbstr code);
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item
|
||||
 *  in Tidy's list of error codes that can be returned with
|
||||
* `TidyReportFilter3`.
|
||||
* Items can be retrieved with getNextErrorCode();
|
||||
*/
|
||||
TidyIterator TY_(getErrorCodeList)(void);
|
||||
|
||||
/**
|
||||
* Returns the next error code having initialized the iterator
|
||||
* with `getErrorCodeList()`. You can use tidyErrorCodeAsKey
|
||||
* to determine the key for this value.
|
||||
*/
|
||||
uint TY_(getNextErrorCode)( TidyIterator* iter );
|
||||
|
||||
|
||||
/** @} message_keydiscovery group */
|
||||
/** @} internal_api addtogroup */
|
||||
|
||||
|
||||
|
||||
/* accessibility flaws */
|
||||
|
||||
#define BA_MISSING_IMAGE_ALT 1
|
||||
#define BA_MISSING_LINK_ALT 2
|
||||
#define BA_MISSING_SUMMARY 4
|
||||
#define BA_MISSING_IMAGE_MAP 8
|
||||
#define BA_USING_FRAMES 16
|
||||
#define BA_USING_NOFRAMES 32
|
||||
#define BA_INVALID_LINK_NOFRAMES 64 /* WAI [6.5.1.4] */
|
||||
/* Highest bit of the 32-bit flag word.  The constant is unsigned because
   shifting a signed 1 into the sign bit of an int is undefined behaviour. */
#define BA_WAI                   (1u << 31)
|
||||
|
||||
/* presentation flaws */
|
||||
|
||||
#define USING_SPACER 1
|
||||
#define USING_LAYER 2
|
||||
#define USING_NOBR 4
|
||||
#define USING_FONT 8
|
||||
#define USING_BODY 16
|
||||
|
||||
/* badchar bit field */
|
||||
|
||||
#define BC_VENDOR_SPECIFIC_CHARS 1
|
||||
#define BC_INVALID_SGML_CHARS 2
|
||||
#define BC_INVALID_UTF8 4
|
||||
#define BC_INVALID_UTF16 8
|
||||
#define BC_ENCODING_MISMATCH 16 /* fatal error */
|
||||
#define BC_INVALID_URI 32
|
||||
#define BC_INVALID_NCR 64
|
||||
|
||||
/* other footnote bit field (temporary until formalized) */
|
||||
|
||||
#define FN_TRIM_EMPTY_ELEMENT 1
|
||||
|
||||
/* Lexer and I/O Macros */
|
||||
|
||||
#define REPLACED_CHAR 0
|
||||
#define DISCARDED_CHAR 1
|
||||
|
||||
|
||||
#endif /* __MESSAGE_H__ */
|
685
third_party/tidy/messageobj.c
vendored
Normal file
685
third_party/tidy/messageobj.c
vendored
Normal file
|
@ -0,0 +1,685 @@
|
|||
/* clang-format off */
|
||||
/* messageobj.c
|
||||
* Provides an external, extensible API for message reporting.
|
||||
*
|
||||
* (c) 2017 HTACG
|
||||
* See tidy.h for the copyright notice.
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/messageobj.h"
|
||||
#include "third_party/tidy/message.h"
|
||||
#include "third_party/tidy/tidy-int.h"
|
||||
#include "libc/assert.h"
|
||||
#include "third_party/tidy/tmbstr.h"
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* BuildArgArray Support - declarations and forward declarations
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
/** A record of a single argument and its type. An array these
|
||||
** represents the arguments supplied to a format string, ordered
|
||||
** in the same position as they occur in the format string. Because
|
||||
** older versions of Windows don't support positional arguments,
|
||||
** Tidy doesn't either.
|
||||
*/
|
||||
|
||||
#define FORMAT_LENGTH 21
|
||||
|
||||
struct printfArg {
|
||||
TidyFormatParameterType type; /* type of the argument */
|
||||
int formatStart; /* where the format starts */
|
||||
int formatLength; /* length of the format */
|
||||
char format[FORMAT_LENGTH]; /* buffer for the format */
|
||||
union { /* the argument */
|
||||
int i;
|
||||
uint ui;
|
||||
double d;
|
||||
const char *s;
|
||||
} u;
|
||||
};
|
||||
|
||||
|
||||
/** Returns a pointer to an allocated array of `printfArg` given a format
|
||||
** string and a va_list, or NULL if not successful or no parameters were
|
||||
** given. Parameter `rv` will return with the count of zero or more
|
||||
** parameters if successful, else -1.
|
||||
**
|
||||
*/
|
||||
static struct printfArg *BuildArgArray( TidyDocImpl *doc, ctmbstr fmt, va_list ap, int *rv );
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Tidy Message Object Support
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
/** Create an internal representation of a Tidy message with all of
|
||||
** the information that that we know about the message.
|
||||
**
|
||||
** The function signature doesn't have to stay static and is a good
|
||||
** place to add instantiation if expanding the API.
|
||||
**
|
||||
** We currently know the doc, node, code, line, column, level, and
|
||||
** args, will pre-calculate all of the other members upon creation.
|
||||
** This ensures that we can use members directly, immediately,
|
||||
** without having to use accessors internally.
|
||||
**
|
||||
** If any message callback filters are setup by API clients, they
|
||||
** will be called here.
|
||||
**
|
||||
** This version serves as the designated initializer and as such
|
||||
** requires every known parameter.
|
||||
*/
|
||||
static TidyMessageImpl *tidyMessageCreateInitV( TidyDocImpl *doc,
|
||||
Node *node,
|
||||
uint code,
|
||||
int line,
|
||||
int column,
|
||||
TidyReportLevel level,
|
||||
va_list args )
|
||||
{
|
||||
TidyMessageImpl *result = TidyDocAlloc(doc, sizeof(TidyMessageImpl));
|
||||
TidyDoc tdoc = tidyImplToDoc(doc);
|
||||
va_list args_copy;
|
||||
enum { sizeMessageBuf=2048 };
|
||||
ctmbstr pattern;
|
||||
uint i = 0;
|
||||
|
||||
|
||||
/* Things we know... */
|
||||
|
||||
result->tidyDoc = doc;
|
||||
result->tidyNode = node;
|
||||
result->code = code;
|
||||
result->line = line;
|
||||
result->column = column;
|
||||
result->level = level;
|
||||
/* Is #719 - set 'muted' before any callbacks. */
|
||||
result->muted = no;
|
||||
i = 0;
|
||||
while ((doc->muted.list) && (doc->muted.list[i] != 0))
|
||||
{
|
||||
if (doc->muted.list[i] == code)
|
||||
{
|
||||
result->muted = yes;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
/* Things we create... */
|
||||
|
||||
va_copy(args_copy, args);
|
||||
result->arguments = BuildArgArray(doc, tidyDefaultString(code), args_copy, &result->argcount);
|
||||
va_end(args_copy);
|
||||
|
||||
result->messageKey = TY_(tidyErrorCodeAsKey)(code);
|
||||
|
||||
result->messageFormatDefault = tidyDefaultString(code);
|
||||
result->messageFormat = tidyLocalizedString(code);
|
||||
|
||||
result->messageDefault = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
va_copy(args_copy, args);
|
||||
TY_(tmbvsnprintf)(result->messageDefault, sizeMessageBuf, result->messageFormatDefault, args_copy);
|
||||
va_end(args_copy);
|
||||
|
||||
result->message = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
va_copy(args_copy, args);
|
||||
TY_(tmbvsnprintf)(result->message, sizeMessageBuf, result->messageFormat, args_copy);
|
||||
va_end(args_copy);
|
||||
|
||||
/* Some things already hit us localized, and some things need to be
|
||||
localized here. Look for these codewords and replace them here.
|
||||
*/
|
||||
TY_(strrep)(result->messageDefault, "STRING_PLAIN_TEXT", tidyDefaultString(STRING_PLAIN_TEXT));
|
||||
TY_(strrep)(result->message, "STRING_PLAIN_TEXT", tidyLocalizedString(STRING_PLAIN_TEXT));
|
||||
|
||||
TY_(strrep)(result->messageDefault, "STRING_XML_DECLARATION", tidyDefaultString(STRING_XML_DECLARATION));
|
||||
TY_(strrep)(result->message, "STRING_XML_DECLARATION", tidyLocalizedString(STRING_XML_DECLARATION));
|
||||
|
||||
TY_(strrep)(result->messageDefault, "STRING_ERROR_COUNT_WARNING", tidyDefaultStringN(STRING_ERROR_COUNT_WARNING, doc->warnings));
|
||||
TY_(strrep)(result->message, "STRING_ERROR_COUNT_WARNING", tidyLocalizedStringN(STRING_ERROR_COUNT_WARNING, doc->warnings));
|
||||
|
||||
TY_(strrep)(result->messageDefault, "STRING_ERROR_COUNT_ERROR", tidyDefaultStringN(STRING_ERROR_COUNT_ERROR, doc->errors));
|
||||
TY_(strrep)(result->message, "STRING_ERROR_COUNT_ERROR", tidyLocalizedStringN(STRING_ERROR_COUNT_ERROR, doc->errors));
|
||||
|
||||
|
||||
result->messagePosDefault = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
result->messagePos = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
|
||||
if ( cfgBool(doc, TidyEmacs) && cfgStr(doc, TidyEmacsFile) )
|
||||
{
|
||||
/* Change formatting to be parsable by GNU Emacs */
|
||||
TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, "%s:%d:%d: ", cfgStr(doc, TidyEmacsFile), line, column);
|
||||
TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, "%s:%d:%d: ", cfgStr(doc, TidyEmacsFile), line, column);
|
||||
}
|
||||
else if ( cfgBool(doc, TidyShowFilename) && cfgStr(doc, TidyEmacsFile) )
|
||||
{
|
||||
/* Include filename in output */
|
||||
TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, tidyDefaultString(FN_LINE_COLUMN_STRING),
|
||||
cfgStr(doc, TidyEmacsFile), line, column);
|
||||
TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, tidyLocalizedString(FN_LINE_COLUMN_STRING),
|
||||
cfgStr(doc, TidyEmacsFile), line, column);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* traditional format */
|
||||
TY_(tmbsnprintf)(result->messagePosDefault, sizeMessageBuf, tidyDefaultString(LINE_COLUMN_STRING), line, column);
|
||||
TY_(tmbsnprintf)(result->messagePos, sizeMessageBuf, tidyLocalizedString(LINE_COLUMN_STRING), line, column);
|
||||
}
|
||||
|
||||
result->messagePrefixDefault = tidyDefaultString(level);
|
||||
|
||||
result->messagePrefix = tidyLocalizedString(level);
|
||||
|
||||
if ( line > 0 && column > 0 )
|
||||
pattern = "%s%s%s"; /* pattern if there's location information */
|
||||
else
|
||||
pattern = "%.0s%s%s"; /* otherwise if there isn't */
|
||||
|
||||
if ( level > TidyFatal )
|
||||
pattern = "%.0s%.0s%s"; /* dialog doesn't have pos or prefix */
|
||||
|
||||
result->messageOutputDefault = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
TY_(tmbsnprintf)(result->messageOutputDefault, sizeMessageBuf, pattern,
|
||||
result->messagePosDefault, result->messagePrefixDefault,
|
||||
result->messageDefault);
|
||||
|
||||
result->messageOutput = TidyDocAlloc(doc, sizeMessageBuf);
|
||||
TY_(tmbsnprintf)(result->messageOutput, sizeMessageBuf, pattern,
|
||||
result->messagePos, result->messagePrefix,
|
||||
result->message);
|
||||
|
||||
if ( ( cfgBool(doc, TidyMuteShow) == yes ) && level <= TidyFatal )
|
||||
{
|
||||
/*\ Issue #655 - Unsafe to use output buffer as one of the va_list
|
||||
* input parameters in some snprintf implementations.
|
||||
\*/
|
||||
ctmbstr pc = TY_(tidyErrorCodeAsKey)(code);
|
||||
i = TY_(tmbstrlen)(result->messageOutputDefault);
|
||||
if (i < sizeMessageBuf)
|
||||
TY_(tmbsnprintf)(result->messageOutputDefault + i, sizeMessageBuf - i, " (%s)", pc );
|
||||
i = TY_(tmbstrlen)(result->messageOutput);
|
||||
if (i < sizeMessageBuf)
|
||||
TY_(tmbsnprintf)(result->messageOutput + i, sizeMessageBuf - i, " (%s)", pc );
|
||||
}
|
||||
|
||||
result->allowMessage = yes;
|
||||
|
||||
/* reportFilter is a simple error filter that provides minimal information
|
||||
to callback functions, and includes the message buffer in LibTidy's
|
||||
configured localization. As it's a "legacy" API, it does not receive
|
||||
TidyDialogue messages.*/
|
||||
if ( (result->level <= TidyFatal) && doc->reportFilter )
|
||||
{
|
||||
result->allowMessage = result->allowMessage & doc->reportFilter( tdoc, result->level, result->line, result->column, result->messageOutput );
|
||||
}
|
||||
|
||||
/* reportCallback is intended to allow LibTidy users to localize messages
|
||||
via their own means by providing a key and the parameters to fill it.
|
||||
As it's a "legacy" API, it does not receive TidyDialogue messages. */
|
||||
if ( (result->level <= TidyFatal) && doc->reportCallback )
|
||||
{
|
||||
TidyDoc tdoc = tidyImplToDoc( doc );
|
||||
va_copy(args_copy, args);
|
||||
result->allowMessage = result->allowMessage & doc->reportCallback( tdoc, result->level, result->line, result->column, result->messageKey, args_copy );
|
||||
va_end(args_copy);
|
||||
}
|
||||
|
||||
/* messageCallback is the newest interface to interrogate Tidy's
|
||||
emitted messages. */
|
||||
if ( doc->messageCallback )
|
||||
{
|
||||
result->allowMessage = result->allowMessage & doc->messageCallback( tidyImplToMessage(result) );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
TidyMessageImpl *TY_(tidyMessageCreate)( TidyDocImpl *doc,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... )
|
||||
{
|
||||
TidyMessageImpl *result;
|
||||
va_list args;
|
||||
va_start(args, level);
|
||||
result = tidyMessageCreateInitV(doc, NULL, code, 0, 0, level, args);
|
||||
va_end(args);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
TidyMessageImpl *TY_(tidyMessageCreateWithNode)( TidyDocImpl *doc,
|
||||
Node *node,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... )
|
||||
{
|
||||
TidyMessageImpl *result;
|
||||
va_list args_copy;
|
||||
int line = ( node ? node->line :
|
||||
( doc->lexer ? doc->lexer->lines : 0 ) );
|
||||
int col = ( node ? node->column :
|
||||
( doc->lexer ? doc->lexer->columns : 0 ) );
|
||||
|
||||
va_start(args_copy, level);
|
||||
result = tidyMessageCreateInitV(doc, node, code, line, col, level, args_copy);
|
||||
va_end(args_copy);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
TidyMessageImpl *TY_(tidyMessageCreateWithLexer)( TidyDocImpl *doc,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... )
|
||||
{
|
||||
TidyMessageImpl *result;
|
||||
va_list args_copy;
|
||||
int line = ( doc->lexer ? doc->lexer->lines : 0 );
|
||||
int col = ( doc->lexer ? doc->lexer->columns : 0 );
|
||||
|
||||
va_start(args_copy, level);
|
||||
result = tidyMessageCreateInitV(doc, NULL, code, line, col, level, args_copy);
|
||||
va_end(args_copy);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* Releases a message: frees the argument array and each of the formatted
   strings the message holds, then the message structure itself. All frees
   go through the owning document (TidyDocFree). A NULL message is ignored. */
void TY_(tidyMessageRelease)( TidyMessageImpl *message )
{
    TidyDocImpl *owner;

    if ( message == NULL )
        return;

    /* Resolve the owning document once; every free below uses it. */
    owner = tidyDocToImpl(message->tidyDoc);

    TidyDocFree( owner, message->arguments );
    TidyDocFree( owner, message->messageDefault );
    TidyDocFree( owner, message->message );
    TidyDocFree( owner, message->messagePosDefault );
    TidyDocFree( owner, message->messagePos );
    TidyDocFree( owner, message->messageOutputDefault );
    TidyDocFree( owner, message->messageOutput );

    /* Issue #597 - and discard the message structure */
    TidyDocFree( owner, message );
}
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Modern Message Callback Functions
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
/* Accessor: the document the message came from. */
TidyDocImpl* TY_(getMessageDoc)( TidyMessageImpl message )
{
    TidyDocImpl *owner = message.tidyDoc;

    return owner;
}
|
||||
|
||||
/* Accessor: the message key code. */
uint TY_(getMessageCode)( TidyMessageImpl message )
{
    uint keyCode = message.code;

    return keyCode;
}
|
||||
|
||||
/* Accessor: the message key string. */
ctmbstr TY_(getMessageKey)( TidyMessageImpl message )
{
    ctmbstr key = message.messageKey;

    return key;
}
|
||||
|
||||
/* Accessor: the line number the message applies to. */
int TY_(getMessageLine)( TidyMessageImpl message )
{
    int lineNo = message.line;

    return lineNo;
}
|
||||
|
||||
/* Accessor: the column the message applies to. */
int TY_(getMessageColumn)( TidyMessageImpl message )
{
    int columnNo = message.column;

    return columnNo;
}
|
||||
|
||||
/* Accessor: the TidyReportLevel of the message. */
TidyReportLevel TY_(getMessageLevel)( TidyMessageImpl message )
{
    TidyReportLevel reportLevel = message.level;

    return reportLevel;
}
|
||||
|
||||
/* Accessor: whether or not the message was muted by the configuration. */
Bool TY_(getMessageIsMuted)( TidyMessageImpl message )
{
    Bool isMuted = message.muted;

    return isMuted;
}
|
||||
|
||||
/* Accessor: the built-in (default language) format string. */
ctmbstr TY_(getMessageFormatDefault)( TidyMessageImpl message )
{
    ctmbstr format = message.messageFormatDefault;

    return format;
}
|
||||
|
||||
/* Accessor: the localized format string. */
ctmbstr TY_(getMessageFormat)( TidyMessageImpl message )
{
    ctmbstr format = message.messageFormat;

    return format;
}
|
||||
|
||||
/* Accessor: the formatted message in the default language. */
ctmbstr TY_(getMessageDefault)( TidyMessageImpl message )
{
    ctmbstr text = message.messageDefault;

    return text;
}
|
||||
|
||||
/* Accessor: the formatted, localized message. */
ctmbstr TY_(getMessage)( TidyMessageImpl message )
{
    ctmbstr text = message.message;

    return text;
}
|
||||
|
||||
/* Accessor: the position part of the message in the default language. */
ctmbstr TY_(getMessagePosDefault)( TidyMessageImpl message )
{
    ctmbstr position = message.messagePosDefault;

    return position;
}
|
||||
|
||||
/* Accessor: the localized position part of the message. */
ctmbstr TY_(getMessagePos)( TidyMessageImpl message )
{
    ctmbstr position = message.messagePos;

    return position;
}
|
||||
|
||||
/* Accessor: the prefix part of the message in the default language. */
ctmbstr TY_(getMessagePrefixDefault)( TidyMessageImpl message )
{
    ctmbstr prefix = message.messagePrefixDefault;

    return prefix;
}
|
||||
|
||||
/* Accessor: the localized prefix part of the message. */
ctmbstr TY_(getMessagePrefix)( TidyMessageImpl message )
{
    ctmbstr prefix = message.messagePrefix;

    return prefix;
}
|
||||
|
||||
|
||||
/* Accessor: the complete message, as would be output in the CLI, in the
   default language. */
ctmbstr TY_(getMessageOutputDefault)( TidyMessageImpl message )
{
    ctmbstr output = message.messageOutputDefault;

    return output;
}
|
||||
|
||||
/* Accessor: the complete, localized message, as would be output in the CLI. */
ctmbstr TY_(getMessageOutput)( TidyMessageImpl message )
{
    ctmbstr output = message.messageOutput;

    return output;
}
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Message Argument Interrogation
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
/* Starts an iteration over the message's arguments. The iterator is a
   one-based index disguised as a pointer: 1 when the message has at least
   one argument, 0 (nothing to iterate) otherwise. */
TidyIterator TY_(getMessageArguments)( TidyMessageImpl message )
{
    size_t first = ( message.argcount > 0 ) ? (size_t)1 : (size_t)0;

    return (TidyIterator) first;
}
|
||||
|
||||
/* Returns the argument handle at the iterator's current position and
   advances the iterator.

   Just as TidyIterator is really just a dumb, one-based index, the
   returned TidyMessageArgument is the same one-based argument number
   (0 when the iterator was not positioned on a valid argument). After
   the call `*iter` is the next one-based index, or 0 once the arguments
   are exhausted. */
TidyMessageArgument TY_(getNextMessageArgument)( TidyMessageImpl message, TidyIterator* iter )
{
    size_t total = (size_t)message.argcount;
    size_t cursor;
    size_t current;

    assert( iter != NULL );

    cursor = (size_t)*iter;

    if ( cursor == 0 || cursor > total )
    {
        /* Not positioned on an argument: hand back the "none" handle. */
        current = 0;
    }
    else
    {
        current = cursor;
        cursor += 1;
    }

    /* Anything past the last argument ends the iteration. */
    if ( cursor > total )
        cursor = 0;

    *iter = (TidyIterator)cursor;
    return (TidyMessageArgument)current;
}
|
||||
|
||||
|
||||
/* Returns the TidyFormatParameterType of the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". An assertion is
   raised when it does not refer to one of the message's arguments. */
TidyFormatParameterType TY_(getArgType)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );

    return message.arguments[argNum].type;
}
|
||||
|
||||
|
||||
/* Returns the format specifier recorded for the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". An assertion is
   raised when it does not refer to one of the message's arguments. */
ctmbstr TY_(getArgFormat)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );

    return message.arguments[argNum].format;
}
|
||||
|
||||
|
||||
/* Returns the string value of the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". Assertions are
   raised when it does not refer to one of the message's arguments, or
   when that argument is not of type tidyFormatType_STRING. */
ctmbstr TY_(getArgValueString)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_STRING);

    return message.arguments[argNum].u.s;
}
|
||||
|
||||
|
||||
/* Returns the unsigned integer value of the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". Assertions are
   raised when it does not refer to one of the message's arguments, or
   when that argument is not of type tidyFormatType_UINT. */
uint TY_(getArgValueUInt)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_UINT);

    return message.arguments[argNum].u.ui;
}
|
||||
|
||||
|
||||
/* Returns the signed integer value of the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". Assertions are
   raised when it does not refer to one of the message's arguments, or
   when that argument is not of type tidyFormatType_INT. */
int TY_(getArgValueInt)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_INT);

    return message.arguments[argNum].u.i;
}
|
||||
|
||||
|
||||
/* Returns the double value of the given message argument.

   `*arg` is the one-based argument number handed out by
   TY_(getNextMessageArgument); 0 means "no argument". Assertions are
   raised when it does not refer to one of the message's arguments, or
   when that argument is not of type tidyFormatType_DOUBLE. */
double TY_(getArgValueDouble)( TidyMessageImpl message, TidyMessageArgument* arg )
{
    int argNum = (int)(size_t)*arg - 1;

    /* argNum is a zero-based index, so the valid range is [0, argcount). */
    assert( argNum >= 0 && argNum < message.argcount );
    assert( message.arguments[argNum].type == tidyFormatType_DOUBLE);

    return message.arguments[argNum].u.d;
}
|
||||
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* BuildArgArray support
|
||||
* Adapted loosely from Mozilla `prprf.c`, Mozilla Public License:
|
||||
* - https://www.mozilla.org/en-US/MPL/2.0/
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
/** Returns a pointer to an allocated array of `printfArg` given a format
|
||||
** string and a va_list, or NULL if not successful or no parameters were
|
||||
** given. Parameter `rv` will return with the count of zero or more
|
||||
** parameters if successful, else -1.
|
||||
**
|
||||
** We'll also be sure to use the document's allocator if specified, thus
|
||||
** the requirement to pass in a TidyDocImpl.
|
||||
**
|
||||
** Currently Tidy only uses %c, %d, %s, %u, %X, although doubles are
|
||||
** supported as well. Unsupported arguments will result in failure as
|
||||
** described above.
|
||||
*/
|
||||
static struct printfArg* BuildArgArray( TidyDocImpl *doc, ctmbstr fmt, va_list ap, int* rv )
|
||||
{
|
||||
int number = 0; /* the quantity of valid arguments found; returned as rv. */
|
||||
int cn = -1; /* keeps track of which parameter index is current. */
|
||||
int i = 0; /* typical index. */
|
||||
int pos = -1; /* starting position of current argument. */
|
||||
const char* p; /* current position in format string. */
|
||||
char c; /* current character. */
|
||||
struct printfArg* nas;
|
||||
|
||||
/* first pass: determine number of valid % to allocate space. */
|
||||
|
||||
p = fmt;
|
||||
*rv = 0;
|
||||
|
||||
while( ( c = *p++ ) != 0 )
|
||||
{
|
||||
if( c != '%' )
|
||||
continue;
|
||||
|
||||
if( ( c = *p++ ) == '%' ) /* skip %% case */
|
||||
continue;
|
||||
else
|
||||
number++;
|
||||
}
|
||||
|
||||
|
||||
if( number == 0 )
|
||||
return NULL;
|
||||
|
||||
|
||||
nas = (struct printfArg*)TidyDocAlloc( doc, number * sizeof( struct printfArg ) );
|
||||
if( !nas )
|
||||
{
|
||||
*rv = -1;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
for( i = 0; i < number; i++ )
|
||||
{
|
||||
nas[i].type = tidyFormatType_UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
/* second pass: set nas[].type and location. */
|
||||
|
||||
p = fmt;
|
||||
while( ( c = *p++ ) != 0 )
|
||||
{
|
||||
if( c != '%' )
|
||||
continue;
|
||||
|
||||
if( ( c = *p++ ) == '%' )
|
||||
continue; /* skip %% case */
|
||||
|
||||
pos = p - fmt - 2; /* p already incremented twice */
|
||||
|
||||
/* width -- width via parameter */
|
||||
if (c == '*')
|
||||
{
|
||||
/* not supported feature */
|
||||
*rv = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* width field -- skip */
|
||||
while ((c >= '0') && (c <= '9'))
|
||||
{
|
||||
c = *p++;
|
||||
}
|
||||
|
||||
/* precision */
|
||||
if (c == '.')
|
||||
{
|
||||
c = *p++;
|
||||
if (c == '*') {
|
||||
/* not supported feature */
|
||||
*rv = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
while ((c >= '0') && (c <= '9'))
|
||||
{
|
||||
c = *p++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cn++;
|
||||
|
||||
/* size and format */
|
||||
nas[cn].type = tidyFormatType_UINT;
|
||||
switch (c)
|
||||
{
|
||||
case 'c': /* unsigned int (char) */
|
||||
case 'u': /* unsigned int */
|
||||
case 'X': /* unsigned int as hex */
|
||||
case 'x': /* unsigned int as hex */
|
||||
case 'o': /* octal */
|
||||
nas[cn].u.ui = va_arg( ap, unsigned int );
|
||||
break;
|
||||
|
||||
case 'd': /* signed int */
|
||||
case 'i': /* signed int */
|
||||
nas[cn].type = tidyFormatType_INT;
|
||||
nas[cn].u.i = va_arg( ap, int );
|
||||
break;
|
||||
|
||||
|
||||
case 's': /* string */
|
||||
nas[cn].type = tidyFormatType_STRING;
|
||||
nas[cn].u.s = va_arg( ap, char* );
|
||||
break;
|
||||
|
||||
case 'e': /* double */
|
||||
case 'E': /* double */
|
||||
case 'f': /* double */
|
||||
case 'F': /* double */
|
||||
case 'g': /* double */
|
||||
case 'G': /* double */
|
||||
nas[cn].type = tidyFormatType_DOUBLE;
|
||||
nas[cn].u.d = va_arg( ap, double );
|
||||
break;
|
||||
|
||||
default:
|
||||
nas[cn].type = tidyFormatType_UNKNOWN;
|
||||
*rv = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* position and format */
|
||||
nas[cn].formatStart = pos;
|
||||
nas[cn].formatLength = (p - fmt) - pos;
|
||||
|
||||
/* the format string exceeds the buffer length */
|
||||
if ( nas[cn].formatLength >= FORMAT_LENGTH )
|
||||
{
|
||||
*rv = -1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
strncpy(nas[cn].format, fmt + nas[cn].formatStart, nas[cn].formatLength);
|
||||
nas[cn].format[nas[cn].formatLength] = 0; /* Is. #800 - If count <= srcLen, no 0 added! */
|
||||
}
|
||||
|
||||
|
||||
/* Something's not right. */
|
||||
if( nas[cn].type == tidyFormatType_UNKNOWN )
|
||||
{
|
||||
*rv = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* third pass: fill the nas[cn].ap */
|
||||
|
||||
if( *rv < 0 )
|
||||
{
|
||||
TidyDocFree( doc, nas );;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*rv = number;
|
||||
return nas;
|
||||
}
|
||||
|
182
third_party/tidy/messageobj.h
vendored
Normal file
182
third_party/tidy/messageobj.h
vendored
Normal file
|
@ -0,0 +1,182 @@
|
|||
#ifndef messageobj_h
|
||||
#define messageobj_h
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Provides an external, extensible API for message reporting.
|
||||
*
|
||||
* This module implements the `_TidyMessageImpl` structure (declared in
|
||||
* `tidy-int.h`) in order to abstract the reporting of reports and dialogue
|
||||
* from the rest of Tidy, and to enable a robust and extensible API for
|
||||
* message interrogation by LibTidy users.
|
||||
*
|
||||
* @author Jim Derry
|
||||
* @copyright Copyright (c) 2017 HTACG. See tidy.h for license.
|
||||
* @date Created 2017-March-10
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/** @defgroup messageobj_instantiation Message Creation and Releasing */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/** Creates a TidyMessageImpl, but without line numbers, such as used for
|
||||
** information report output.
|
||||
*/
|
||||
TidyMessageImpl *TY_(tidyMessageCreate)( TidyDocImpl *doc,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... );
|
||||
|
||||
/** Creates a TidyMessageImpl, using the line and column from the provided
|
||||
** Node as the message position source.
|
||||
*/
|
||||
TidyMessageImpl *TY_(tidyMessageCreateWithNode)( TidyDocImpl *doc,
|
||||
Node *node,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... );
|
||||
|
||||
/** Creates a TidyMessageImpl, using the line and column from the provided
|
||||
** document's Lexer as the message position source.
|
||||
*/
|
||||
TidyMessageImpl *TY_(tidyMessageCreateWithLexer)( TidyDocImpl *doc,
|
||||
uint code,
|
||||
TidyReportLevel level,
|
||||
... );
|
||||
|
||||
/** Deallocates a TidyMessageImpl in order to free up its allocated memory
|
||||
** when you're done using it.
|
||||
*/
|
||||
void TY_(tidyMessageRelease)( TidyMessageImpl *message );
|
||||
|
||||
|
||||
/** @} end messageobj_instantiation group */
|
||||
/** @defgroup messageobj_message_api Report and Dialogue API */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/** get the document the message came from. */
|
||||
TidyDocImpl* TY_(getMessageDoc)( TidyMessageImpl message );
|
||||
|
||||
/** get the message key code. */
|
||||
uint TY_(getMessageCode)( TidyMessageImpl message );
|
||||
|
||||
/** get the message key string. */
|
||||
ctmbstr TY_(getMessageKey)( TidyMessageImpl message );
|
||||
|
||||
/** get the line number the message applies to. */
|
||||
int TY_(getMessageLine)( TidyMessageImpl message );
|
||||
|
||||
/** get the column the message applies to. */
|
||||
int TY_(getMessageColumn)( TidyMessageImpl message );
|
||||
|
||||
/** get the TidyReportLevel of the message. */
|
||||
TidyReportLevel TY_(getMessageLevel)( TidyMessageImpl message );
|
||||
|
||||
/** get whether or not the message was muted by the configuration. */
|
||||
Bool TY_(getMessageIsMuted)( TidyMessageImpl message );
|
||||
|
||||
/** the built-in format string */
|
||||
ctmbstr TY_(getMessageFormatDefault)( TidyMessageImpl message );
|
||||
|
||||
/** the localized format string */
|
||||
ctmbstr TY_(getMessageFormat)( TidyMessageImpl message );
|
||||
|
||||
/** the message, formatted, default language */
|
||||
ctmbstr TY_(getMessageDefault)( TidyMessageImpl message );
|
||||
|
||||
/** the message, formatted, localized */
|
||||
ctmbstr TY_(getMessage)( TidyMessageImpl message );
|
||||
|
||||
/** the position part, default language */
|
||||
ctmbstr TY_(getMessagePosDefault)( TidyMessageImpl message );
|
||||
|
||||
/** the position part, localized */
|
||||
ctmbstr TY_(getMessagePos)( TidyMessageImpl message );
|
||||
|
||||
/** the prefix part, default language */
|
||||
ctmbstr TY_(getMessagePrefixDefault)( TidyMessageImpl message );
|
||||
|
||||
/** the prefix part, localized */
|
||||
ctmbstr TY_(getMessagePrefix)( TidyMessageImpl message );
|
||||
|
||||
/** the complete message, as would be output in the CLI */
|
||||
ctmbstr TY_(getMessageOutputDefault)( TidyMessageImpl message );
|
||||
|
||||
/** the complete message, as would be output in the CLI, localized */
|
||||
ctmbstr TY_(getMessageOutput)( TidyMessageImpl message );
|
||||
|
||||
|
||||
/** @} end messageobj_message_api group */
|
||||
/** @defgroup messageobj_args_api Report Arguments Interrogation API */
|
||||
/** @{ */
|
||||
|
||||
/**
|
||||
* Initializes the TidyIterator to point to the first item in the message's
|
||||
* arguments. Use `TY_(getNextMessageArgument)` to get an opaque instance of
|
||||
* `TidyMessageArgument` for which the subsequent interrogators will be of use.
|
||||
*/
|
||||
TidyIterator TY_(getMessageArguments)( TidyMessageImpl message );
|
||||
|
||||
/**
|
||||
* Returns the next `TidyMessageArgument`, for the given message, which can
|
||||
* then be interrogated with the API, and advances the iterator.
|
||||
*/
|
||||
TidyMessageArgument TY_(getNextMessageArgument)( TidyMessageImpl message, TidyIterator* iter );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the `TidyFormatParameterType` of the given message argument.
|
||||
*/
|
||||
TidyFormatParameterType TY_(getArgType)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the format specifier of the given message argument. The memory for
|
||||
* this string is cleared upon termination of the callback, so do be sure to
|
||||
* make your own copy.
|
||||
*/
|
||||
ctmbstr TY_(getArgFormat)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the string value of the given message argument. An assertion
|
||||
* will be generated if the argument type is not a string.
|
||||
*/
|
||||
ctmbstr TY_(getArgValueString)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the unsigned integer value of the given message argument. An
|
||||
* assertion will be generated if the argument type is not an unsigned
|
||||
* integer.
|
||||
*/
|
||||
uint TY_(getArgValueUInt)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the integer value of the given message argument. An assertion
|
||||
* will be generated if the argument type is not an integer.
|
||||
*/
|
||||
int TY_(getArgValueInt)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/**
|
||||
* Returns the double value of the given message argument. An assertion
|
||||
* will be generated if the argument type is not a double.
|
||||
*/
|
||||
double TY_(getArgValueDouble)( TidyMessageImpl message, TidyMessageArgument* arg );
|
||||
|
||||
|
||||
/** @} end messageobj_args_api group */
|
||||
/** @} end internal_api group */
|
||||
|
||||
#endif /* messageobj_h */
|
6452
third_party/tidy/parser.c
vendored
Normal file
6452
third_party/tidy/parser.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
289
third_party/tidy/parser.h
vendored
Normal file
289
third_party/tidy/parser.h
vendored
Normal file
|
@ -0,0 +1,289 @@
|
|||
#ifndef __PARSER_H__
|
||||
#define __PARSER_H__
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* HTML and XML Parsers.
|
||||
*
|
||||
* Tidy's HTML parser corrects many conditions and enforces certain user
|
||||
* preferences during the parsing process. The XML parser produces a tree
|
||||
* of nodes useful to Tidy but also suitable for use in other XML processing
|
||||
* applications.
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University) and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/tags.h"
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup parser_h HTML and XML Parsing
|
||||
**
|
||||
** These functions and structures form the internal API for document
|
||||
** parsing.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* This typedef represents the state of a parser when it enters and exits.
|
||||
* When the parser needs to finish work on the way back up the stack, it will
|
||||
* push one of these records to the stack, and it will pop a record from the
|
||||
* stack upon re-entry.
|
||||
*/
|
||||
typedef struct _TidyParserMemory
|
||||
{
|
||||
Parser *identity; /**< Which parser pushed this record? */
|
||||
Node *original_node; /**< Originally provided node at entry. */
|
||||
Node *reentry_node; /**< The node with which to re-enter. */
|
||||
GetTokenMode reentry_mode; /**< The token mode to use when re-entering. */
|
||||
int reentry_state; /**< State to set during re-entry. Defined locally in each parser. */
|
||||
GetTokenMode mode; /**< The caller will peek at this value to get the correct mode. */
|
||||
int register_1; /**< Local variable storage. */
|
||||
int register_2; /**< Local variable storage. */
|
||||
} TidyParserMemory;
|
||||
|
||||
|
||||
/**
|
||||
* This typedef represents a stack of parserState. The Tidy document has its
|
||||
* own instance of this.
|
||||
*/
|
||||
typedef struct _TidyParserStack
|
||||
{
|
||||
TidyParserMemory* content; /**< A state record. */
|
||||
uint size; /**< Current size of the stack. */
|
||||
int top; /**< Top of the stack. */
|
||||
} TidyParserStack;
|
||||
|
||||
|
||||
/**
|
||||
* Allocates and initializes the parser's stack. TidyCreate will perform
|
||||
* this automatically.
|
||||
*/
|
||||
void TY_(InitParserStack)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Frees the parser's stack when done. TidyRelease will perform this
|
||||
* automatically.
|
||||
*/
|
||||
void TY_(FreeParserStack)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not the stack is empty.
|
||||
*/
|
||||
Bool TY_(isEmptyParserStack)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Peek at the parser memory.
|
||||
*/
|
||||
TidyParserMemory TY_(peekMemory)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Peek at the parser memory "identity" field. This is just a convenience
|
||||
* to avoid having to create a new struct instance in the caller.
|
||||
*/
|
||||
Parser* TY_(peekMemoryIdentity)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Peek at the parser memory "mode" field. This is just a convenience
|
||||
* to avoid having to create a new struct instance in the caller.
|
||||
*/
|
||||
GetTokenMode TY_(peekMemoryMode)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Pop out a parser memory.
|
||||
*/
|
||||
TidyParserMemory TY_(popMemory)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Push the parser memory to the stack.
|
||||
*/
|
||||
void TY_(pushMemory)( TidyDocImpl* doc, TidyParserMemory data );
|
||||
|
||||
|
||||
/**
|
||||
* Is used to perform a node integrity check recursively after parsing
|
||||
* an HTML or XML document.
|
||||
* @note Actual performance of this check can be disabled by defining the
|
||||
* macro NO_NODE_INTEGRITY_CHECK.
|
||||
* @param node The root node for the integrity check.
|
||||
* @returns Returns yes or no indicating integrity of the node structure.
|
||||
*/
|
||||
Bool TY_(CheckNodeIntegrity)(Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not a text node ends with a space or newline.
|
||||
* @note Implementation of this method is found in `pprint.c` for
|
||||
* some reason.
|
||||
* @param lexer A reference to the lexer used to lex the document.
|
||||
* @param node The node to check.
|
||||
* @returns The result of the check.
|
||||
*/
|
||||
Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
|
||||
|
||||
|
||||
/**
|
||||
* Used to check if a node uses CM_NEW, which determines how attributes
|
||||
* without values should be printed. This was introduced to deal with
|
||||
* user-defined tags e.g. ColdFusion.
|
||||
* @param node The node to check.
|
||||
* @returns The result of the check.
|
||||
*/
|
||||
Bool TY_(IsNewNode)(Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Transforms a given node to another element, for example, from a `p`
|
||||
* to a `br`.
|
||||
* @param doc The document which the node belongs to.
|
||||
* @param node The node to coerce.
|
||||
* @param tid The tag type to coerce the node into.
|
||||
* @param obsolete If the old node was obsolete, a report will be generated.
|
||||
* @param expected If the old node was not expected to be found in this
|
||||
* particular location, a report will be generated.
|
||||
*/
|
||||
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
|
||||
|
||||
|
||||
/**
|
||||
* Extract a node and its children from a markup tree.
|
||||
* @param node The node to remove.
|
||||
* @returns Returns the removed node.
|
||||
*/
|
||||
Node *TY_(RemoveNode)(Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Remove node from markup tree and discard it.
|
||||
* @param doc The Tidy document from which to discard the node.
|
||||
* @param element The node to discard.
|
||||
* @returns Returns the next node.
|
||||
*/
|
||||
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
|
||||
|
||||
|
||||
/**
|
||||
* Insert node into markup tree as the first element of content of element.
|
||||
* @param element The new destination node.
|
||||
* @param node The node to insert.
|
||||
*/
|
||||
void TY_(InsertNodeAtStart)(Node *element, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Insert node into markup tree as the last element of content of element.
|
||||
* @param element The new destination node.
|
||||
* @param node The node to insert.
|
||||
*/
|
||||
void TY_(InsertNodeAtEnd)(Node *element, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Insert node into markup tree before element.
|
||||
* @param element The node before which the node is inserted.
|
||||
* @param node The node to insert.
|
||||
*/
|
||||
void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Insert node into markup tree after element.
|
||||
* @param element The node after which the node is inserted.
|
||||
* @param node The node to insert.
|
||||
*/
|
||||
void TY_(InsertNodeAfterElement)(Node *element, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Trims a single, empty element, returning the next node.
|
||||
* @param doc The Tidy document.
|
||||
* @param element The element to trim.
|
||||
* @returns Returns the next node.
|
||||
*/
|
||||
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
|
||||
|
||||
|
||||
/**
|
||||
* Trims a tree of empty elements recursively, returning the next node.
|
||||
* @param doc The Tidy document.
|
||||
* @param node The element to trim.
|
||||
* @returns Returns the next node.
|
||||
*/
|
||||
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not a text node is blank, meaning that it consists
|
||||
* of nothing, or a single space.
|
||||
* @param lexer The lexer used to lex the document.
|
||||
* @param node The node to test.
|
||||
* @returns Returns the result of the test.
|
||||
*/
|
||||
Bool TY_(IsBlank)(Lexer *lexer, Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not a node is declared as containing javascript
|
||||
* code.
|
||||
* @param node The node to test.
|
||||
* @returns Returns the result of the test.
|
||||
*/
|
||||
Bool TY_(IsJavaScript)(Node *node);
|
||||
|
||||
|
||||
/**
|
||||
* Parses a document after lexing using the HTML parser. It begins by properly
|
||||
* configuring the overall HTML structure, and subsequently processes all
|
||||
* remaining nodes. HTML is the root node.
|
||||
* @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(ParseDocument)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Indicates whether or not whitespace is to be preserved in XHTML/XML
|
||||
* documents.
|
||||
* @param doc The Tidy document.
|
||||
* @param element The node to test.
|
||||
* @returns Returns the result of the test.
|
||||
*/
|
||||
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
|
||||
|
||||
|
||||
/**
|
||||
* Parses a document after lexing using the XML parser.
|
||||
* @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(ParseXMLDocument)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** @} end parser_h group */
|
||||
/** @} end internal_api group */
|
||||
|
||||
#endif /* __PARSER_H__ */
|
2713
third_party/tidy/pprint.c
vendored
Normal file
2713
third_party/tidy/pprint.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
144
third_party/tidy/pprint.h
vendored
Normal file
144
third_party/tidy/pprint.h
vendored
Normal file
|
@ -0,0 +1,144 @@
|
|||
#ifndef __PPRINT_H__
|
||||
#define __PPRINT_H__
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Pretty Print the parse tree.
|
||||
*
|
||||
* Pretty printer for HTML and XML documents.
|
||||
* - Block-level and unknown elements are printed on new lines and
|
||||
* their contents indented with a user configured amount of spaces/tabs.
|
||||
* - Inline elements are printed inline.
|
||||
* - Inline content is wrapped on spaces (except in attribute values or
|
||||
* preformatted text), after start tags and before end tags.
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2021 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University) and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup print_h HTML and XML Pretty Printing
|
||||
**
|
||||
** These functions and structures form the internal API for document
|
||||
** printing.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/**
 *  The current pretty-printing mode, which tells the printer how to behave
 *  for the content currently being output. NORMAL is zero; every other
 *  value is a distinct single bit.
 */
typedef enum {
    NORMAL       = 0x00u,  /**< Normal output. */
    PREFORMATTED = 0x01u,  /**< Preformatted output. */
    COMMENT      = 0x02u,  /**< Comment. */
    ATTRIBVALUE  = 0x04u,  /**< An attribute's value. */
    NOWRAP       = 0x08u,  /**< Content that should not be wrapped. */
    CDATA        = 0x10u   /**< CDATA content. */
} PrettyPrintMode;
|
||||
|
||||
|
||||
/**
 *  The state of a single output line: its indent level plus the
 *  in-attribute and in-string state. The pretty-printing buffer keeps
 *  instances of this record.
 *
 *  The pretty printer holds at most two lines of text in the buffer
 *  before flushing output, so the indent level has to be captured at the
 *  _beginning_ of _each_ line, not merely at the end of the second one.
 *
 *  The "In Attribute" and "In String" states, by contrast, are tracked
 *  at the _end_ of each line.
 */
typedef struct _TidyIndent
{
    int spaces;           /**< Indent level of the line. */
    int attrValStart;     /**< Attribute in-value state. */
    int attrStringStart;  /**< Attribute in-string state. */
} TidyIndent;
|
||||
|
||||
|
||||
/**
|
||||
* The pretty-printing buffer.
|
||||
*/
|
||||
typedef struct _TidyPrintImpl
|
||||
{
|
||||
TidyAllocator *allocator; /**< Allocator */
|
||||
|
||||
uint *linebuf; /**< The line buffer proper. */
|
||||
uint lbufsize; /**< Current size of the buffer. */
|
||||
uint linelen; /**< Current line length. */
|
||||
uint wraphere; /**< Point in the line to wrap text. */
|
||||
uint line; /**< Current line. */
|
||||
|
||||
uint ixInd; /**< Index into the indent[] array. */
|
||||
TidyIndent indent[2]; /**< Two lines worth of indent state */
|
||||
} TidyPrintImpl;
|
||||
|
||||
|
||||
/**
|
||||
* Allocates and initializes the pretty-printing buffer for a Tidy document.
|
||||
*/
|
||||
void TY_(InitPrintBuf)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Deallocates and frees a Tidy document's pretty-printing buffer.
|
||||
*/
|
||||
void TY_(FreePrintBuf)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Flushes the current buffer to the actual output sink.
|
||||
*/
|
||||
void TY_(PFlushLine)( TidyDocImpl* doc, uint indent );
|
||||
|
||||
|
||||
/**
|
||||
* Print just the content of the HTML body element, which is useful when
|
||||
* you want to reuse material from other documents.
|
||||
* -- Sebastiano Vigna <vigna@dsi.unimi.it>
|
||||
*/
|
||||
void TY_(PrintBody)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/**
|
||||
* Print the HTML document tree for the given document using the given node
|
||||
* as the root of the document. Note that you can print an entire document
|
||||
* node as body using PPrintTree()
|
||||
*/
|
||||
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
|
||||
|
||||
|
||||
/**
|
||||
* Print the XML document tree for the given document using the given node
|
||||
* as the root of the document.
|
||||
*/
|
||||
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
|
||||
|
||||
|
||||
/** @} end print_h group */
|
||||
/** @} end internal_api group */
|
||||
|
||||
#endif /* __PPRINT_H__ */
|
414
third_party/tidy/sprtf.c
vendored
Normal file
414
third_party/tidy/sprtf.c
vendored
Normal file
|
@ -0,0 +1,414 @@
|
|||
/* clang-format off */
|
||||
/* sprtf.c
|
||||
* SPRTF - Log output utility - part of the HTML Tidy project
|
||||
*
|
||||
* Copyright (c) 1998-2017 Geoff R. McLane and HTACG
|
||||
*
|
||||
* See tidy.h for the copyright notice.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning( disable : 4995 )
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# if (defined(UNICODE) || defined(_UNICODE))
|
||||
# endif
|
||||
#else /* !_MSC_VER */
|
||||
#endif /* _MSC_VER y/n */
|
||||
|
||||
#include "third_party/tidy/sprtf.h"
|
||||
|
||||
#ifdef ENABLE_DEBUG_LOG
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# ifndef _CRT_SECURE_NO_DEPRECATE
|
||||
# define _CRT_SECURE_NO_DEPRECATE
|
||||
# endif /* #ifndef _CRT_SECURE_NO_DEPRECATE */
|
||||
# pragma warning( disable:4996 )
|
||||
#else
|
||||
# define strcmpi strcasecmp
|
||||
#endif
|
||||
|
||||
#ifndef MX_ONE_BUF
|
||||
# define MX_ONE_BUF 1024
|
||||
#endif
|
||||
#ifndef MX_BUFFERS
|
||||
# define MX_BUFFERS 1024
|
||||
#endif
|
||||
|
||||
static char _s_strbufs[MX_ONE_BUF * MX_BUFFERS];
|
||||
static int iNextBuf = 0;
|
||||
|
||||
char *GetNxtBuf()
|
||||
{
|
||||
iNextBuf++;
|
||||
if(iNextBuf >= MX_BUFFERS)
|
||||
iNextBuf = 0;
|
||||
return &_s_strbufs[MX_ONE_BUF * iNextBuf];
|
||||
}
|
||||
|
||||
#define MXIO 512
|
||||
|
||||
static char def_log[] = "temptidy.txt"; /* use local log */
|
||||
static char logfile[264] = "\0";
|
||||
static FILE * outfile = NULL;
|
||||
static int addsystime = 0;
|
||||
static int addsysdate = 0;
|
||||
static int addstdout = 1;
|
||||
static int addflush = 1;
|
||||
static int add2screen = 0;
|
||||
static int add2listview = 0;
|
||||
static int append_to_log = 0;
|
||||
|
||||
#ifndef VFP
|
||||
# define VFP(a) ( a && ( a != (FILE *)-1 ) )
|
||||
#endif
|
||||
|
||||
int add_list_out( int val )
|
||||
{
|
||||
int i = add2listview;
|
||||
add2listview = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
int add_std_out( int val )
|
||||
{
|
||||
int i = addstdout;
|
||||
addstdout = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
int add_screen_out( int val )
|
||||
{
|
||||
int i = add2screen;
|
||||
add2screen = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
int add_sys_time( int val )
|
||||
{
|
||||
int i = addsystime;
|
||||
addsystime = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
int add_sys_date( int val )
|
||||
{
|
||||
int i = addsysdate;
|
||||
addsysdate = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
int add_append_log( int val )
|
||||
{
|
||||
int i = append_to_log;
|
||||
append_to_log = val;
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
static const char *mode = "wb"; /* in window sprtf looks after the line endings */
|
||||
#else
|
||||
static const char *mode = "w";
|
||||
#endif
|
||||
|
||||
int open_log_file( void )
|
||||
{
|
||||
if (logfile[0] == 0)
|
||||
strcpy(logfile,def_log);
|
||||
if (append_to_log) {
|
||||
#ifdef _MSC_VER
|
||||
mode = "ab"; /* in window sprtf looks after the line endings */
|
||||
#else
|
||||
mode = "a";
|
||||
#endif
|
||||
}
|
||||
outfile = fopen(logfile, mode);
|
||||
if( outfile == 0 ) {
|
||||
outfile = (FILE *)-1;
|
||||
sprtf("ERROR: Failed to open log file [%s] ...\n", logfile);
|
||||
/* exit(1); failed */
|
||||
return 0; /* failed */
|
||||
}
|
||||
return 1; /* success */
|
||||
}
|
||||
|
||||
void close_log_file( void )
|
||||
{
|
||||
if( VFP(outfile) ) {
|
||||
fclose(outfile);
|
||||
}
|
||||
outfile = NULL;
|
||||
}
|
||||
|
||||
char * get_log_file( void )
|
||||
{
|
||||
if (logfile[0] == 0)
|
||||
strcpy(logfile,def_log);
|
||||
if (outfile == (FILE *)-1) /* disable the log file */
|
||||
return (char *)"none";
|
||||
return logfile;
|
||||
}
|
||||
|
||||
void set_log_file( char * nf, int open )
|
||||
{
|
||||
if (logfile[0] == 0)
|
||||
strcpy(logfile,def_log);
|
||||
if ( nf && *nf && strcmpi(nf,logfile) ) {
|
||||
close_log_file(); /* remove any previous */
|
||||
strcpy(logfile,nf); /* set new name */
|
||||
if (strcmp(logfile,"none") == 0) { /* if equal 'none' */
|
||||
outfile = (FILE *)-1; /* disable the log file */
|
||||
} else if (open) {
|
||||
open_log_file(); /* and open it ... anything previous written is 'lost' */
|
||||
} else
|
||||
outfile = 0; /* else set 0 to open on first write */
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
int gettimeofday(struct timeval *tp, void *tzp)
|
||||
{
|
||||
#ifdef WIN32
|
||||
struct _timeb timebuffer;
|
||||
_ftime(&timebuffer);
|
||||
tp->tv_sec = (long)timebuffer.time;
|
||||
tp->tv_usec = timebuffer.millitm * 1000;
|
||||
#else
|
||||
tp->tv_sec = time(NULL);
|
||||
tp->tv_usec = 0;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
void add_date_stg( char *ps, struct timeval *ptv )
|
||||
{
|
||||
time_t curtime;
|
||||
struct tm * ptm;
|
||||
curtime = (ptv->tv_sec & 0xffffffff);
|
||||
ptm = localtime(&curtime);
|
||||
if (ptm) {
|
||||
strftime(EndBuf(ps),128,"%Y/%m/%d",ptm);
|
||||
}
|
||||
}
|
||||
|
||||
void add_time_stg( char *ps, struct timeval *ptv )
|
||||
{
|
||||
time_t curtime;
|
||||
struct tm * ptm;
|
||||
curtime = (ptv->tv_sec & 0xffffffff);
|
||||
ptm = localtime(&curtime);
|
||||
if (ptm) {
|
||||
strftime(EndBuf(ps),128,"%H:%M:%S",ptm);
|
||||
}
|
||||
}
|
||||
|
||||
char *get_date_stg()
|
||||
{
|
||||
char *ps;
|
||||
struct timeval tv;
|
||||
gettimeofday( (struct timeval *)&tv, (struct timezone *)0 );
|
||||
ps = GetNxtBuf();
|
||||
*ps = 0;
|
||||
add_date_stg( ps, &tv );
|
||||
return ps;
|
||||
}
|
||||
|
||||
char *get_time_stg()
|
||||
{
|
||||
char *ps;
|
||||
struct timeval tv;
|
||||
gettimeofday( (struct timeval *)&tv, (struct timezone *)0 );
|
||||
ps = GetNxtBuf();
|
||||
*ps = 0;
|
||||
add_time_stg( ps, &tv );
|
||||
return ps;
|
||||
}
|
||||
|
||||
char *get_date_time_stg()
|
||||
{
|
||||
char *ps;
|
||||
struct timeval tv;
|
||||
gettimeofday( (struct timeval *)&tv, (struct timezone *)0 );
|
||||
ps = GetNxtBuf();
|
||||
*ps = 0;
|
||||
add_date_stg( ps, &tv );
|
||||
strcat(ps," ");
|
||||
add_time_stg( ps, &tv );
|
||||
return ps;
|
||||
}
|
||||
|
||||
static void oi( char * psin )
|
||||
{
|
||||
int len, w;
|
||||
char * ps = psin;
|
||||
if (!ps)
|
||||
return;
|
||||
|
||||
len = (int)strlen(ps);
|
||||
if (len) {
|
||||
|
||||
if( outfile == 0 ) {
|
||||
open_log_file();
|
||||
}
|
||||
if( VFP(outfile) ) {
|
||||
char *tb;
|
||||
if (addsysdate) {
|
||||
tb = GetNxtBuf();
|
||||
len = sprintf( tb, "%s - %s", get_date_time_stg(), ps );
|
||||
ps = tb;
|
||||
} else if( addsystime ) {
|
||||
tb = GetNxtBuf();
|
||||
len = sprintf( tb, "%s - %s", get_time_stg(), ps );
|
||||
ps = tb;
|
||||
}
|
||||
|
||||
w = (int)fwrite( ps, 1, len, outfile );
|
||||
if( w != len ) {
|
||||
fclose(outfile);
|
||||
outfile = (FILE *)-1;
|
||||
sprtf("WARNING: Failed write to log file [%s] ...\n", logfile);
|
||||
exit(1);
|
||||
} else if (addflush) {
|
||||
fflush( outfile );
|
||||
}
|
||||
}
|
||||
|
||||
if( addstdout ) {
|
||||
fwrite( ps, 1, len, stderr ); /* 20170917 - Switch to using 'stderr' in place of 'stdout' */
|
||||
}
|
||||
#ifdef ADD_LISTVIEW
|
||||
if (add2listview) {
|
||||
LVInsertItem(ps);
|
||||
}
|
||||
#endif /* ADD_LISTVIEW */
|
||||
#ifdef ADD_SCREENOUT
|
||||
if (add2screen) {
|
||||
Add_String(ps); /* add string to screen list */
|
||||
}
|
||||
#endif /* #ifdef ADD_SCREENOUT */
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* service to ensure line endings in windows only */
|
||||
static void prt( char * ps )
|
||||
{
|
||||
static char _s_buf[1024];
|
||||
char * pb = _s_buf;
|
||||
size_t i, j, k;
|
||||
char c, d;
|
||||
i = strlen(ps);
|
||||
k = 0;
|
||||
d = 0;
|
||||
if(i) {
|
||||
k = 0;
|
||||
d = 0;
|
||||
for( j = 0; j < i; j++ ) {
|
||||
c = ps[j];
|
||||
if( c == 0x0d ) {
|
||||
if( (j+1) < i ) {
|
||||
if( ps[j+1] != 0x0a ) {
|
||||
pb[k++] = c;
|
||||
c = 0x0a;
|
||||
}
|
||||
} else {
|
||||
pb[k++] = c;
|
||||
c = 0x0a;
|
||||
}
|
||||
} else if( c == 0x0a ) {
|
||||
if( d != 0x0d ) {
|
||||
pb[k++] = 0x0d;
|
||||
}
|
||||
}
|
||||
pb[k++] = c;
|
||||
d = c;
|
||||
if( k >= MXIO ) {
|
||||
pb[k] = 0;
|
||||
oi(pb);
|
||||
k = 0;
|
||||
}
|
||||
} /* for length of string */
|
||||
if( k ) {
|
||||
pb[k] = 0;
|
||||
oi( pb );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* #ifdef _MSC_VER */
|
||||
|
||||
/* Writes an already-formatted string to the log outputs, without any
 * printf-style processing.
 *
 * cp  NUL-terminated string to output. NULL is now accepted and treated as
 *     "nothing to write"; previously it reached strlen() here, and in prt()
 *     on MSVC builds.
 *
 * Returns the length of cp in bytes, or 0 for NULL.
 */
int direct_out_it( char *cp )
{
    if (cp == NULL)
        return 0;

#ifdef _MSC_VER
    prt(cp);
#else
    oi(cp);
#endif
    return (int)strlen(cp);
}
|
||||
|
||||
/* STDAPI StringCchVPrintf( OUT LPTSTR pszDest,
|
||||
* IN size_t cchDest, IN LPCTSTR pszFormat, IN va_list argList ); */
|
||||
/* printf-style logging entry point.
 *
 * Formats the message into a static buffer, limited to M_MAX_SPRTF bytes by
 * vsnprintf(), and hands it to the output routine: prt() on MSVC so that
 * CR/LF line endings are ensured, oi() elsewhere.
 *
 * Returns whatever vsnprintf() returned.
 */
int MCDECL sprtf( const char *pf, ... )
{
    static char _s_sprtfbuf[M_MAX_SPRTF+4];
    va_list args;
    int     written;

    va_start(args, pf);
    written = vsnprintf( _s_sprtfbuf, M_MAX_SPRTF, pf, args );
    va_end(args);

#ifdef _MSC_VER
    prt(_s_sprtfbuf);   /* ensure CR/LF */
#else
    oi(_s_sprtfbuf);
#endif
    return written;
}
|
||||
|
||||
#ifdef UNICODE
|
||||
/* WIDE VARIETY */
|
||||
/* Converts a wide string to the ANSI code page and outputs it through prt().
 *
 * Because an explicit character count (without the terminator) is passed to
 * WideCharToMultiByte(), the converted text is not NUL-terminated by the
 * API. The conversion is therefore limited to one byte less than the staging
 * buffer and terminated here at the returned length. Previously the buffer
 * was passed to prt() unterminated, so bytes left over from an earlier,
 * longer message could be printed. Nothing is output if the conversion
 * fails (returns 0).
 */
static void wprt( PTSTR ps )
{
    static char _s_woibuf[1024];
    char * cp = _s_woibuf;
    int len = (int)lstrlen(ps);

    if (len) {
        int ret = WideCharToMultiByte( CP_ACP, /* UINT CodePage, // code page */
            0,                          /* DWORD dwFlags, // performance and mapping flags */
            ps,                         /* LPCWSTR lpWideCharStr, // wide-character string */
            len,                        /* int cchWideChar, // number of chars in string. */
            cp,                         /* LPSTR lpMultiByteStr, // buffer for new string */
            (int)sizeof(_s_woibuf) - 1, /* int cbMultiByte, // leave room for the terminator */
            NULL,                       /* LPCSTR lpDefaultChar, // default for unmappable chars */
            NULL );                     /* LPBOOL lpUsedDefaultChar // set when default char used */
        if (ret > 0) {
            cp[ret] = 0;
            prt(cp);
        }
    }
}
|
||||
|
||||
/* Wide-character counterpart of sprtf(), compiled only when UNICODE is
 * defined.
 *
 * Formats into a static 1024-element WCHAR buffer with StringCchVPrintf()
 * and outputs the result through wprt(). The buffer is emptied first, so
 * nothing stale is printed if formatting stores nothing.
 *
 * Always returns 1.
 */
int MCDECL wsprtf( PTSTR pf, ... )
{
    static WCHAR _s_sprtfwbuf[1024];
    va_list args;

    _s_sprtfwbuf[0] = 0;
    va_start(args, pf);
    StringCchVPrintf(_s_sprtfwbuf, 1024, pf, args);
    va_end(args);

    wprt(_s_sprtfwbuf);
    return 1;
}
|
||||
|
||||
#endif /* #ifdef UNICODE */
|
||||
|
||||
#endif /* #ifdef ENABLE_DEBUG_LOG */
|
||||
/* eof - sprtf.c */
|
101
third_party/tidy/sprtf.h
vendored
Normal file
101
third_party/tidy/sprtf.h
vendored
Normal file
|
@ -0,0 +1,101 @@
|
|||
#ifndef _SPRTF_HXX_
|
||||
#define _SPRTF_HXX_
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Log output utility - part of the HTML Tidy project
|
||||
*
|
||||
* @author Geoff R. McLane [reports _at_ geoffair _dot_ info]
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 Geoff R. McLane and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date 2017/02/12 17:06:02 Revision 1.0.2 geoff - correct license and coding style
|
||||
* @date 2012/11/06 13:01:25 Revision 1.0.1 geoff
|
||||
* @date 2012/10/17 00:00:00 Revision 1.0.0 geoff
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#ifdef ENABLE_DEBUG_LOG
|
||||
/*=============================================================================
|
||||
* EXTRA Debugging, and information aid.
|
||||
*
|
||||
* When building and defining the ENABLE_DEBUG_LOG macro, Tidy will output
|
||||
* extensive debug information. In addition to this macro, you can supply
|
||||
* cmake build flags for additional diagnostic information:
|
||||
* - -DENABLE_ALLOC_DEBUG:BOOL=ON - DEBUG_ALLOCATION
|
||||
* - -DENABLE_MEMORY_DEBUG:BOOL=ON - DEBUG_MEMORY
|
||||
* - -DENABLE_CRTDBG_MEMORY:BOOL=ON - _CRTDBG_MAP_ALLOC (WIN32 only)
|
||||
*
|
||||
* _MSC_VER Only - ENABLE_DEBUG_LOG is automatically enabled in the Debug
|
||||
* build, unless DISABLE_DEBUG_LOG is defined. See 'tidyplatform.h'
|
||||
*
|
||||
* You can use DEBUG_LOG( SPRTF() ) to avoid #ifdef ENABLE_DEBUG_LOG for
|
||||
* one-liners.
|
||||
*
|
||||
* This EXTRA Debug information is also written to a 'temptidy.txt' log
|
||||
* file, for review, and analysis.
|
||||
*
|
||||
*===========================================================================*/
|
||||
|
||||
#ifndef SPRTF
|
||||
# define SPRTF sprtf
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define MCDECL _cdecl
|
||||
#else
|
||||
# define MCDECL
|
||||
#endif
|
||||
|
||||
int add_std_out( int val );
|
||||
int add_sys_time( int val );
|
||||
int add_sys_date( int val );
|
||||
|
||||
int add_screen_out( int val );
|
||||
int add_list_out( int val );
|
||||
int add_append_log( int val );
|
||||
|
||||
int open_log_file( void );
|
||||
void close_log_file( void );
|
||||
void set_log_file( char * nf, int open );
|
||||
char * get_log_file( void );
|
||||
|
||||
int MCDECL sprtf( const char *pf, ... );
|
||||
#define M_MAX_SPRTF 2048
|
||||
int direct_out_it( char *cp );
|
||||
|
||||
char *GetNxtBuf(void);
|
||||
|
||||
#define EndBuf(a) ( a + strlen(a) )
|
||||
|
||||
char *get_date_stg(void);
|
||||
char *get_time_stg(void);
|
||||
char *get_date_time_stg(void);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
int gettimeofday(struct timeval *tp, void *tzp);
|
||||
#endif
|
||||
|
||||
# define DEBUG_LOG(ARG) do { ARG; } while(0)
|
||||
|
||||
#else
|
||||
# define DEBUG_LOG(ARG)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* #ifndef _SPRTF_HXX_*/
|
||||
/* eof - sprtf.h */
|
1149
third_party/tidy/streamio.c
vendored
Normal file
1149
third_party/tidy/streamio.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
175
third_party/tidy/streamio.h
vendored
Normal file
175
third_party/tidy/streamio.h
vendored
Normal file
|
@ -0,0 +1,175 @@
|
|||
#ifndef __STREAMIO_H__
|
||||
#define __STREAMIO_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* streamio.h -- handles character stream I/O
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Wrapper around Tidy input source and output sink
|
||||
that calls appropriate interfaces, and applies
|
||||
necessary char encoding transformations: to/from
|
||||
ISO-10646 and/or UTF-8.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/tidybuffio.h"
|
||||
#include "third_party/tidy/fileio.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
/* Kind of backing source or sink for a stream; stored in the `iotype`
** member of both struct _StreamIn and struct _StreamOut.
** NOTE(review): the per-value meanings below are inferred from the
** FileInput/BufferInput/UserInput (and ...Output) constructors declared in
** this header -- confirm against streamio.c.
*/
typedef enum
|
||||
{
|
||||
FileIO, /* presumably a stdio FILE* stream */
|
||||
BufferIO, /* presumably a TidyBuffer */
|
||||
UserIO /* presumably a caller-supplied TidyInputSource / TidyOutputSink */
|
||||
} IOType;
|
||||
|
||||
/* states for ISO 2022
|
||||
|
||||
A document in ISO-2022 based encoding uses some ESC sequences called
|
||||
"designator" to switch character sets. The designators defined and
|
||||
used in ISO-2022-JP are:
|
||||
|
||||
"ESC" + "(" + ? for ISO646 variants
|
||||
|
||||
"ESC" + "$" + ? and
|
||||
"ESC" + "$" + "(" + ? for multibyte character sets
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
FSM_ASCII,
|
||||
FSM_ESC,
|
||||
FSM_ESCD,
|
||||
FSM_ESCDP,
|
||||
FSM_ESCP,
|
||||
FSM_NONASCII
|
||||
} ISO2022State;
|
||||
|
||||
/************************
|
||||
** Source
|
||||
************************/
|
||||
|
||||
/* Sizing constants for struct _StreamIn. */
enum
|
||||
{
|
||||
CHARBUF_SIZE=5, /* presumably the initial capacity of charbuf -- TODO confirm against streamio.c */
|
||||
LASTPOS_SIZE=64 /* number of entries in the lastcols[] array of struct _StreamIn */
|
||||
};
|
||||
|
||||
/* non-raw input is cleaned up*/
|
||||
struct _StreamIn
|
||||
{
|
||||
ISO2022State state; /* FSM for ISO2022 */
|
||||
Bool pushed;
|
||||
TidyAllocator *allocator;
|
||||
tchar* charbuf;
|
||||
uint bufpos;
|
||||
uint bufsize;
|
||||
int tabs;
|
||||
int lastcols[LASTPOS_SIZE];
|
||||
unsigned short curlastpos; /* current last position in lastcols */
|
||||
unsigned short firstlastpos; /* first valid last position in lastcols */
|
||||
int curcol;
|
||||
int curline;
|
||||
int encoding;
|
||||
IOType iotype;
|
||||
|
||||
TidyInputSource source;
|
||||
|
||||
/* Pointer back to document for error reporting */
|
||||
TidyDocImpl* doc;
|
||||
};
|
||||
|
||||
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
|
||||
void TY_(freeStreamIn)(StreamIn* in);
|
||||
|
||||
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
|
||||
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
|
||||
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
|
||||
|
||||
int TY_(ReadBOMEncoding)(StreamIn *in);
|
||||
uint TY_(ReadChar)( StreamIn* in );
|
||||
void TY_(UngetChar)( uint c, StreamIn* in );
|
||||
Bool TY_(IsEOF)( StreamIn* in );
|
||||
|
||||
|
||||
/************************
|
||||
** Sink
|
||||
************************/
|
||||
|
||||
struct _StreamOut
|
||||
{
|
||||
int encoding;
|
||||
ISO2022State state; /* for ISO 2022 */
|
||||
uint nl;
|
||||
IOType iotype;
|
||||
TidyOutputSink sink;
|
||||
};
|
||||
|
||||
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
|
||||
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
|
||||
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
|
||||
|
||||
StreamOut* TY_(StdErrOutput)(void);
|
||||
/* StreamOut* StdOutOutput(void); */
|
||||
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
|
||||
|
||||
void TY_(WriteChar)( uint c, StreamOut* out );
|
||||
void TY_(outBOM)( StreamOut *out );
|
||||
|
||||
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
|
||||
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
|
||||
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
|
||||
|
||||
/************************
|
||||
** Misc
|
||||
************************/
|
||||
|
||||
/* character encodings
|
||||
*/
|
||||
#define RAW 0
|
||||
#define ASCII 1
|
||||
#define LATIN0 2
|
||||
#define LATIN1 3
|
||||
#define UTF8 4
|
||||
#define ISO2022 5
|
||||
#define MACROMAN 6
|
||||
#define WIN1252 7
|
||||
#define IBM858 8
|
||||
#define UTF16LE 9
|
||||
#define UTF16BE 10
|
||||
#define UTF16 11
|
||||
#define BIG5 12
|
||||
#define SHIFTJIS 13
|
||||
|
||||
/* Function for conversion from Windows-1252 to Unicode */
|
||||
uint TY_(DecodeWin1252)(uint c);
|
||||
|
||||
/* Function to convert from MacRoman to Unicode */
|
||||
uint TY_(DecodeMacRoman)(uint c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Use numeric constants as opposed to escape chars (\r, \n)
|
||||
** to avoid conflict with Mac compilers that may re-define these.
|
||||
*/
|
||||
#define CR 0xD
|
||||
#define LF 0xA
|
||||
|
||||
#if defined(MAC_OS_CLASSIC)
|
||||
# define DEFAULT_NL_CONFIG TidyCR
|
||||
#elif defined(_WIN32) || defined(OS2_OS)
|
||||
# define DEFAULT_NL_CONFIG TidyCRLF
|
||||
#else
|
||||
# define DEFAULT_NL_CONFIG TidyLF
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __STREAMIO_H__ */
|
40
third_party/tidy/tagask.c
vendored
Normal file
40
third_party/tidy/tagask.c
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* clang-format off */
|
||||
/* tagask.c -- Interrogate node type
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy-int.h"
|
||||
#include "third_party/tidy/tags.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
|
||||
/* Public wrapper: converts the opaque TidyNode handle with tidyNodeToImpl()
 * and forwards it to the internal TY_(nodeIsText) predicate.
 */
Bool tidyNodeIsText( TidyNode tnod )
{
    return TY_(nodeIsText)( tidyNodeToImpl(tnod) );
}
|
||||
/* Wrapper around the internal TY_(nodeCMIsBlock) predicate, taking the
 * opaque TidyNode handle. Declared here because it is not exported yet.
 */
Bool tidyNodeCMIsBlock( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsBlock( TidyNode tnod )
{
    return TY_(nodeCMIsBlock)( tidyNodeToImpl(tnod) );
}
|
||||
/* Wrapper around the internal TY_(nodeCMIsInline) predicate, taking the
 * opaque TidyNode handle. Declared here because it is not exported yet.
 */
Bool tidyNodeCMIsInline( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsInline( TidyNode tnod )
{
    return TY_(nodeCMIsInline)( tidyNodeToImpl(tnod) );
}
|
||||
/* Wrapper around the internal TY_(nodeCMIsEmpty) predicate, taking the
 * opaque TidyNode handle. Declared here because it is not exported yet.
 */
Bool tidyNodeCMIsEmpty( TidyNode tnod ); /* not exported yet */

Bool tidyNodeCMIsEmpty( TidyNode tnod )
{
    return TY_(nodeCMIsEmpty)( tidyNodeToImpl(tnod) );
}
|
||||
/* Public wrapper: converts the opaque TidyNode handle with tidyNodeToImpl()
 * and forwards it to the internal TY_(nodeIsHeader) predicate.
 */
Bool tidyNodeIsHeader( TidyNode tnod )
{
    return TY_(nodeIsHeader)( tidyNodeToImpl(tnod) );
}
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1190
third_party/tidy/tags.c
vendored
Normal file
1190
third_party/tidy/tags.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
482
third_party/tidy/tags.h
vendored
Normal file
482
third_party/tidy/tags.h
vendored
Normal file
|
@ -0,0 +1,482 @@
|
|||
#ifndef __TAGS_H__
|
||||
#define __TAGS_H__
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Recognize HTML Tags.
|
||||
*
|
||||
* The HTML tags are stored as 8 bit ASCII strings.
|
||||
* Use lookupw() to find a tag given a wide char string.
|
||||
*
|
||||
* @author HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University) and HTACG.
|
||||
* @par
|
||||
* All Rights Reserved.
|
||||
* @par
|
||||
* See `tidy.h` for the complete license.
|
||||
*
|
||||
* @date Additional updates: consult git log
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/lexer.h"
|
||||
#include "third_party/tidy/config.h"
|
||||
#include "third_party/tidy/attrdict.h"
|
||||
|
||||
/** @addtogroup internal_api */
|
||||
/** @{ */
|
||||
|
||||
|
||||
/***************************************************************************//**
|
||||
** @defgroup tags_h HTML Tags
|
||||
**
|
||||
** This module organizes all of Tidy's HTML tag operations, such as parsing
|
||||
** tags, defining tags, and user-defined tags.
|
||||
**
|
||||
** @{
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/** @name Basic Structures and Tag Operations.
|
||||
** These structures form the backbone of Tidy tag processing, and the
|
||||
** functions in this group provide basic operations with tags and nodes.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
/** This enumeration defines the types of user-defined tags that can be
|
||||
** created.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
tagtype_null = 0, /**< First item marker. */
|
||||
tagtype_empty = 1, /**< Tag is an empty element. */
|
||||
tagtype_inline = 2, /**< Tag is an inline element. */
|
||||
tagtype_block = 4, /**< Tag is a block level element. */
|
||||
tagtype_pre = 8 /**< Tag is a preformatted tag. */
|
||||
} UserTagType;
|
||||
|
||||
|
||||
/** This typedef describes a function to be used to parse HTML of a Tidy tag.
|
||||
** @param doc The Tidy document.
|
||||
** @param node The node being parsed.
|
||||
** @param mode The GetTokenMode to be used for parsing the node contents.
|
||||
** @param popStack A flag indicating that we are re-entering this parser, and
|
||||
** it should restore a state from the stack.
|
||||
*/
|
||||
typedef Node* (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
|
||||
|
||||
|
||||
/** This typedef describes a function to be used to check the attributes
|
||||
** of a Tidy tag.
|
||||
*/
|
||||
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
/** Defines a dictionary entry for a single Tidy tag, including all of the
|
||||
** relevant information that it requires.
|
||||
*/
|
||||
struct _Dict
|
||||
{
|
||||
TidyTagId id; /**< Identifier for this tag. */
|
||||
tmbstr name; /**< The tag name. */
|
||||
uint versions; /**< Accumulates potential HTML versions. See TY_(ConstrainVersion). */
|
||||
AttrVersion const * attrvers; /**< Accumulates potential HTML versions for attributes. */
|
||||
uint model; /**< Indicates the relevant content models for the tag. See lexer.h; there is no enum. */
|
||||
Parser* parser; /**< Specifies the parser to use for this tag. */
|
||||
CheckAttribs* chkattrs; /**< Specifies the function to check this tag's attributes. */
|
||||
Dict* next; /**< Link to next tag. */
|
||||
};
|
||||
|
||||
|
||||
/** This enum indicates the maximum size of the hash table for tag hash lookup.
|
||||
*/
|
||||
enum
|
||||
{
|
||||
ELEMENT_HASH_SIZE=178u /**< Maximum number of tags in the hash table. */
|
||||
};
|
||||
|
||||
|
||||
/** This structure provides hash lookup for Tidy tags.
|
||||
*/
|
||||
typedef struct _DictHash
|
||||
{
|
||||
Dict const* tag; /**< The current tag. */
|
||||
struct _DictHash* next; /**< The next tag. */
|
||||
} DictHash;
|
||||
|
||||
|
||||
/** This structure consists of the lists of all tags known to Tidy.
|
||||
*/
|
||||
typedef struct _TidyTagImpl
|
||||
{
|
||||
Dict* xml_tags; /**< Placeholder for all xml tags. */
|
||||
Dict* declared_tag_list; /**< User-declared tags. */
|
||||
DictHash* hashtab[ELEMENT_HASH_SIZE]; /**< All of Tidy's built-in tags. */
|
||||
} TidyTagImpl;
|
||||
|
||||
|
||||
/** Coordinates Config update and Tags data.
|
||||
** @param doc The Tidy document.
|
||||
** @param opt The option the tag is intended for.
|
||||
** @param name The name of the new tag.
|
||||
*/
|
||||
void TY_(DeclareUserTag)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
|
||||
|
||||
|
||||
/** Interface for finding a tag by TidyTagId.
|
||||
** @param tid The TidyTagId to search for.
|
||||
** @returns An instance of a Tidy tag.
|
||||
*/
|
||||
const Dict* TY_(LookupTagDef)( TidyTagId tid );
|
||||
|
||||
/** Assigns the node's tag.
|
||||
** @param doc The Tidy document.
|
||||
** @param node The node to assign the tag to.
|
||||
** @returns Returns a bool indicating whether or not the tag was assigned.
|
||||
*/
|
||||
Bool TY_(FindTag)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
/** Finds the parser function for a given node.
|
||||
** @param doc The Tidy document.
|
||||
** @param node The node to lookup.
|
||||
** @returns The parser for the given node.
|
||||
*/
|
||||
Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
|
||||
/** Defines a new user-defined tag.
|
||||
** @param doc The Tidy document.
|
||||
** @param tagType The type of user-defined tag to define.
|
||||
** @param name The name of the new tag.
|
||||
*/
|
||||
void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
|
||||
|
||||
|
||||
/** Frees user-defined tags of the given type, or all user tags if given
|
||||
** `tagtype_null`.
|
||||
** @param doc The Tidy document.
|
||||
** @param tagType The type of tag to free, or `tagtype_null` to free all
|
||||
** user-defined tags.
|
||||
*/
|
||||
void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType );
|
||||
|
||||
|
||||
/** Initiates an iterator for a list of user-declared tags, including autonomous
|
||||
** custom tags detected in the document if @ref TidyUseCustomTags is not set to
|
||||
** **no**.
|
||||
** @param doc An instance of a TidyDocImp to query.
|
||||
** @result Returns a TidyIterator, which is a token used to represent the
|
||||
** current position in a list within LibTidy.
|
||||
*/
|
||||
TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Given a valid TidyIterator initiated with TY_(GetDeclaredTagList)(),
|
||||
** returns a string representing a user-declared or autonomous custom tag.
|
||||
** @remark Specifying tagType limits the scope of the tags to one of
|
||||
** @ref UserTagType types. Note that autonomous custom tags (if used)
|
||||
** are added to one of these option types, depending on the value of
|
||||
** @ref TidyUseCustomTags.
|
||||
** @param doc The Tidy document.
|
||||
** @param tagType The type of tag to iterate through.
|
||||
** @param iter The iterator token provided initially by
|
||||
** TY_(GetDeclaredTagList)().
|
||||
** @result A string containing the next tag.
|
||||
*/
|
||||
ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
|
||||
TidyIterator* iter );
|
||||
|
||||
|
||||
/** Initializes tags and tag structures for the given Tidy document.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(InitTags)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Frees the tags and structures used by Tidy for tags.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(FreeTags)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/** Tidy defaults to HTML5 mode. If the <!DOCTYPE ...> is found to NOT be
|
||||
** HTML5, then adjust the tags table to HTML4 mode.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(AdjustTags)( TidyDocImpl *doc );
|
||||
|
||||
|
||||
/** Reset the tags table back to default HTML5 mode.
|
||||
** @param doc The Tidy document.
|
||||
*/
|
||||
void TY_(ResetTags)( TidyDocImpl *doc );
|
||||
|
||||
|
||||
/** Indicates whether or not Tidy is processing in HTML5 mode.
|
||||
** @param doc The Tidy document.
|
||||
** @returns Returns `yes` if processing in HTML5 mode.
|
||||
*/
|
||||
Bool TY_(IsHTML5Mode)( TidyDocImpl *doc );
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Parser Methods And Attribute Checker Functions for Tags
|
||||
** These functions define the parsers and attribute checking functions for
|
||||
** each of Tidy's tags.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
Parser TY_(ParseHTML);
|
||||
Parser TY_(ParseHead);
|
||||
Parser TY_(ParseTitle);
|
||||
Parser TY_(ParseScript);
|
||||
Parser TY_(ParseFrameSet);
|
||||
Parser TY_(ParseNoFrames);
|
||||
Parser TY_(ParseBody);
|
||||
Parser TY_(ParsePre);
|
||||
Parser TY_(ParseList);
|
||||
Parser TY_(ParseDefList);
|
||||
Parser TY_(ParseBlock);
|
||||
Parser TY_(ParseInline);
|
||||
Parser TY_(ParseEmpty);
|
||||
Parser TY_(ParseTableTag);
|
||||
Parser TY_(ParseColGroup);
|
||||
Parser TY_(ParseRowGroup);
|
||||
Parser TY_(ParseRow);
|
||||
Parser TY_(ParseSelect);
|
||||
Parser TY_(ParseOptGroup);
|
||||
Parser TY_(ParseText);
|
||||
Parser TY_(ParseDatalist);
|
||||
Parser TY_(ParseNamespace);
|
||||
|
||||
CheckAttribs TY_(CheckAttributes);
|
||||
|
||||
|
||||
/** @} */
|
||||
/** @name Other Tag and Node Lookup Functions
|
||||
** These functions perform additional lookup on tags and nodes.
|
||||
*/
|
||||
/** @{ */
|
||||
|
||||
|
||||
/** Gets the TidyTagId of the given node. 0 == TidyTag_UNKNOWN.
|
||||
*/
|
||||
/* Yields TidyTag_UNKNOWN when the node or its tag pointer is null, otherwise
   the tag's id. Note that `node` is evaluated more than once. */
#define TagId(node) ((!(node) || !(node)->tag) ? TidyTag_UNKNOWN : (node)->tag->id)
|
||||
|
||||
|
||||
/** Determines if the given node is of the given tag id type.
|
||||
*/
|
||||
/* True only when the node is non-null, has a tag, and that tag's id equals
   `tid`. `tid` is parenthesized so that an expression argument (for example
   `cond ? A : B`) is compared as a single unit instead of regrouping the
   whole test. Note that `node` is evaluated more than once. */
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
|
||||
|
||||
|
||||
/** Inquires whether or not the given node is a text node.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeIsText)( Node* node );
|
||||
|
||||
|
||||
/** Inquires whether or not the given node is an element node.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeIsElement)( Node* node );
|
||||
|
||||
|
||||
/** Inquires whether or not the given node has any text.
|
||||
** @param doc The Tidy document.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
/** Inquires whether the given element looks like it's an autonomous custom
|
||||
** element tag.
|
||||
** @param element A string to be checked.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(elementIsAutonomousCustomFormat)( ctmbstr element );
|
||||
|
||||
|
||||
/** Inquires whether the given node looks like it's an autonomous custom
|
||||
** element tag.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeIsAutonomousCustomFormat)( Node* node );
|
||||
|
||||
|
||||
/** True if the node looks like it's an autonomous custom element tag, and
|
||||
** TidyUseCustomTags is not disabled, and we're in HTML5 mode, which are all
|
||||
** requirements for valid autonomous custom tags.
|
||||
** @param doc The Tidy document.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeIsAutonomousCustomTag)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
/** Does the node have the indicated content model? True if any of the bits
|
||||
** requested are set.
|
||||
** @param node The node being interrogated.
|
||||
** @param contentModel The content model to check against.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeHasCM)( Node* node, uint contentModel );
|
||||
|
||||
|
||||
/** Does the content model of the node include block?
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeCMIsBlock)( Node* node );
|
||||
|
||||
|
||||
/** Does the content model of the node include inline?
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeCMIsInline)( Node* node );
|
||||
|
||||
|
||||
/** Does the content model of the node include empty?
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeCMIsEmpty)( Node* node );
|
||||
|
||||
|
||||
/** Is the node a header, such as H1, H2, ..., H6?
|
||||
** @param node The node being interrogated.
|
||||
** @returns The status of the inquiry.
|
||||
*/
|
||||
Bool TY_(nodeIsHeader)( Node* node );
|
||||
|
||||
|
||||
/** Inquires as to the header level of the given node: 1, 2, ..., 6.
|
||||
** @param node The node being interrogated.
|
||||
** @returns The header level.
|
||||
*/
|
||||
uint TY_(nodeHeaderLevel)( Node* node );
|
||||
|
||||
|
||||
#define nodeIsHTML( node ) TagIsId( node, TidyTag_HTML )
|
||||
#define nodeIsHEAD( node ) TagIsId( node, TidyTag_HEAD )
|
||||
#define nodeIsTITLE( node ) TagIsId( node, TidyTag_TITLE )
|
||||
#define nodeIsBASE( node ) TagIsId( node, TidyTag_BASE )
|
||||
#define nodeIsMETA( node ) TagIsId( node, TidyTag_META )
|
||||
#define nodeIsBODY( node ) TagIsId( node, TidyTag_BODY )
|
||||
#define nodeIsFRAMESET( node ) TagIsId( node, TidyTag_FRAMESET )
|
||||
#define nodeIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
|
||||
#define nodeIsIFRAME( node ) TagIsId( node, TidyTag_IFRAME )
|
||||
#define nodeIsNOFRAMES( node ) TagIsId( node, TidyTag_NOFRAMES )
|
||||
#define nodeIsHR( node ) TagIsId( node, TidyTag_HR )
|
||||
#define nodeIsH1( node ) TagIsId( node, TidyTag_H1 )
|
||||
#define nodeIsH2( node ) TagIsId( node, TidyTag_H2 )
|
||||
#define nodeIsPRE( node ) TagIsId( node, TidyTag_PRE )
|
||||
#define nodeIsLISTING( node ) TagIsId( node, TidyTag_LISTING )
|
||||
#define nodeIsP( node ) TagIsId( node, TidyTag_P )
|
||||
#define nodeIsUL( node ) TagIsId( node, TidyTag_UL )
|
||||
#define nodeIsOL( node ) TagIsId( node, TidyTag_OL )
|
||||
#define nodeIsDL( node ) TagIsId( node, TidyTag_DL )
|
||||
#define nodeIsDIR( node ) TagIsId( node, TidyTag_DIR )
|
||||
#define nodeIsLI( node ) TagIsId( node, TidyTag_LI )
|
||||
#define nodeIsDT( node ) TagIsId( node, TidyTag_DT )
|
||||
#define nodeIsDD( node ) TagIsId( node, TidyTag_DD )
|
||||
#define nodeIsTABLE( node ) TagIsId( node, TidyTag_TABLE )
|
||||
#define nodeIsCAPTION( node ) TagIsId( node, TidyTag_CAPTION )
|
||||
#define nodeIsTD( node ) TagIsId( node, TidyTag_TD )
|
||||
#define nodeIsTH( node ) TagIsId( node, TidyTag_TH )
|
||||
#define nodeIsTR( node ) TagIsId( node, TidyTag_TR )
|
||||
#define nodeIsCOL( node ) TagIsId( node, TidyTag_COL )
|
||||
#define nodeIsCOLGROUP( node ) TagIsId( node, TidyTag_COLGROUP )
|
||||
#define nodeIsBR( node ) TagIsId( node, TidyTag_BR )
|
||||
#define nodeIsA( node ) TagIsId( node, TidyTag_A )
|
||||
#define nodeIsLINK( node ) TagIsId( node, TidyTag_LINK )
|
||||
#define nodeIsB( node ) TagIsId( node, TidyTag_B )
|
||||
#define nodeIsI( node ) TagIsId( node, TidyTag_I )
|
||||
#define nodeIsSTRONG( node ) TagIsId( node, TidyTag_STRONG )
|
||||
#define nodeIsEM( node ) TagIsId( node, TidyTag_EM )
|
||||
#define nodeIsBIG( node ) TagIsId( node, TidyTag_BIG )
|
||||
#define nodeIsSMALL( node ) TagIsId( node, TidyTag_SMALL )
|
||||
#define nodeIsPARAM( node ) TagIsId( node, TidyTag_PARAM )
|
||||
#define nodeIsOPTION( node ) TagIsId( node, TidyTag_OPTION )
|
||||
#define nodeIsOPTGROUP( node ) TagIsId( node, TidyTag_OPTGROUP )
|
||||
#define nodeIsIMG( node ) TagIsId( node, TidyTag_IMG )
|
||||
#define nodeIsMAP( node ) TagIsId( node, TidyTag_MAP )
|
||||
#define nodeIsAREA( node ) TagIsId( node, TidyTag_AREA )
|
||||
#define nodeIsNOBR( node ) TagIsId( node, TidyTag_NOBR )
|
||||
#define nodeIsWBR( node ) TagIsId( node, TidyTag_WBR )
|
||||
#define nodeIsFONT( node ) TagIsId( node, TidyTag_FONT )
|
||||
#define nodeIsLAYER( node ) TagIsId( node, TidyTag_LAYER )
|
||||
#define nodeIsSPACER( node ) TagIsId( node, TidyTag_SPACER )
|
||||
#define nodeIsCENTER( node ) TagIsId( node, TidyTag_CENTER )
|
||||
#define nodeIsSTYLE( node ) TagIsId( node, TidyTag_STYLE )
|
||||
#define nodeIsSCRIPT( node ) TagIsId( node, TidyTag_SCRIPT )
|
||||
#define nodeIsNOSCRIPT( node ) TagIsId( node, TidyTag_NOSCRIPT )
|
||||
#define nodeIsFORM( node ) TagIsId( node, TidyTag_FORM )
|
||||
#define nodeIsTEXTAREA( node ) TagIsId( node, TidyTag_TEXTAREA )
|
||||
#define nodeIsBLOCKQUOTE( node ) TagIsId( node, TidyTag_BLOCKQUOTE )
|
||||
#define nodeIsAPPLET( node ) TagIsId( node, TidyTag_APPLET )
|
||||
#define nodeIsOBJECT( node ) TagIsId( node, TidyTag_OBJECT )
|
||||
#define nodeIsDIV( node ) TagIsId( node, TidyTag_DIV )
|
||||
#define nodeIsSPAN( node ) TagIsId( node, TidyTag_SPAN )
|
||||
#define nodeIsINPUT( node ) TagIsId( node, TidyTag_INPUT )
|
||||
#define nodeIsQ( node ) TagIsId( node, TidyTag_Q )
|
||||
#define nodeIsLABEL( node ) TagIsId( node, TidyTag_LABEL )
|
||||
#define nodeIsH3( node ) TagIsId( node, TidyTag_H3 )
|
||||
#define nodeIsH4( node ) TagIsId( node, TidyTag_H4 )
|
||||
#define nodeIsH5( node ) TagIsId( node, TidyTag_H5 )
|
||||
#define nodeIsH6( node ) TagIsId( node, TidyTag_H6 )
|
||||
#define nodeIsADDRESS( node ) TagIsId( node, TidyTag_ADDRESS )
|
||||
#define nodeIsXMP( node ) TagIsId( node, TidyTag_XMP )
|
||||
#define nodeIsSELECT( node ) TagIsId( node, TidyTag_SELECT )
|
||||
#define nodeIsBLINK( node ) TagIsId( node, TidyTag_BLINK )
|
||||
#define nodeIsMARQUEE( node ) TagIsId( node, TidyTag_MARQUEE )
|
||||
#define nodeIsEMBED( node ) TagIsId( node, TidyTag_EMBED )
|
||||
#define nodeIsBASEFONT( node ) TagIsId( node, TidyTag_BASEFONT )
|
||||
#define nodeIsISINDEX( node ) TagIsId( node, TidyTag_ISINDEX )
|
||||
#define nodeIsS( node ) TagIsId( node, TidyTag_S )
|
||||
#define nodeIsSTRIKE( node ) TagIsId( node, TidyTag_STRIKE )
|
||||
#define nodeIsSUB( node ) TagIsId( node, TidyTag_SUB )
|
||||
#define nodeIsSUP( node ) TagIsId( node, TidyTag_SUP )
|
||||
#define nodeIsU( node ) TagIsId( node, TidyTag_U )
|
||||
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
|
||||
#define nodeIsMAIN( node ) TagIsId( node, TidyTag_MAIN )
|
||||
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
|
||||
#define nodeIsCANVAS( node ) TagIsId( node, TidyTag_CANVAS )
|
||||
#define nodeIsPROGRESS( node ) TagIsId( node, TidyTag_PROGRESS )
|
||||
|
||||
#define nodeIsINS( node ) TagIsId( node, TidyTag_INS )
|
||||
#define nodeIsDEL( node ) TagIsId( node, TidyTag_DEL )
|
||||
|
||||
#define nodeIsSVG( node ) TagIsId( node, TidyTag_SVG )
|
||||
|
||||
/* HTML5 */
|
||||
#define nodeIsDATALIST( node ) TagIsId( node, TidyTag_DATALIST )
|
||||
#define nodeIsDATA( node ) TagIsId( node, TidyTag_DATA )
|
||||
#define nodeIsMATHML( node ) TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */
|
||||
#define nodeIsTEMPLATE( node ) TagIsId( node, TidyTag_TEMPLATE )
|
||||
|
||||
/* NOT in HTML 5 */
|
||||
#define nodeIsACRONYM( node ) TagIsId( node, TidyTag_ACRONYM )
|
||||
#define nodesIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
|
||||
#define nodeIsTT( node ) TagIsId( node, TidyTag_TT )
|
||||
|
||||
|
||||
/** @} name */
|
||||
/** @} tags_h group */
|
||||
/** @} internal_api addtogroup */
|
||||
|
||||
|
||||
#endif /* __TAGS_H__ */
|
188
third_party/tidy/tidy-int.h
vendored
Normal file
188
third_party/tidy/tidy-int.h
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
/* clang-format off */
|
||||
#ifndef __TIDY_INT_H__
|
||||
#define __TIDY_INT_H__
|
||||
|
||||
/* tidy-int.h -- internal library declarations
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/config.h"
|
||||
#include "third_party/tidy/lexer.h"
|
||||
#include "third_party/tidy/tags.h"
|
||||
#include "third_party/tidy/attrs.h"
|
||||
#include "third_party/tidy/pprint.h"
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/message.h"
|
||||
#include "libc/time/struct/utimbuf.h"
|
||||
#include "third_party/tidy/parser.h"
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(a,b) (((a) > (b))?(a):(b))
|
||||
#endif
|
||||
#ifndef MIN
|
||||
#define MIN(a,b) (((a) < (b))?(a):(b))
|
||||
#endif
|
||||
|
||||
/*\
|
||||
* Issue #166 - repeated <main> element
|
||||
* Change the previous on/off uint flag badForm
|
||||
* to a BIT flag to support other than <form>
|
||||
* errors. This could be extended more...
|
||||
\*/
|
||||
#define flg_BadForm 0x00000001
|
||||
#define flg_BadMain 0x00000002
|
||||
|
||||
/* Implementation structure behind the opaque TidyDoc handle; the
   tidyDocToImpl() and tidyImplToDoc() macros in this header cast between
   the two. */
struct _TidyDocImpl
|
||||
{
|
||||
/* The Document Tree (and backing store buffer) */
|
||||
Node root; /* This MUST remain the first declared
|
||||
variable in this structure */
|
||||
Lexer* lexer;
|
||||
|
||||
/* Config + Markup Declarations */
|
||||
TidyConfigImpl config;
|
||||
TidyTagImpl tags;
|
||||
TidyAttribImpl attribs;
|
||||
TidyAccessImpl access;
|
||||
TidyMutedMessages muted;
|
||||
|
||||
/* The Pretty Print buffer */
|
||||
TidyPrintImpl pprint;
|
||||
|
||||
/* I/O */
|
||||
StreamIn* docIn;
|
||||
StreamOut* docOut;
|
||||
StreamOut* errout;
|
||||
|
||||
/* Filter, callback and progress hooks */
TidyReportFilter reportFilter;
|
||||
TidyReportCallback reportCallback;
|
||||
TidyMessageCallback messageCallback;
|
||||
TidyOptCallback pOptCallback;
|
||||
TidyConfigCallback pConfigCallback;
|
||||
TidyConfigChangeCallback pConfigChangeCallback;
|
||||
TidyPPProgress progressCallback;
|
||||
|
||||
TidyParserStack stack;
|
||||
|
||||
/* Parse + Repair Results */
|
||||
uint optionErrors;
|
||||
uint errors;
|
||||
uint warnings;
|
||||
uint accessErrors;
|
||||
uint infoMessages;
|
||||
uint docErrors;
|
||||
int parseStatus;
|
||||
|
||||
uint badAccess; /* for accessibility errors */
|
||||
uint badLayout; /* for bad style errors */
|
||||
uint badChars; /* for bad char encodings */
|
||||
uint badForm; /* bit field (see flg_BadForm and flg_BadMain above), for badly placed form tags, or other format errors */
|
||||
uint footnotes; /* bit field, for other footnotes, until formalized */
|
||||
|
||||
Bool HTML5Mode; /* current mode is html5 */
|
||||
Bool xmlDetected; /* true if XML was used/detected */
|
||||
|
||||
uint indent_char; /* space or tab character, for indenting */
|
||||
|
||||
/* Memory allocator; this is the allocator the TidyDocAlloc(),
   TidyDocRealloc(), TidyDocFree() and TidyDocPanic() macros pass along */
|
||||
TidyAllocator* allocator;
|
||||
|
||||
/* Miscellaneous */
|
||||
void* appData;
|
||||
uint nClassId;
|
||||
Bool inputHadBOM;
|
||||
|
||||
/* This member exists only when PRESERVE_FILE_TIMES is non-zero, so the
   struct layout depends on that setting. */
#if PRESERVE_FILE_TIMES
|
||||
struct utimbuf filetimes;
|
||||
#endif
|
||||
tmbstr givenDoctype;
|
||||
};
|
||||
|
||||
/** The basic struct for communicating a message within LibTidy. All of the
|
||||
** relevant information pertaining to a message can be retrieved with the
|
||||
** accessor functions and one of these records.
|
||||
*/
|
||||
struct _TidyMessageImpl
|
||||
{
|
||||
TidyDocImpl *tidyDoc; /* document instance this message is attributed to */
|
||||
Node *tidyNode; /* the node reporting the message, if applicable */
|
||||
uint code; /* the message code */
|
||||
int line; /* the line message applies to */
|
||||
int column; /* the column the message applies to */
|
||||
TidyReportLevel level; /* the severity level of the message */
|
||||
Bool allowMessage; /* indicates whether or not a filter rejected a message */
|
||||
Bool muted; /* indicates whether or not a configuration mutes this message */
|
||||
|
||||
int argcount; /* the number of arguments */
|
||||
struct printfArg* arguments; /* the arguments' values and types */
|
||||
|
||||
ctmbstr messageKey; /* the message code as a key string */
|
||||
|
||||
ctmbstr messageFormatDefault; /* the built-in format string */
|
||||
ctmbstr messageFormat; /* the localized format string */
|
||||
|
||||
tmbstr messageDefault; /* the message, formatted, default language */
|
||||
tmbstr message; /* the message, formatted, localized */
|
||||
|
||||
tmbstr messagePosDefault; /* the position part, default language */
|
||||
tmbstr messagePos; /* the position part, localized */
|
||||
|
||||
ctmbstr messagePrefixDefault; /* the prefix part, default language */
|
||||
ctmbstr messagePrefix; /* the prefix part, localized */
|
||||
|
||||
tmbstr messageOutputDefault; /* the complete string Tidy would output */
|
||||
tmbstr messageOutput; /* the complete string, localized */
|
||||
};
|
||||
|
||||
|
||||
#define tidyDocToImpl( tdoc ) ((TidyDocImpl*)(tdoc))
|
||||
#define tidyImplToDoc( doc ) ((TidyDoc)(doc))
|
||||
|
||||
#define tidyMessageToImpl( tmessage ) ((TidyMessageImpl*)(tmessage))
|
||||
#define tidyImplToMessage( message ) ((TidyMessage)(message))
|
||||
|
||||
#define tidyNodeToImpl( tnod ) ((Node*)(tnod))
|
||||
#define tidyImplToNode( node ) ((TidyNode)(node))
|
||||
|
||||
#define tidyAttrToImpl( tattr ) ((AttVal*)(tattr))
|
||||
#define tidyImplToAttr( attval ) ((TidyAttr)(attval))
|
||||
|
||||
#define tidyOptionToImpl( topt ) ((const TidyOptionImpl*)(topt))
|
||||
#define tidyImplToOption( option ) ((TidyOption)(option))
|
||||
|
||||
|
||||
/** Wrappers for easy memory allocation using the document's allocator */
|
||||
#define TidyDocAlloc(doc, size) TidyAlloc((doc)->allocator, size)
|
||||
#define TidyDocRealloc(doc, block, size) TidyRealloc((doc)->allocator, block, size)
|
||||
#define TidyDocFree(doc, block) TidyFree((doc)->allocator, block)
|
||||
#define TidyDocPanic(doc, msg) TidyPanic((doc)->allocator, msg)
|
||||
|
||||
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in );
|
||||
|
||||
/*
|
||||
[i_a] generic node tree traversal code; used in several spots.
|
||||
|
||||
Define your own callback, which returns one of the NodeTraversalSignal values
|
||||
to instruct the tree traversal routine TraverseNodeTree() what to do.
|
||||
|
||||
Pass custom data to/from the callback using the 'propagate' reference.
|
||||
*/
|
||||
/* Values a NodeTraversalCallBack returns to steer TraverseNodeTree(). */
typedef enum
|
||||
{
|
||||
ContinueTraversal, /* visit siblings and children */
|
||||
SkipChildren, /* visit siblings of this node; ignore its children */
|
||||
SkipSiblings, /* ignore subsequent siblings of this node; ignore their children; presumably this node's own children are still traversed -- TODO confirm against TraverseNodeTree() */
|
||||
SkipChildrenAndSiblings, /* presumably: ignore this node's children and its subsequent siblings -- TODO confirm against TraverseNodeTree() */
|
||||
VisitParent, /* REVERSE traversal: visit the parent of the current node */
|
||||
ExitTraversal /* terminate traversal on the spot */
|
||||
} NodeTraversalSignal;
|
||||
|
||||
typedef NodeTraversalSignal NodeTraversalCallBack(TidyDocImpl* doc, Node* node, void *propagate);
|
||||
|
||||
NodeTraversalSignal TY_(TraverseNodeTree)(TidyDocImpl* doc, Node* node, NodeTraversalCallBack *cb, void *propagate);
|
||||
|
||||
#endif /* __TIDY_INT_H__ */
|
2595
third_party/tidy/tidy.c
vendored
Normal file
2595
third_party/tidy/tidy.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
2222
third_party/tidy/tidy.h
vendored
Normal file
2222
third_party/tidy/tidy.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
83
third_party/tidy/tidy.mk
vendored
Normal file
83
third_party/tidy/tidy.mk
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
# Build rules for third_party/tidy: the tidy.a archive and the tidy.com binary.
PKGS += THIRD_PARTY_TIDY
|
||||
|
||||
THIRD_PARTY_TIDY_SRCS = $(THIRD_PARTY_TIDY_A_SRCS)
|
||||
THIRD_PARTY_TIDY_HDRS = $(THIRD_PARTY_TIDY_A_HDRS)
|
||||
THIRD_PARTY_TIDY_INCS = $(THIRD_PARTY_TIDY_A_INCS)
|
||||
THIRD_PARTY_TIDY_BINS = $(THIRD_PARTY_TIDY_COMS) $(THIRD_PARTY_TIDY_COMS:%=%.dbg)
|
||||
|
||||
THIRD_PARTY_TIDY_ARTIFACTS += THIRD_PARTY_TIDY_A
|
||||
THIRD_PARTY_TIDY = $(THIRD_PARTY_TIDY_A_DEPS) $(THIRD_PARTY_TIDY_A)
|
||||
THIRD_PARTY_TIDY_A = o/$(MODE)/third_party/tidy/tidy.a
|
||||
# Every entry directly under third_party/tidy/ (expanded once, because of :=);
# the three filters below split that list by file extension.
THIRD_PARTY_TIDY_A_FILES := $(wildcard third_party/tidy/*)
|
||||
THIRD_PARTY_TIDY_A_HDRS = $(filter %.h,$(THIRD_PARTY_TIDY_A_FILES))
|
||||
THIRD_PARTY_TIDY_A_INCS = $(filter %.inc,$(THIRD_PARTY_TIDY_A_FILES))
|
||||
THIRD_PARTY_TIDY_A_SRCS = $(filter %.c,$(THIRD_PARTY_TIDY_A_FILES))
|
||||
THIRD_PARTY_TIDY_A_OBJS = $(THIRD_PARTY_TIDY_A_SRCS:%.c=o/$(MODE)/%.o)
|
||||
|
||||
# Packages this archive depends on directly; _DEPS below expands each name
# and de-duplicates the result.
THIRD_PARTY_TIDY_A_DIRECTDEPS = \
|
||||
LIBC_FMT \
|
||||
LIBC_INTRIN \
|
||||
LIBC_MEM \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_RUNTIME \
|
||||
LIBC_CALLS \
|
||||
LIBC_UNICODE \
|
||||
LIBC_STDIO \
|
||||
LIBC_SYSV \
|
||||
LIBC_STR \
|
||||
LIBC_STUBS
|
||||
|
||||
THIRD_PARTY_TIDY_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(THIRD_PARTY_TIDY_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
THIRD_PARTY_TIDY_A_CHECKS = \
|
||||
$(THIRD_PARTY_TIDY_A).pkg \
|
||||
$(THIRD_PARTY_TIDY_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
$(THIRD_PARTY_TIDY_A): \
|
||||
third_party/tidy/ \
|
||||
$(THIRD_PARTY_TIDY_A).pkg \
|
||||
$(THIRD_PARTY_TIDY_A_OBJS)
|
||||
|
||||
$(THIRD_PARTY_TIDY_A).pkg: \
|
||||
$(THIRD_PARTY_TIDY_A_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_TIDY_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
# Debug binary, linked with $(APELINK); its prerequisites include tidy.o and
# the .tidyrc.zip.o object.
o/$(MODE)/third_party/tidy/tidy.com.dbg: \
|
||||
$(THIRD_PARTY_TIDY) \
|
||||
o/$(MODE)/third_party/tidy/tidy.o \
|
||||
o/$(MODE)/third_party/tidy/.tidyrc.zip.o \
|
||||
$(CRT) \
|
||||
$(APE_NO_MODIFY_SELF)
|
||||
@$(APELINK)
|
||||
|
||||
# Final binary: objcopy -S -O binary of the .dbg file, then symtab.com writes
# a .symtab file from the .dbg file and zip.com adds that file to the output.
o/$(MODE)/third_party/tidy/tidy.com: \
|
||||
o/$(MODE)/third_party/tidy/tidy.com.dbg \
|
||||
o/$(MODE)/third_party/zip/zip.com \
|
||||
o/$(MODE)/tool/build/symtab.com
|
||||
@$(COMPILE) -AOBJCOPY -T$@ $(OBJCOPY) -S -O binary $< $@
|
||||
@$(COMPILE) -ASYMTAB o/$(MODE)/tool/build/symtab.com \
|
||||
-o o/$(MODE)/third_party/tidy/.tidy/.symtab $<
|
||||
@$(COMPILE) -AZIP -T$@ o/$(MODE)/third_party/zip/zip.com -9qj $@ \
|
||||
o/$(MODE)/third_party/tidy/.tidy/.symtab
|
||||
|
||||
# Target-specific variable: -B is appended to ZIPOBJ_FLAGS for this object only.
o/$(MODE)/third_party/tidy/.tidyrc.zip.o: \
|
||||
ZIPOBJ_FLAGS += \
|
||||
-B
|
||||
|
||||
THIRD_PARTY_TIDY_COMS = \
|
||||
o/$(MODE)/third_party/tidy/tidy.com
|
||||
|
||||
THIRD_PARTY_TIDY_LIBS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)))
|
||||
# NOTE(review): this reassigns THIRD_PARTY_TIDY_SRCS, superseding the
# definition near the top of this file.
THIRD_PARTY_TIDY_SRCS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_TIDY_CHECKS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_CHECKS))
|
||||
THIRD_PARTY_TIDY_OBJS = $(foreach x,$(THIRD_PARTY_TIDY_ARTIFACTS),$($(x)_OBJS))
|
||||
$(THIRD_PARTY_TIDY_OBJS): $(BUILD_FILES) third_party/tidy/tidy.mk
|
||||
|
||||
# Aggregate phony target: depends on the binaries and on the check targets.
.PHONY: o/$(MODE)/third_party/tidy
|
||||
o/$(MODE)/third_party/tidy: \
|
||||
$(THIRD_PARTY_TIDY_BINS) \
|
||||
$(THIRD_PARTY_TIDY_CHECKS)
|
126
third_party/tidy/tidybuffio.h
vendored
Normal file
126
third_party/tidy/tidybuffio.h
vendored
Normal file
|
@ -0,0 +1,126 @@
|
|||
#ifndef __TIDY_BUFFIO_H__
|
||||
#define __TIDY_BUFFIO_H__
|
||||
/* clang-format off */
|
||||
|
||||
/**************************************************************************//**
|
||||
* @file
|
||||
* Treat buffer as a stream that Tidy can use for I/O operations. It offers
|
||||
* the ability for the buffer to grow as bytes are added, and keeps track
|
||||
* of current read and write points.
|
||||
*
|
||||
* @author
|
||||
* HTACG, et al (consult git log)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
|
||||
* Institute of Technology, European Research Consortium for Informatics
|
||||
* and Mathematics, Keio University).
|
||||
* @copyright
|
||||
* See tidy.h for license.
|
||||
*
|
||||
* @date
|
||||
* Consult git log.
|
||||
******************************************************************************/
|
||||
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
#include "third_party/tidy/tidy.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** A TidyBuffer is a chunk of memory that can be used for multiple I/O purposes
|
||||
** within Tidy.
|
||||
** @ingroup IO
|
||||
*/
|
||||
TIDY_STRUCT
|
||||
struct _TidyBuffer
|
||||
{
|
||||
TidyAllocator* allocator; /**< Memory allocator */
|
||||
byte* bp; /**< Pointer to bytes */
|
||||
uint size; /**< Number of bytes currently in use */
|
||||
uint allocated; /**< Number of bytes allocated */
|
||||
uint next; /**< Offset of current input position */
|
||||
};
|
||||
|
||||
/** Initialize data structure using the default allocator */
|
||||
void tidyBufInit( TidyBuffer* buf );
|
||||
|
||||
/** Initialize data structure using the given custom allocator */
|
||||
void tidyBufInitWithAllocator( TidyBuffer* buf, TidyAllocator* allocator );
|
||||
|
||||
/** Free current buffer, allocate given amount, reset input pointer,
|
||||
use the default allocator */
|
||||
void tidyBufAlloc( TidyBuffer* buf, uint allocSize );
|
||||
|
||||
/** Free current buffer, allocate given amount, reset input pointer,
|
||||
use the given custom allocator */
|
||||
void tidyBufAllocWithAllocator( TidyBuffer* buf,
|
||||
TidyAllocator* allocator,
|
||||
uint allocSize );
|
||||
|
||||
/** Expand buffer to given size.
|
||||
** Chunk size is minimum growth. Pass 0 for default of 256 bytes.
|
||||
*/
|
||||
void tidyBufCheckAlloc( TidyBuffer* buf,
|
||||
uint allocSize, uint chunkSize );
|
||||
|
||||
/** Free current contents and zero out */
|
||||
void tidyBufFree( TidyBuffer* buf );
|
||||
|
||||
/** Set buffer bytes to 0 */
|
||||
void tidyBufClear( TidyBuffer* buf );
|
||||
|
||||
/** Attach to existing buffer */
|
||||
void tidyBufAttach( TidyBuffer* buf, byte* bp, uint size );
|
||||
|
||||
/** Detach from buffer. Caller must free. */
|
||||
void tidyBufDetach( TidyBuffer* buf );
|
||||
|
||||
|
||||
/** Append bytes to buffer. Expand if necessary. */
|
||||
void tidyBufAppend( TidyBuffer* buf, void* vp, uint size );
|
||||
|
||||
/** Append one byte to buffer. Expand if necessary. */
|
||||
void tidyBufPutByte( TidyBuffer* buf, byte bv );
|
||||
|
||||
/** Get byte from end of buffer */
|
||||
int tidyBufPopByte( TidyBuffer* buf );
|
||||
|
||||
|
||||
/** Get byte from front of buffer. Increment input offset. */
|
||||
int tidyBufGetByte( TidyBuffer* buf );
|
||||
|
||||
/** At end of buffer? */
|
||||
Bool tidyBufEndOfInput( TidyBuffer* buf );
|
||||
|
||||
/** Put a byte back into the buffer. Decrement input offset. */
|
||||
void tidyBufUngetByte( TidyBuffer* buf, byte bv );
|
||||
|
||||
|
||||
/**************
|
||||
TIDY
|
||||
**************/
|
||||
|
||||
/* Forward declarations
|
||||
*/
|
||||
|
||||
/** Initialize a buffer input source */
|
||||
void tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf );
|
||||
|
||||
/** Initialize a buffer output sink */
|
||||
void tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* __TIDY_BUFFIO_H__ */
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1472
third_party/tidy/tidyenum.h
vendored
Normal file
1472
third_party/tidy/tidyenum.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
2765
third_party/tidy/tidylib.c
vendored
Normal file
2765
third_party/tidy/tidylib.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
65
third_party/tidy/tidyplatform.h
vendored
Normal file
65
third_party/tidy/tidyplatform.h
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
#ifndef __TIDY_PLATFORM_H__
|
||||
#define __TIDY_PLATFORM_H__
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define LINUX_OS
|
||||
#define PLATFORM_NAME "Cosmopolitan"
|
||||
#define TIDY_CONFIG_FILE "/zip/.tidyrc"
|
||||
#define TIDY_USER_CONFIG_FILE "~/.tidyrc"
|
||||
#define SUPPORT_LOCALIZATIONS 0
|
||||
#define SUPPORT_CONSOLE_APP 1
|
||||
#define FILENAMES_CASE_SENSITIVE 1
|
||||
#define PRESERVE_FILE_TIMES 1
|
||||
#define HAS_FUTIME 0
|
||||
#define UTIME_NEEDS_CLOSED_FILE 1
|
||||
#define HAS_VSNPRINTF 1
|
||||
#define SUPPORT_POSIX_MAPPED_FILES 1
|
||||
#define TIDY_EXPORT
|
||||
#define TIDY_STRUCT
|
||||
#define TIDY_THREAD_LOCAL
|
||||
#define TIDY_INDENTATION_LIMIT 50
|
||||
#define TIDY_CALL
|
||||
/* #define SUPPORT_GETPWNAM */
|
||||
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
#define ARG_UNUSED(x) x __attribute__((__unused__))
|
||||
#define FUNC_UNUSED __attribute__((__unused__))
|
||||
#else
|
||||
#define ARG_UNUSED(x) x
|
||||
#define FUNC_UNUSED
|
||||
#endif
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned char byte;
|
||||
typedef uint tchar; /* single, full character */
|
||||
typedef char tmbchar; /* single, possibly partial character */
|
||||
typedef enum { no, yes } Bool;
|
||||
|
||||
typedef tmbchar* tmbstr; /* pointer to buffer of possibly partial chars */
|
||||
typedef const tmbchar* ctmbstr; /* Ditto, but const */
|
||||
#define NULLSTR (tmbstr) ""
|
||||
#define TMBSTR_DEFINED
|
||||
|
||||
/* Opaque data structure.
|
||||
* Cast to implementation type struct within lib.
|
||||
* This will reduce inter-dependencies/conflicts w/ application code.
|
||||
*/
|
||||
#if 1
|
||||
#define opaque_type(typenam) \
|
||||
struct _##typenam { \
|
||||
int _opaque; \
|
||||
}; \
|
||||
typedef struct _##typenam const* typenam
|
||||
#else
|
||||
#define opaque_type(typenam) typedef const void* typenam
|
||||
#endif
|
||||
|
||||
/* Opaque data structure used to pass back
|
||||
** and forth to keep current position in a
|
||||
** list or other collection.
|
||||
*/
|
||||
opaque_type(TidyIterator);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* __TIDY_PLATFORM_H__ */
|
289
third_party/tidy/tmbstr.c
vendored
Normal file
289
third_party/tidy/tmbstr.c
vendored
Normal file
|
@ -0,0 +1,289 @@
|
|||
/* clang-format off */
|
||||
/* tmbstr.c -- Tidy string utility functions
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "third_party/tidy/tmbstr.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "third_party/tidy/lexer.h"
|
||||
|
||||
/* like strdup but using an allocator */
|
||||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
|
||||
{
|
||||
tmbstr s = NULL;
|
||||
if ( str )
|
||||
{
|
||||
uint len = TY_(tmbstrlen)( str );
|
||||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
|
||||
while ( 0 != (*cp++ = *str++) )
|
||||
/**/;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* like strndup but using an allocator */
|
||||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
|
||||
{
|
||||
tmbstr s = NULL;
|
||||
if ( str && len > 0 )
|
||||
{
|
||||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
|
||||
while ( len-- > 0 && (*cp++ = *str++) )
|
||||
/**/;
|
||||
*cp = 0;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* exactly same as strncpy */
|
||||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
|
||||
{
|
||||
if ( s1 != NULL && s2 != NULL )
|
||||
{
|
||||
tmbstr cp = s1;
|
||||
while ( *s2 && --size ) /* Predecrement: reserve byte */
|
||||
*cp++ = *s2++; /* for NULL terminator. */
|
||||
*cp = 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/* Allows expressions like: cp += tmbstrcpy( cp, "joebob" );
|
||||
*/
|
||||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint ncpy = 0;
|
||||
while (0 != (*s1++ = *s2++) )
|
||||
++ncpy;
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
/* Allows expressions like: cp += tmbstrcat( cp, "joebob" );
|
||||
*/
|
||||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint ncpy = 0;
|
||||
while ( *s1 )
|
||||
++s1;
|
||||
|
||||
while (0 != (*s1++ = *s2++) )
|
||||
++ncpy;
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
/* exactly same as strcmp */
|
||||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
int c;
|
||||
while ((c = *s1) == *s2)
|
||||
{
|
||||
if (c == '\0')
|
||||
return 0;
|
||||
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
|
||||
return (*s1 > *s2 ? 1 : -1);
|
||||
}
|
||||
|
||||
/* returns byte count, not char count */
|
||||
uint TY_(tmbstrlen)( ctmbstr str )
|
||||
{
|
||||
uint len = 0;
|
||||
if ( str )
|
||||
{
|
||||
while ( *str++ )
|
||||
++len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
MS C 4.2 (and ANSI C) doesn't include strcasecmp.
|
||||
Note that tolower and toupper won't
|
||||
work on chars > 127.
|
||||
|
||||
Neither does ToLower()!
|
||||
*/
|
||||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint c;
|
||||
|
||||
while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
|
||||
{
|
||||
if (c == '\0')
|
||||
return 0;
|
||||
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
|
||||
return (*s1 > *s2 ? 1 : -1);
|
||||
}
|
||||
|
||||
/* Compare at most n bytes of s1 and s2 (strncmp-like).
**
** A NULL pointer sorts before any non-NULL string; two NULLs are equal.
** Returns 0 when the first n bytes match or both strings end before a
** difference is found, otherwise 1 or -1 for the first differing pair.
**
** n is tested before either string is dereferenced, so no byte beyond
** the first n is ever read.  This matters when an argument is a length
** delimited region rather than a NUL-terminated string.
*/
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    if (s1 == NULL || s2 == NULL)
    {
        if (s1 == s2)
            return 0;
        return (s1 == NULL ? -1 : 1);
    }

    for ( ; n > 0; ++s1, ++s2, --n )
    {
        if ( (byte)*s1 != (byte)*s2 )
            return (*s1 > *s2 ? 1 : -1);

        if ( *s1 == '\0' )
            return 0;
    }

    return 0;
}
|
||||
|
||||
/* Case-insensitive comparison of at most n bytes of s1 and s2.
**
** Returns 0 when the first n bytes match ignoring case or both strings
** end before a difference is found.  Otherwise the result is 1 or -1,
** decided from the original (not lower-cased) chars at the first
** position that differs.
**
** n is tested before either string is dereferenced, so no byte beyond
** the first n is ever read.
*/
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; ++s1, ++s2, --n )
    {
        uint c = (uint)(*s1);

        if ( TY_(ToLower)(c) != TY_(ToLower)((uint)(*s2)) )
            return (*s1 > *s2 ? 1 : -1);

        if ( c == '\0' )
            return 0;
    }

    return 0;
}
|
||||
|
||||
/* Case-sensitive search for s2 within the first len1 bytes of s1.
** Returns a pointer to the first match, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* last start offset worth trying */
    int ix = 0;

    while ( ix <= last )
    {
        ctmbstr at = s1 + ix;
        if ( TY_(tmbstrncmp)(at, s2, len2) == 0 )
            return at;
        ++ix;
    }
    return NULL;
}
|
||||
|
||||
/* Case-insensitive search for s2 within s1.
** Returns a pointer to the first match, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
{
    uint len1 = TY_(tmbstrlen)(s1);
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* last start offset worth trying */
    int ix = 0;

    while ( ix <= last )
    {
        ctmbstr at = s1 + ix;
        if ( TY_(tmbstrncasecmp)(at, s2, len2) == 0 )
            return at;
        ++ix;
    }
    return NULL;
}
|
||||
|
||||
/* Transform ASCII chars in string to lower case */
|
||||
tmbstr TY_(tmbstrtolower)( tmbstr s )
|
||||
{
|
||||
tmbstr cp;
|
||||
for ( cp=s; *cp; ++cp )
|
||||
*cp = (tmbchar) TY_(ToLower)( *cp );
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Transform ASCII chars in string to upper case */
|
||||
tmbstr TY_(tmbstrtoupper)(tmbstr s)
|
||||
{
|
||||
tmbstr cp;
|
||||
|
||||
for (cp = s; *cp; ++cp)
|
||||
*cp = (tmbchar)TY_(ToUpper)(*cp);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Format into buffer, which holds count bytes, from a va_list.
**
** With HAS_VSNPRINTF the output is bounded and buffer is always
** NUL-terminated when count > 0.  Without it the call degrades to an
** unbounded vsprintf and count is ignored.
** Returns whatever the underlying formatting function returns.
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{
    int retval;

#if HAS_VSNPRINTF
    if ( count == 0 )
    {
        /* No room for even a terminator.  count - 1 would wrap around to
        ** SIZE_MAX and the store below would land far outside buffer, so
        ** only measure the output.
        */
        return vsnprintf(buffer, 0, format, args);
    }
    retval = vsnprintf(buffer, count - 1, format, args);
    /* some vsnprintf variants do not terminate on truncation */
    buffer[count - 1] = 0;
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    return retval;
}
|
||||
|
||||
/* Variadic front end for tmbvsnprintf: format into buffer (count bytes)
** and return that function's result.
*/
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
{
    va_list ap;
    int written;

    va_start(ap, format);
    written = TY_(tmbvsnprintf)(buffer, count, format, ap);
    va_end(ap);

    return written;
}
|
||||
|
||||
/* Replace every occurrence of str in buffer with rep, in place.
**
** buffer must be NUL-terminated and writable.  When rep is longer than
** str the caller must provide enough room for the result to grow; the
** capacity of buffer is not known here.
** Nothing is done when any argument is NULL or str is empty.
**
** The edit is done directly in buffer with memmove, so there is no
** intermediate scratch array and no fixed limit on the string length.
*/
void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep)
{
    size_t strLen, repLen;
    int repHasStr;
    char *p;

    /* an empty search string matches everywhere and would never finish */
    if ( !buffer || !str || !rep || *str == '\0' )
        return;

    strLen = strlen(str);
    repLen = strlen(rep);

    /* If the replacement itself contains the search string, searching
    ** again from the start would find it forever.  In that case resume
    ** after the inserted text; otherwise search from the start of buffer
    ** again so that matches formed by a replacement are also handled.
    */
    repHasStr = ( strstr(rep, str) != NULL );

    p = strstr(buffer, str);
    while ( p )
    {
        /* shift the tail (terminator included), then drop rep in the gap */
        memmove(p + repLen, p + strLen, strlen(p + strLen) + 1);
        memcpy(p, rep, repLen);

        p = strstr(repHasStr ? p + repLen : buffer, str);
    }
}
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
92
third_party/tidy/tmbstr.h
vendored
Normal file
92
third_party/tidy/tmbstr.h
vendored
Normal file
|
@ -0,0 +1,92 @@
|
|||
#ifndef __TMBSTR_H__
|
||||
#define __TMBSTR_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* tmbstr.h - Tidy string utility functions
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/* like strdup but using an allocator */
|
||||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
|
||||
|
||||
/* like strndup but using an allocator */
|
||||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
|
||||
|
||||
/* like strncpy, but always NUL-terminates s1; returns the unused part of size */
|
||||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
|
||||
|
||||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
|
||||
|
||||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* exactly same as strcmp */
|
||||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* returns byte count, not char count */
|
||||
uint TY_(tmbstrlen)( ctmbstr str );
|
||||
|
||||
/*
|
||||
MS C 4.2 doesn't include strcasecmp.
|
||||
Note that tolower and toupper won't
|
||||
work on chars > 127.
|
||||
|
||||
Neither do Lexer.ToLower() or Lexer.ToUpper()!
|
||||
|
||||
We get away with this because, except for XML tags,
|
||||
we are always comparing to ascii element and
|
||||
attribute names defined by HTML specs.
|
||||
*/
|
||||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
|
||||
|
||||
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
|
||||
|
||||
/* return offset of cc from beginning of s1,
|
||||
** -1 if not found.
|
||||
*/
|
||||
/* TY_PRIVATE int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
|
||||
|
||||
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
|
||||
/* TY_PRIVATE ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
|
||||
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* transform string to lower case */
|
||||
tmbstr TY_(tmbstrtolower)( tmbstr s );
|
||||
|
||||
/* Transform ASCII chars in string to upper case */
|
||||
tmbstr TY_(tmbstrtoupper)( tmbstr s );
|
||||
|
||||
/* TY_PRIVATE Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
|
||||
|
||||
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 3, 0)))
|
||||
#endif
|
||||
;
|
||||
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
|
||||
void TY_(strrep)(tmbstr buffer, ctmbstr str, ctmbstr rep);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* __TMBSTR_H__ */
|
525
third_party/tidy/utf8.c
vendored
Normal file
525
third_party/tidy/utf8.c
vendored
Normal file
|
@ -0,0 +1,525 @@
|
|||
/* clang-format off */
|
||||
/* utf8.c -- convert characters to/from UTF-8
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Uses public interfaces to abstract input source and output
|
||||
sink, which may be user supplied or either FILE* or memory
|
||||
based Tidy implementations. Encoding support is uniform
|
||||
regardless of I/O mechanism.
|
||||
|
||||
Note, UTF-8 encoding, by itself, does not affect the actual
|
||||
"codepoints" of the underlying character encoding. In the
|
||||
cases of ASCII, Latin1, Unicode (16-bit, BMP), these all
|
||||
refer to ISO-10646 "codepoints". For anything else, they
|
||||
refer to some other "codepoint" set.
|
||||
|
||||
Put another way, UTF-8 is a variable length method to
|
||||
represent any non-negative integer value. The glyph
|
||||
that a integer value represents is unchanged and defined
|
||||
externally (e.g. by ISO-10646, Big5, Win1252, MacRoman,
|
||||
Latin2-9, and so on).
|
||||
|
||||
Put still another way, UTF-8 is more of a _transfer_ encoding
|
||||
than a _character_ encoding, per se.
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidy.h"
|
||||
#include "third_party/tidy/forward.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "third_party/tidy/utf8.h"
|
||||
|
||||
/*
|
||||
UTF-8 encoding/decoding functions
|
||||
Return # of bytes in UTF-8 sequence; result < 0 if illegal sequence
|
||||
|
||||
Also see below for UTF-16 encoding/decoding functions
|
||||
|
||||
References :
|
||||
|
||||
1) UCS Transformation Format 8 (UTF-8):
|
||||
ISO/IEC 10646-1:1996 Amendment 2 or ISO/IEC 10646-1:2000 Annex D
|
||||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335>
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html>
|
||||
|
||||
Table 4 - Mapping from UCS-4 to UTF-8
|
||||
|
||||
2) Unicode standards:
|
||||
<https://www.unicode.org/standard/standard.html>
|
||||
|
||||
3) Legal UTF-8 byte sequences:
|
||||
<https://www.unicode.org/versions/corrigendum1.html>
|
||||
|
||||
Code point 1st byte 2nd byte 3rd byte 4th byte
|
||||
---------- -------- -------- -------- --------
|
||||
U+0000..U+007F 00..7F
|
||||
U+0080..U+07FF C2..DF 80..BF
|
||||
U+0800..U+0FFF E0 A0..BF 80..BF
|
||||
U+1000..U+FFFF E1..EF 80..BF 80..BF
|
||||
U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
||||
U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
||||
U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||
|
||||
The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also
|
||||
allows for the use of five- and six-byte sequences to encode
|
||||
characters that are outside the range of the Unicode character
|
||||
set; those five- and six-byte sequences are illegal for the use
|
||||
of UTF-8 as a transformation of Unicode characters. ISO/IEC 10646
|
||||
does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF
|
||||
(but it does allow other noncharacters).
|
||||
|
||||
4) RFC 2279: UTF-8, a transformation format of ISO 10646:
|
||||
<http://www.ietf.org/rfc/rfc2279.txt>
|
||||
|
||||
5) UTF-8 and Unicode FAQ:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/unicode.html>
|
||||
|
||||
6) Markus Kuhn's UTF-8 decoder stress test file:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
|
||||
|
||||
7) UTF-8 Demo:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt>
|
||||
|
||||
8) UTF-8 Sampler:
|
||||
<http://www.columbia.edu/kermit/utf8.html>
|
||||
|
||||
9) Transformation Format for 16 Planes of Group 00 (UTF-16):
|
||||
ISO/IEC 10646-1:1996 Amendment 1 or ISO/IEC 10646-1:2000 Annex C
|
||||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n2005/n2005.pdf>
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-16.html>
|
||||
|
||||
10) RFC 2781: UTF-16, an encoding of ISO 10646:
|
||||
<http://www.ietf.org/rfc/rfc2781.txt>
|
||||
|
||||
11) UTF-16 invalid surrogate pairs:
|
||||
<https://www.unicode.org/faq/utf_bom.html#16>
|
||||
|
||||
UTF-16 UTF-8 UCS-4
|
||||
D83F DFF* F0 9F BF B* 0001FFF*
|
||||
D87F DFF* F0 AF BF B* 0002FFF*
|
||||
D8BF DFF* F0 BF BF B* 0003FFF*
|
||||
D8FF DFF* F1 8F BF B* 0004FFF*
|
||||
D93F DFF* F1 9F BF B* 0005FFF*
|
||||
D97F DFF* F1 AF BF B* 0006FFF*
|
||||
...
|
||||
DBBF DFF* F3 BF BF B* 000FFFF*
|
||||
DBFF DFF* F4 8F BF B* 0010FFF*
|
||||
|
||||
* = E or F
|
||||
|
||||
1010 A
|
||||
1011 B
|
||||
1100 C
|
||||
1101 D
|
||||
1110 E
|
||||
1111 F
|
||||
|
||||
*/
|
||||
|
||||
#define kNumUTF8Sequences 7
|
||||
#define kMaxUTF8Bytes 4
|
||||
|
||||
#define kUTF8ByteSwapNotAChar 0xFFFE
|
||||
#define kUTF8NotAChar 0xFFFF
|
||||
|
||||
#define kMaxUTF8FromUCS4 0x10FFFF
|
||||
|
||||
#define kUTF16SurrogatesBegin 0x10000
|
||||
#define kMaxUTF16FromUCS4 0x10FFFF
|
||||
|
||||
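/* UTF-16 surrogate pair areas.  Naming note: "Low" here is the numerically
** lower range 0xD800-0xDBFF (the leading code unit, which the Unicode
** Standard calls the high surrogate) and "High" is 0xDC00-0xDFFF (the
** trailing code unit, Unicode's low surrogate).
*/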
|
||||
#define kUTF16LowSurrogateBegin 0xD800
|
||||
#define kUTF16LowSurrogateEnd 0xDBFF
|
||||
#define kUTF16HighSurrogateBegin 0xDC00
|
||||
#define kUTF16HighSurrogateEnd 0xDFFF
|
||||
|
||||
|
||||
/* offsets into validUTF8 table below */
|
||||
static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
|
||||
{
|
||||
0, /* 1 byte */
|
||||
1, /* 2 bytes */
|
||||
2, /* 3 bytes */
|
||||
4, /* 4 bytes */
|
||||
kNumUTF8Sequences /* must be last */
|
||||
};
|
||||
|
||||
static const struct validUTF8Sequence
|
||||
{
|
||||
uint lowChar;
|
||||
uint highChar;
|
||||
int numBytes;
|
||||
byte validBytes[8];
|
||||
} validUTF8[kNumUTF8Sequences] =
|
||||
{
|
||||
/* low high #bytes byte 1 byte 2 byte 3 byte 4 */
|
||||
{0x0000, 0x007F, 1, {0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
|
||||
{0x0080, 0x07FF, 2, {0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}},
|
||||
{0x0800, 0x0FFF, 3, {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
|
||||
{0x1000, 0xFFFF, 3, {0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
|
||||
{0x10000, 0x3FFFF, 4, {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
|
||||
{0x40000, 0xFFFFF, 4, {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
|
||||
{0x100000, 0x10FFFF, 4, {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}}
|
||||
};
|
||||
|
||||
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
|
||||
TidyInputSource* inp, int* count )
|
||||
{
|
||||
byte tempbuf[10];
|
||||
byte *buf = &tempbuf[0];
|
||||
uint ch = 0, n = 0;
|
||||
int i, bytes = 0;
|
||||
Bool hasError = no;
|
||||
|
||||
if ( successorBytes )
|
||||
buf = (byte*) successorBytes;
|
||||
|
||||
/* special check if we have been passed an EOF char */
|
||||
if ( firstByte == EndOfStream )
|
||||
{
|
||||
/* at present */
|
||||
*c = firstByte;
|
||||
*count = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ch = firstByte; /* first byte is passed in separately */
|
||||
|
||||
if (ch <= 0x7F) /* 0XXX XXXX one byte */
|
||||
{
|
||||
n = ch;
|
||||
bytes = 1;
|
||||
}
|
||||
else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */
|
||||
{
|
||||
n = ch & 31;
|
||||
bytes = 2;
|
||||
}
|
||||
else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
|
||||
{
|
||||
n = ch & 15;
|
||||
bytes = 3;
|
||||
}
|
||||
else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
|
||||
{
|
||||
n = ch & 7;
|
||||
bytes = 4;
|
||||
}
|
||||
else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */
|
||||
{
|
||||
n = ch & 3;
|
||||
bytes = 5;
|
||||
hasError = yes;
|
||||
}
|
||||
else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */
|
||||
{
|
||||
n = ch & 1;
|
||||
bytes = 6;
|
||||
hasError = yes;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* not a valid first byte of a UTF-8 sequence */
|
||||
n = ch;
|
||||
bytes = 1;
|
||||
hasError = yes;
|
||||
}
|
||||
|
||||
/* successor bytes should have the form 10XX XXXX */
|
||||
|
||||
/* If caller supplied buffer, use it. Else see if caller
|
||||
** supplied an input source, use that.
|
||||
*/
|
||||
if ( successorBytes )
|
||||
{
|
||||
for ( i=0; i < bytes-1; ++i )
|
||||
{
|
||||
if ( !buf[i] || (buf[i] & 0xC0) != 0x80 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = i+1;
|
||||
break;
|
||||
}
|
||||
n = (n << 6) | (buf[i] & 0x3F);
|
||||
}
|
||||
}
|
||||
else if ( inp )
|
||||
{
|
||||
for ( i=0; i < bytes-1 && !inp->eof(inp->sourceData); ++i )
|
||||
{
|
||||
int b = inp->getByte( inp->sourceData );
|
||||
buf[i] = (tmbchar) b;
|
||||
|
||||
/* End of data or illegal successor byte value */
|
||||
if ( b == EOF || (buf[i] & 0xC0) != 0x80 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = i+1;
|
||||
if ( b != EOF )
|
||||
inp->ungetByte( inp->sourceData, buf[i] );
|
||||
break;
|
||||
}
|
||||
n = (n << 6) | (buf[i] & 0x3F);
|
||||
}
|
||||
}
|
||||
else if ( bytes > 1 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = 1;
|
||||
}
|
||||
|
||||
if (!hasError && ((n == kUTF8ByteSwapNotAChar) || (n == kUTF8NotAChar)))
|
||||
hasError = yes;
|
||||
|
||||
if (!hasError && (n > kMaxUTF8FromUCS4))
|
||||
hasError = yes;
|
||||
|
||||
if (!hasError)
|
||||
{
|
||||
int lo, hi;
|
||||
|
||||
lo = offsetUTF8Sequences[bytes - 1];
|
||||
hi = offsetUTF8Sequences[bytes] - 1;
|
||||
|
||||
/* check for overlong sequences */
|
||||
if ((n < validUTF8[lo].lowChar) || (n > validUTF8[hi].highChar))
|
||||
hasError = yes;
|
||||
else
|
||||
{
|
||||
hasError = yes; /* assume error until proven otherwise */
|
||||
|
||||
for (i = lo; i <= hi; i++)
|
||||
{
|
||||
int tempCount;
|
||||
byte theByte;
|
||||
|
||||
for (tempCount = 0; tempCount < bytes; tempCount++)
|
||||
{
|
||||
if (!tempCount)
|
||||
theByte = (tmbchar) firstByte;
|
||||
else
|
||||
theByte = buf[tempCount - 1];
|
||||
|
||||
if ( theByte >= validUTF8[i].validBytes[(tempCount * 2)] &&
|
||||
theByte <= validUTF8[i].validBytes[(tempCount * 2) + 1] )
|
||||
hasError = no;
|
||||
if (hasError)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 1 && defined(_DEBUG)
|
||||
if ( hasError )
|
||||
{
|
||||
/* debug */
|
||||
fprintf( stderr, "UTF-8 decoding error of %d bytes : ", bytes );
|
||||
fprintf( stderr, "0x%02x ", firstByte );
|
||||
for (i = 1; i < bytes; i++)
|
||||
fprintf( stderr, "0x%02x ", buf[i - 1] );
|
||||
fprintf( stderr, " = U+%04X\n", n );
|
||||
}
|
||||
#endif
|
||||
|
||||
*count = bytes;
|
||||
*c = n;
|
||||
if ( hasError )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
|
||||
TidyOutputSink* outp, int* count )
|
||||
{
|
||||
byte tempbuf[10] = {0};
|
||||
byte* buf = &tempbuf[0];
|
||||
int bytes = 0;
|
||||
Bool hasError = no;
|
||||
|
||||
if ( encodebuf )
|
||||
buf = (byte*) encodebuf;
|
||||
|
||||
if (c <= 0x7F) /* 0XXX XXXX one byte */
|
||||
{
|
||||
buf[0] = (tmbchar) c;
|
||||
bytes = 1;
|
||||
}
|
||||
else if (c <= 0x7FF) /* 110X XXXX two bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) ( 0xC0 | (c >> 6) );
|
||||
buf[1] = (tmbchar) ( 0x80 | (c & 0x3F) );
|
||||
bytes = 2;
|
||||
}
|
||||
else if (c <= 0xFFFF) /* 1110 XXXX three bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xE0 | (c >> 12));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 3;
|
||||
if ( c == kUTF8ByteSwapNotAChar || c == kUTF8NotAChar )
|
||||
hasError = yes;
|
||||
}
|
||||
else if (c <= 0x1FFFFF) /* 1111 0XXX four bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xF0 | (c >> 18));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 4;
|
||||
if (c > kMaxUTF8FromUCS4)
|
||||
hasError = yes;
|
||||
}
|
||||
else if (c <= 0x3FFFFFF) /* 1111 10XX five bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xF8 | (c >> 24));
|
||||
buf[1] = (tmbchar) (0x80 | (c >> 18));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[4] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 5;
|
||||
hasError = yes;
|
||||
}
|
||||
else if (c <= 0x7FFFFFFF) /* 1111 110X six bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xFC | (c >> 30));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 24) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[4] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[5] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 6;
|
||||
hasError = yes;
|
||||
}
|
||||
else
|
||||
hasError = yes;
|
||||
|
||||
/* don't output invalid UTF-8 byte sequence to a stream */
|
||||
if ( !hasError && outp != NULL )
|
||||
{
|
||||
int ix;
|
||||
for ( ix=0; ix < bytes; ++ix )
|
||||
outp->putByte( outp->sinkData, buf[ix] );
|
||||
}
|
||||
|
||||
#if 1 && defined(_DEBUG)
|
||||
if ( hasError )
|
||||
{
|
||||
int i;
|
||||
fprintf( stderr, "UTF-8 encoding error for U+%x : ", c );
|
||||
for (i = 0; i < bytes; i++)
|
||||
fprintf( stderr, "0x%02x ", buf[i] );
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
#endif
|
||||
|
||||
*count = bytes;
|
||||
if (hasError)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* return one less than the number of bytes used by the UTF-8 byte sequence */
|
||||
/* str points to the UTF-8 byte sequence */
|
||||
/* the Unicode char is returned in *ch */
|
||||
uint TY_(GetUTF8)( ctmbstr str, uint *ch )
|
||||
{
|
||||
uint n;
|
||||
int bytes;
|
||||
|
||||
int err;
|
||||
|
||||
bytes = 0;
|
||||
|
||||
/* first byte "str[0]" is passed in separately from the */
|
||||
/* rest of the UTF-8 byte sequence starting at "str[1]" */
|
||||
err = TY_(DecodeUTF8BytesToChar)( &n, str[0], str+1, NULL, &bytes );
|
||||
if (err)
|
||||
{
|
||||
#if 1 && defined(_DEBUG)
|
||||
fprintf(stderr, "pprint UTF-8 decoding error for U+%x : ", n);
|
||||
#endif
|
||||
n = 0xFFFD; /* replacement char */
|
||||
}
|
||||
|
||||
*ch = n;
|
||||
return bytes - 1;
|
||||
}
|
||||
|
||||
/* store char c as UTF-8 encoded byte stream */
|
||||
tmbstr TY_(PutUTF8)( tmbstr buf, uint c )
|
||||
{
|
||||
int err, count = 0;
|
||||
|
||||
err = TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &count );
|
||||
if (err)
|
||||
{
|
||||
#if 1 && defined(_DEBUG)
|
||||
fprintf(stderr, "pprint UTF-8 encoding error for U+%x : ", c);
|
||||
#endif
|
||||
/* replacement char 0xFFFD encoded as UTF-8 */
|
||||
buf[0] = (byte) 0xEF;
|
||||
buf[1] = (byte) 0xBF;
|
||||
buf[2] = (byte) 0xBD;
|
||||
count = 3;
|
||||
}
|
||||
|
||||
buf += count;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* True when ucs4 does not exceed kMaxUTF16FromUCS4, the largest value
** representable in UTF-16.
*/
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 )
{
    return ( kMaxUTF16FromUCS4 >= ucs4 );
}
|
||||
|
||||
/* True when ch lies in the range this file names the "high" surrogate
** area, kUTF16HighSurrogateBegin..kUTF16HighSurrogateEnd inclusive.
*/
Bool TY_(IsHighSurrogate)( tchar ch )
{
    return ( !(ch < kUTF16HighSurrogateBegin || ch > kUTF16HighSurrogateEnd) );
}
|
||||
/* True when ch lies in the range this file names the "low" surrogate
** area, kUTF16LowSurrogateBegin..kUTF16LowSurrogateEnd inclusive.
*/
Bool TY_(IsLowSurrogate)( tchar ch )
{
    return ( !(ch < kUTF16LowSurrogateBegin || ch > kUTF16LowSurrogateEnd) );
}
|
||||
|
||||
/* Combine a surrogate pair into a single code point at or above 0x10000.
** Using this file's naming, low comes from the kUTF16LowSurrogate range
** and supplies the upper ten bits; high comes from the
** kUTF16HighSurrogate range and supplies the lower ten bits.
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low )
{
    tchar upper, lower;

    assert( TY_(IsHighSurrogate)(high) && TY_(IsLowSurrogate)(low) );

    upper = low - kUTF16LowSurrogateBegin;
    lower = high - kUTF16HighSurrogateBegin;

    return ( upper * 0x400 + lower + 0x10000 );
}
|
||||
|
||||
/* Split a combined char into its two UTF-16 surrogates.
** *low receives the unit from the kUTF16LowSurrogate range (upper ten
** bits) and *high the unit from the kUTF16HighSurrogate range (lower ten
** bits).  Returns a false value, leaving the outputs untouched, when
** utf16 is not a valid combined char or either output pointer is NULL.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high )
{
    Bool status = ( TY_(IsValidCombinedChar)( utf16 ) && high && low );

    if ( !status )
        return status;

    {
        tchar offset = utf16 - kUTF16SurrogatesBegin;
        *low  = offset / 0x400 + kUTF16LowSurrogateBegin;
        *high = offset % 0x400 + kUTF16HighSurrogateBegin;
    }
    return status;
}
|
||||
|
||||
/* True when ch is at or above kUTF16SurrogatesBegin and its low 16 bits
** are neither 0xFFFE nor 0xFFFF.
*/
Bool TY_(IsValidCombinedChar)( tchar ch )
{
    tchar low16 = ch & 0x0000FFFF;

    return ( ch >= kUTF16SurrogatesBegin &&
             (low16 & 0x0000FFFE) != 0x0000FFFE &&
             low16 != 0x0000FFFF );
}
|
||||
|
||||
/* True when ch is at or above kUTF16SurrogatesBegin, i.e. it needs a
** surrogate pair in UTF-16.
*/
Bool TY_(IsCombinedChar)( tchar ch )
{
    return ( !(ch < kUTF16SurrogatesBegin) );
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
48
third_party/tidy/utf8.h
vendored
Normal file
48
third_party/tidy/utf8.h
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
/* clang-format off */
|
||||
|
||||
/* utf8.h -- convert characters to/from UTF-8
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#include "third_party/tidy/tidyplatform.h"
|
||||
#include "third_party/tidy/access.h"
|
||||
#include "third_party/tidy/tidybuffio.h"
|
||||
|
||||
/* UTF-8 encoding/decoding support
|
||||
** Does not convert character "codepoints", i.e. to/from 10646.
|
||||
*/
|
||||
|
||||
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
|
||||
TidyInputSource* inp, int* count );
|
||||
|
||||
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
|
||||
TidyOutputSink* outp, int* count );
|
||||
|
||||
|
||||
uint TY_(GetUTF8)( ctmbstr str, uint *ch );
|
||||
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
|
||||
|
||||
#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
|
||||
#define UNICODE_BOM UNICODE_BOM_BE
|
||||
#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
|
||||
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
|
||||
|
||||
|
||||
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
|
||||
Bool TY_(IsHighSurrogate)( tchar ch );
|
||||
Bool TY_(IsLowSurrogate)( tchar ch );
|
||||
|
||||
Bool TY_(IsCombinedChar)( tchar ch );
|
||||
Bool TY_(IsValidCombinedChar)( tchar ch );
|
||||
|
||||
tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
|
||||
/* Parameter order matches the definition: the second argument receives the
** 0xD800-0xDBFF unit ("low" in this library's naming), the third the
** 0xDC00-0xDFFF unit ("high").
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high );
|
||||
|
||||
|
||||
|
||||
#endif /* __UTF8_H__ */
|
24
third_party/tidy/version.inc
vendored
Normal file
24
third_party/tidy/version.inc
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* clang-format off */
|
||||
/* version information
|
||||
|
||||
(c) 2007-2015 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
*/
|
||||
|
||||
#ifdef RELEASE_DATE
|
||||
static const char TY_(release_date)[] = RELEASE_DATE;
|
||||
#else
|
||||
static const char TY_(release_date)[] = "2015/01/22";
|
||||
#endif
|
||||
#ifdef LIBTIDY_VERSION
|
||||
#ifdef RC_NUMBER
|
||||
static const char TY_(library_version)[] = LIBTIDY_VERSION "." RC_NUMBER;
|
||||
#else
|
||||
static const char TY_(library_version)[] = LIBTIDY_VERSION;
|
||||
#endif
|
||||
#else
|
||||
static const char TY_(library_version)[] = "5.0.0";
|
||||
#endif
|
||||
|
||||
/* eof */
|
Loading…
Add table
Reference in a new issue