Add w3c html tidy

This commit is contained in:
Justine Tunney 2022-06-09 06:33:31 -07:00
parent ecc8962555
commit 3c7ae0fc72
63 changed files with 56239 additions and 0 deletions

21
third_party/tidy/gdoc.h vendored Normal file
View file

@ -0,0 +1,21 @@
#ifndef __GDOC_H__
#define __GDOC_H__
/* NOTE(review): presumably supplies TidyDocImpl and the TY_() macro used
   below; neither is defined in this header -- confirm. */
#include "third_party/tidy/forward.h"
/* clang-format off */
/* gdoc.h -- clean up html exported by Google Docs
(c) 2012 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.

The cleanup is described as performing the following steps:
- strip the script element, as the style sheet is a mess
  (NOTE(review): the stated reason refers to the style sheet, so this
  bullet may mean the style element -- confirm against the implementation)
- strip class attributes
- strip span elements, leaving their content in place
- replace <a name=...></a> by id on parent element
- strip empty <p> elements
*/

/* Clean up a document that was exported by Google Docs, as outlined in the
   file comment above.

   doc: the Tidy document to clean. Returns nothing; presumably the document
   is modified in place -- TODO confirm in the implementation.

   TY_() is a macro defined outside this header; it presumably decorates the
   symbol name for internal linkage/namespacing -- verify in its definition. */
void TY_(CleanGoogleDocument)( TidyDocImpl* doc );
#endif /* __GDOC_H__ */