Switch to Python-Markdown

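The commonmark library doesn't have any built-in HTML sanitization, so Markdown is now rendered with Python-Markdown, with raw-HTML handling disabled by default.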
This commit is contained in:
Tulir Asokan 2018-11-28 15:28:34 +02:00
parent 6a6e8a818e
commit c39cacbab4
3 changed files with 23 additions and 7 deletions

View file

@ -13,11 +13,13 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Dict, List, Union, Callable, Awaitable, Optional
from typing import Dict, List, Union, Callable, Awaitable, Optional, Tuple
from markdown.extensions import Extension
import markdown as md
import attr
import commonmark
from mautrix import Client as MatrixClient
from mautrix.util.formatter import parse_html
from mautrix.client import EventHandler
from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, MessageEventContent,
MessageType, TextMessageEventContent, Format, RelatesTo)
@ -25,6 +27,20 @@ from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, Mess
from .command_spec import ParsedCommand, CommandSpec
class EscapeHTML(Extension):
    """Python-Markdown extension that unregisters the parser's raw-HTML handlers.

    It removes the ``html_block`` preprocessor and the ``html`` inline pattern
    from the Markdown instance it is attached to.
    NOTE(review): per the Python-Markdown 3.0 release notes this is the
    suggested replacement for the removed ``safe_mode`` and causes raw HTML in
    the input to be escaped instead of passed through -- confirm against the
    installed Markdown version.
    """

    def extendMarkdown(self, md):
        """Deregister the raw-HTML processors from the Markdown instance ``md``."""
        html_handlers = (
            (md.preprocessors, "html_block"),
            (md.inlinePatterns, "html"),
        )
        for registry, handler_name in html_handlers:
            registry.deregister(handler_name)
# Shared extension instance, passed to every render that must not allow raw HTML.
escape_html = EscapeHTML()


def parse_markdown(markdown: str, allow_html: bool = False) -> Tuple[str, str]:
    """Render Markdown text to HTML and derive a second form from that HTML.

    Args:
        markdown: The Markdown source text.
        allow_html: When false (the default), the ``escape_html`` extension is
            passed to the renderer; when true, no extensions are passed.

    Returns:
        A 2-tuple ``(parse_html(html), html)`` where ``html`` is the output of
        ``md.markdown``. Callers in this module assign the first element to the
        message ``body`` and the second to ``formatted_body``.
    """
    extensions = [] if allow_html else [escape_html]
    rendered = md.markdown(markdown, extensions=extensions)
    return parse_html(rendered), rendered
class MaubotMessageEvent(MessageEvent):
_client: MatrixClient
@ -40,7 +56,7 @@ class MaubotMessageEvent(MessageEvent):
content = TextMessageEventContent(msgtype=MessageType.NOTICE, body=content)
if markdown:
content.format = Format.HTML
content.formatted_body = commonmark.commonmark(content.body)
content.body, content.formatted_body = parse_markdown(content.body)
if reply:
content.set_reply(self)
return self._client.send_message_event(self.room_id, event_type, content)
@ -65,8 +81,8 @@ class MaubotMatrixClient(MatrixClient):
def send_markdown(self, room_id: RoomID, markdown: str, msgtype: MessageType = MessageType.TEXT,
relates_to: Optional[RelatesTo] = None, **kwargs) -> Awaitable[EventID]:
content = TextMessageEventContent(msgtype=msgtype, body=markdown, format=Format.HTML,
formatted_body=commonmark.commonmark(markdown))
content = TextMessageEventContent(msgtype=msgtype, format=Format.HTML)
content.body, content.formatted_body = parse_markdown(markdown)
if relates_to:
content.relates_to = relates_to
return self.send_message(room_id, content, **kwargs)

View file

@ -2,7 +2,7 @@ mautrix
aiohttp
SQLAlchemy
alembic
commonmark
Markdown
ruamel.yaml
attrs
bcrypt

View file

@ -25,7 +25,7 @@ setuptools.setup(
"aiohttp>=3.0.1,<4",
"SQLAlchemy>=1.2.3,<2",
"alembic>=1.0.0,<2",
"commonmark>=0.8.1,<1",
"Markdown>=3.0.0,<4",
"ruamel.yaml>=0.15.35,<0.16",
"attrs>=18.1.0,<19",
"bcrypt>=3.1.4,<4",