dusty/tools/markdown.py (104 lines of code) (raw):

#!/usr/bin/python3
# coding=utf-8
# pylint: disable=I0011,E0401

#   Copyright 2019 getcarrier.io
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

"""
    Markdown tools
"""

import re
import traceback

import markdown2
import inscriptis
from bs4 import BeautifulSoup

from dusty.tools import log


# Characters that markdown_escape() prefixes with a backslash and that
# markdown_unescape() strips the backslash from. Kept in one place so the two
# functions cannot drift apart.
_MARKDOWN_SPECIAL_CHARS = "\\`*_{}[]()#|+-.!"

# Single-pass translation table: every special character -> "\" + character.
_MARKDOWN_ESCAPE_TABLE = str.maketrans(
    {char: f"\\{char}" for char in _MARKDOWN_SPECIAL_CHARS}
)

# Matches a backslash followed by one special character (captured in group 1).
_MARKDOWN_UNESCAPE_RE = re.compile(
    r"\\([" + re.escape(_MARKDOWN_SPECIAL_CHARS) + r"])"
)


def markdown_to_html(text):
    """
    Convert markdown to HTML.

    Besides regular markdown, the "{panel}", "{code}" and "|| tables |"
    markers are supported through the pre/post-process hooks defined in this
    module.

    NOTE: the hooks are installed by assigning to attributes of the
    ``markdown2.Markdown`` class itself, so after the first call they are
    active for every markdown2 conversion in the process, not only for
    conversions started here.

    :param text: markdown source
    :return: rendered HTML as returned by ``markdown2.markdown``
    """
    # Install markdown2 hooks to support "{panel}", "{code}" and "|| tables |"
    markdown2.Markdown.preprocess = _markdown2_preprocess
    markdown2.Markdown.postprocess = _markdown2_postprocess
    # Run markdown2
    return markdown2.markdown(
        text, extras=["tables", "wiki-tables", "fenced-code-blocks"]
    )


def _markdown2_preprocess(self, text):  # pylint: disable=W0613
    """
    markdown2 pre-process hook: rewrite custom markers before rendering.

    - "{code:title=T|S}" keeps its marker and gets an opening "```" fence
      appended on the next line; a bare "{code}" gets a closing "```" fence
      put in front of it.
    - A "|| ... |" block surrounded by blank lines is wrapped into
      "{panel:title=Instance:}" ... "{panel}".
    - Every "|| *name* | value |" row becomes "**name**: value".

    :param self: markdown2.Markdown instance (unused)
    :param text: markdown source
    :return: rewritten markdown source
    """
    # Handle {code}
    def _code_handler(item):
        # Re-emit the opening marker (the post-process hook turns it into
        # HTML) and start a fenced code block right after it
        return (
            "{code:title="
            + item.group("title")
            + "|" + item.group("style")
            + "}\n```\n"
        )

    text = re.sub(
        r'{code:title=(?P<title>.*?)\|(?P<style>.*?)}',
        _code_handler,
        text
    )
    # Close the fenced code block before the closing {code} marker
    text = text.replace("{code}", "```\n{code}")

    # Handle || tables |
    def _table_panel_handler(item):
        return (
            "\n\n{panel:title=Instance:}\n"
            f'{item.group("data")}\n'
            "{panel}\n\n"
        )

    text = re.sub(
        r'\n\n(?P<data>\|\|(.*?[\n]*?)+\|)\n\n',
        _table_panel_handler,
        text,
        flags=re.MULTILINE
    )

    def _table_item_handler(item):
        return f'**{item.group("name")}**: {item.group("value")}'

    text = re.sub(
        r'\|\| \*(?P<name>.*?)\* \| (?P<value>.*?) \|',
        _table_item_handler,
        text
    )
    return text


def _markdown2_postprocess(self, text):  # pylint: disable=W0613
    """
    markdown2 post-process hook: turn custom markers into HTML "card" blocks.

    Opening "{panel:title=T:S}" and "{code:title=T|S}" markers (optionally
    wrapped in <p>...</p> by the renderer) become the opening tags of a card
    with T as its header; closing "{panel}" and "{code}" markers become the
    matching closing tags. The style part (S) of a marker is matched but not
    used in the output.

    :param self: markdown2.Markdown instance (unused)
    :param text: HTML produced by markdown2
    :return: HTML with the markers replaced
    """
    def _card_open(item):
        # Shared by {panel} and {code}: both open the same card markup
        return (
            '<div class="card">'
            f'<div class="card-header">{item.group("title")}</div>'
            '<div class="card-body">'
        )

    card_close = "</div></div>"

    # Handle {panel}
    text = re.sub(
        r'(\<p\>)?\s*{panel:title=(?P<title>.*?):(?P<style>.*?)}\s*(\<\/p\>)?',
        _card_open,
        text
    )
    text = re.sub(
        r'(\<p\>)?\s*{panel}\s*(\<\/p\>)?',
        card_close,
        text
    )
    # Handle {code}
    text = re.sub(
        r'(\<p\>)?\s*{code:title=(?P<title>.*?)\|(?P<style>.*?)}\s*(\<\/p\>)?',
        _card_open,
        text
    )
    text = re.sub(
        r'(\<p\>)?\s*{code}\s*(\<\/p\>)?',
        card_close,
        text
    )
    # Return result
    return text


def markdown_escape(string):
    """
    Escape markdown special symbols.

    Every character from ``_MARKDOWN_SPECIAL_CHARS`` is prefixed with a
    backslash in a single pass over the input.

    :param string: text to escape
    :return: escaped text
    """
    return string.translate(_MARKDOWN_ESCAPE_TABLE)


def markdown_unescape(string):
    """
    Un-escape markdown special symbols.

    Every "backslash + special character" pair is replaced by the special
    character alone. This is done in one left-to-right pass so that a
    backslash produced by un-escaping "\\\\" is never re-used as the escape
    prefix of the character that follows it (a sequence of independent
    replace passes would turn an escaped backslash followed by "*" into a
    bare "*" and lose the backslash).

    :param string: text to un-escape
    :return: un-escaped text
    """
    return _MARKDOWN_UNESCAPE_RE.sub(r"\1", string)


def markdown_table_escape(string):
    """
    Escape markdown special symbols in tables.

    Same as markdown_escape(), and additionally every "\\n" and "\\r"
    character is replaced with a space.

    :param string: text to escape
    :return: escaped text without line breaks
    """
    return markdown_escape(string).replace("\n", " ").replace("\r", " ")


def markdown_to_text(string):
    """
    Convert markdown to plain text.

    The markdown is rendered to HTML with markdown_to_html() and all text
    nodes of the parsed result are concatenated.

    :param string: markdown source
    :return: plain text
    """
    soup = BeautifulSoup(markdown_to_html(string), "html.parser")
    return "".join(soup.findAll(text=True))


def html_to_text(html, escape=True):
    """
    Convert HTML to plain text.

    Conversion is best-effort: if inscriptis raises, the error is logged at
    debug level and an empty string is used instead.

    :param html: HTML source
    :param escape: when true, markdown special symbols in the resulting text
        are escaped with markdown_escape()
    :return: plain text (markdown-escaped if ``escape`` is true)
    """
    try:
        text = inscriptis.get_text(html, display_links=True)
    except Exception:  # pylint: disable=W0703
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still propagate
        log.debug(
            "Exception during HTML to text conversion\n%s",
            traceback.format_exc()
        )
        text = ""
    if escape:
        text = markdown_escape(text)
    return text