Source code for synapse.lib.stormlib.mime

import bs4

import synapse.lib.coro as s_coro
import synapse.lib.stormtypes as s_stormtypes

def htmlToText(html, separator='\n', strip=True):
    '''
    Return the text content of an HTML document.

    The document is parsed by BeautifulSoup using the ``html5lib`` parser and
    the text is collected with ``get_text()``.

    Args:
        html: The HTML text to parse.
        separator: Forwarded to ``get_text()`` as ``separator``.
        strip: Forwarded to ``get_text()`` as ``strip``.

    Returns:
        The value returned by ``get_text()`` for the parsed document.
    '''
    return bs4.BeautifulSoup(html, 'html5lib').get_text(separator=separator, strip=strip)
@s_stormtypes.registry.registerLib
class LibMimeHtml(s_stormtypes.Lib):
    '''
    A Storm library for manipulating HTML text.
    '''
    # Library path segments for this Lib.
    _storm_lib_path = ('mime', 'html')

    # Declarative description of the one function this library provides
    # (``totext``): its arguments, their defaults, and its return type.
    _storm_locals = (
        {
            'name': 'totext',
            'desc': 'Return inner text from all tags within an HTML document.',
            'type': {
                'type': 'function',
                '_funcname': 'totext',
                'args': (
                    {
                        'name': 'html',
                        'type': 'str',
                        'desc': 'The HTML text to be parsed.',
                    },
                    {
                        'name': 'separator',
                        'type': 'str',
                        'default': '\n',
                        'desc': 'The string used to join text.',
                    },
                    {
                        'name': 'strip',
                        'type': 'boolean',
                        'default': True,
                        'desc': 'Strip whitespace from the beginning and end of tag text.',
                    },
                ),
                'returns': {
                    'type': 'str',
                    'desc': 'The separator-joined inner HTML text.',
                },
            },
        },
    )

    def getObjLocals(self):
        '''
        Return the mapping of local names to their bound implementations.
        '''
        locs = {'totext': self.totext}
        return locs

    @s_stormtypes.stormfunc(readonly=True)
    async def totext(self, html, separator='\n', strip=True):
        '''
        Implementation of the ``totext`` local: extract text from HTML.

        Args:
            html: The HTML text to be parsed; coerced with ``tostr``.
            separator: The string used to join text; coerced with ``tostr``
                (``None`` is allowed and is treated as an empty string).
            strip: Whether to strip whitespace; coerced with ``tobool``.

        Returns:
            The awaited result of running ``htmlToText`` through
            ``s_coro.semafork``.
        '''
        # Coerce the incoming values to plain Python types, in argument order.
        text = await s_stormtypes.tostr(html)
        joiner = await s_stormtypes.tostr(separator, noneok=True)
        dostrip = await s_stormtypes.tobool(strip)

        # A None separator is replaced with the empty string before parsing.
        joiner = '' if joiner is None else joiner

        # Hand the parse to s_coro.semafork and await its result.
        return await s_coro.semafork(htmlToText, text, joiner, dostrip)