hub/venv/lib/python3.7/site-packages/nbconvert/preprocessors/sanitize.py

"""
NBConvert Preprocessor for sanitizing HTML rendering of notebooks.
"""

from bleach import (
    ALLOWED_ATTRIBUTES,
    ALLOWED_STYLES,
    ALLOWED_TAGS,
    clean,
)
from traitlets import (
    Any,
    Bool,
    List,
    Set,
    Unicode,
)
from .base import Preprocessor


class SanitizeHTML(Preprocessor):
    """
    Preprocessor that sanitizes the HTML-bearing parts of notebook cells.

    Raw and markdown cell sources, and the HTML/markdown entries of code
    cell outputs, are passed through ``bleach.clean`` using the
    configurable options below. Output entries whose key is neither
    explicitly safe nor sanitizable are dropped.
    """

    # Bleach config.
    # These traits are forwarded as-is to ``bleach.clean`` by
    # ``sanitize_html_tags``.
    attributes = Any(
        config=True,
        default_value=ALLOWED_ATTRIBUTES,
        help="Allowed HTML tag attributes",
    )
    tags = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_TAGS,
        help="List of HTML tags to allow",
    )
    styles = List(
        Unicode(),
        config=True,
        default_value=ALLOWED_STYLES,
        help="Allowed CSS styles if <style> tag is whitelisted"
    )
    strip = Bool(
        config=True,
        default_value=False,
        help="If True, remove unsafe markup entirely instead of escaping"
    )
    strip_comments = Bool(
        config=True,
        default_value=True,
        help="If True, strip comments from escaped HTML",
    )

    # Display data config.
    # Keys listed here are left untouched in an output's data.
    safe_output_keys = Set(
        config=True,
        default_value={
            'metadata',  # Not a mimetype per-se, but expected and safe.
            'text/plain',
            'text/latex',
            'application/json',
            'image/png',
            'image/jpeg',
        },
        help="Cell output mimetypes to render without modification",
    )
    # Keys listed here are kept, but their value is run through bleach.
    sanitized_output_types = Set(
        config=True,
        default_value={
            'text/html',
            'text/markdown',
        },
        help="Cell output types to display after escaping with Bleach.",
    )

    def preprocess_cell(self, cell, resources, cell_index):
        """
        Sanitize potentially-dangerous contents of the cell.

        Cell Types:
          raw:
            Sanitize literal HTML
          markdown:
            Sanitize literal HTML
          code:
            Sanitize outputs that could result in code execution

        Parameters
        ----------
        cell
            Notebook cell; modified in place.
        resources
            Passed through unchanged.
        cell_index
            Index of the cell being processed (unused here).

        Returns
        -------
        tuple
            ``(cell, resources)``. This is returned for every cell type,
            including unrecognised ones, so the caller can always unpack
            the result.
        """
        cell_type = cell.cell_type
        if cell_type in ('raw', 'markdown'):
            # Raw cells are sanitized regardless of their declared format:
            # only text/html ones should be emitted, but err on the side
            # of safety.
            cell.source = self.sanitize_html_tags(cell.source)
        elif cell_type == 'code':
            cell.outputs = self.sanitize_code_outputs(cell.outputs)
        else:
            # Previously this path fell off the end and returned None.
            # Pass the cell through, but make the gap visible in the log.
            self.log.warning(
                "Not sanitizing cell of unknown type %s", cell_type
            )
        return cell, resources

    def sanitize_code_outputs(self, outputs):
        """
        Sanitize code cell outputs.

        For every output that is not a 'stream' or 'error', each key of
        its ``data`` mapping is handled as follows:

        - keys in ``safe_output_keys`` are kept unchanged;
        - keys in ``sanitized_output_types`` have their value replaced by
          the result of `sanitize_html_tags`;
        - any other key (e.g. 'text/javascript' with the default
          configuration) is deleted.

        The outputs are modified in place and the same object is returned.
        """
        for output in outputs:
            # 'stream' and 'error' outputs are skipped before `.data` is
            # read; they are treated as plain text with nothing to escape.
            if output['output_type'] in ('stream', 'error'):
                continue
            data = output.data
            to_remove = []
            for key in data:
                if key in self.safe_output_keys:
                    continue
                if key in self.sanitized_output_types:
                    self.log.info("Sanitizing %s", key)
                    data[key] = self.sanitize_html_tags(data[key])
                else:
                    # Mark key for removal. (Python doesn't allow deletion
                    # of keys from a dict during iteration)
                    to_remove.append(key)
            for key in to_remove:
                self.log.info("Removing %s", key)
                del data[key]
        return outputs

    def sanitize_html_tags(self, html_str):
        """
        Sanitize a string containing raw HTML tags.

        Calls ``bleach.clean`` on ``html_str`` with this preprocessor's
        ``tags``, ``attributes``, ``styles``, ``strip`` and
        ``strip_comments`` settings, and returns its result.
        """
        return clean(
            html_str,
            tags=self.tags,
            attributes=self.attributes,
            styles=self.styles,
            strip=self.strip,
            strip_comments=self.strip_comments,
        )
Add virtual environment to the git repository, this isn't 100% right but it's practical at this development point 2020-06-16 10:34:17 -04:00			`"""`
			`NBConvert Preprocessor for sanitizing HTML rendering of notebooks.`
			`"""`

			`from bleach import (`
			`ALLOWED_ATTRIBUTES,`
			`ALLOWED_STYLES,`
			`ALLOWED_TAGS,`
			`clean,`
			`)`
			`from traitlets import (`
			`Any,`
			`Bool,`
			`List,`
			`Set,`
			`Unicode,`
			`)`
			`from .base import Preprocessor`


			`class SanitizeHTML(Preprocessor):`

			`# Bleach config.`
			`attributes = Any(`
			`config=True,`
			`default_value=ALLOWED_ATTRIBUTES,`
			`help="Allowed HTML tag attributes",`
			`)`
			`tags = List(`
			`Unicode(),`
			`config=True,`
			`default_value=ALLOWED_TAGS,`
			`help="List of HTML tags to allow",`
			`)`
			`styles = List(`
			`Unicode(),`
			`config=True,`
			`default_value=ALLOWED_STYLES,`
			`help="Allowed CSS styles if <style> tag is whitelisted"`
			`)`
			`strip = Bool(`
			`config=True,`
			`default_value=False,`
			`help="If True, remove unsafe markup entirely instead of escaping"`
			`)`
			`strip_comments = Bool(`
			`config=True,`
			`default_value=True,`
			`help="If True, strip comments from escaped HTML",`
			`)`

			`# Display data config.`
			`safe_output_keys = Set(`
			`config=True,`
			`default_value={`
			`'metadata', # Not a mimetype per-se, but expected and safe.`
			`'text/plain',`
			`'text/latex',`
			`'application/json',`
			`'image/png',`
			`'image/jpeg',`
			`},`
			`help="Cell output mimetypes to render without modification",`
			`)`
			`sanitized_output_types = Set(`
			`config=True,`
			`default_value={`
			`'text/html',`
			`'text/markdown',`
			`},`
			`help="Cell output types to display after escaping with Bleach.",`
			`)`

			`def preprocess_cell(self, cell, resources, cell_index):`
			`"""`
			`Sanitize potentially-dangerous contents of the cell.`

			`Cell Types:`
			`raw:`
			`Sanitize literal HTML`
			`markdown:`
			`Sanitize literal HTML`
			`code:`
			`Sanitize outputs that could result in code execution`
			`"""`
			`if cell.cell_type == 'raw':`
			`# Sanitize all raw cells anyway.`
			`# Only ones with the text/html mimetype should be emitted`
			`# but erring on the side of safety maybe.`
			`cell.source = self.sanitize_html_tags(cell.source)`
			`return cell, resources`
			`elif cell.cell_type == 'markdown':`
			`cell.source = self.sanitize_html_tags(cell.source)`
			`return cell, resources`
			`elif cell.cell_type == 'code':`
			`cell.outputs = self.sanitize_code_outputs(cell.outputs)`
			`return cell, resources`

			`def sanitize_code_outputs(self, outputs):`
			`"""`
			`Sanitize code cell outputs.`

			`Removes 'text/javascript' fields from display_data outputs, and`
			runs `sanitize_html_tags` over 'text/html'.
			`"""`
			`for output in outputs:`
			`# These are always ascii, so nothing to escape.`
			`if output['output_type'] in ('stream', 'error'):`
			`continue`
			`data = output.data`
			`to_remove = []`
			`for key in data:`
			`if key in self.safe_output_keys:`
			`continue`
			`elif key in self.sanitized_output_types:`
			`self.log.info("Sanitizing %s" % key)`
			`data[key] = self.sanitize_html_tags(data[key])`
			`else:`
			`# Mark key for removal. (Python doesn't allow deletion of`
			`# keys from a dict during iteration)`
			`to_remove.append(key)`
			`for key in to_remove:`
			`self.log.info("Removing %s" % key)`
			`del data[key]`
			`return outputs`

			`def sanitize_html_tags(self, html_str):`
			`"""`
			`Sanitize a string containing raw HTML tags.`
			`"""`
			`return clean(`
			`html_str,`
			`tags=self.tags,`
			`attributes=self.attributes,`
			`styles=self.styles,`
			`strip=self.strip,`
			`strip_comments=self.strip_comments,`
			`)`