# hub/venv/lib/python3.7/site-packages/notebook/bundler/tools.py
#
# 231 lines
# 7.6 KiB
# Python
# Raw Normal View History

"""Set of common tools to aid bundler implementations."""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import os
import shutil
import errno
import nbformat
import fnmatch
import glob
def get_file_references(abs_nb_path, version):
    """Lists the files a notebook refers to, with every pattern expanded.

    The reference patterns are collected from the notebook with
    get_reference_patterns (Markdown fenced code blocks or HTML comments)
    and then expanded with expand_references, using the directory that
    contains the notebook as the root. Patterns follow gitignore syntax
    (https://git-scm.com/docs/gitignore).

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    collection of str
        Filename strings relative to the notebook directory, exactly as
        returned by expand_references
    """
    notebook_dir = os.path.dirname(abs_nb_path)
    return expand_references(
        notebook_dir, get_reference_patterns(abs_nb_path, version))
def get_reference_patterns(abs_nb_path, version):
    """Collects the reference patterns declared in every cell of a notebook.

    The notebook is loaded with nbformat.read and each of its cells is
    handed to get_cell_reference_patterns; the per-cell results are
    concatenated in cell order.

    Parameters
    ----------
    abs_nb_path: str
        Absolute path of the notebook on disk
    version: int
        Version of the notebook document format to use

    Returns
    -------
    list
        Pattern strings from the notebook
    """
    patterns = []
    for cell in nbformat.read(abs_nb_path, version).cells:
        cell_patterns = get_cell_reference_patterns(cell)
        # Cells without references contribute nothing
        if cell_patterns:
            patterns.extend(cell_patterns)
    return patterns
def get_cell_reference_patterns(cell):
    '''
    Retrieves the list of references from a single notebook cell. Looks for
    fenced code blocks or HTML comments in Markdown cells, e.g.,

    ```
    some.csv
    foo/
    !foo/bar
    ```

    or

    <!--associate:
    some.csv
    foo/
    !foo/bar
    -->

    Only the first such block in the cell is read. An HTML comment is only
    recognized when the cell source starts with it; otherwise the first
    fenced code block anywhere in the source is used. Lines that contain
    '../', lines that start with '#', and blank lines are dropped.

    Parameters
    ----------
    cell: dict
        Notebook cell object. A missing or None 'cell_type' or 'source'
        is treated as empty, and a 'source' given as a list of strings is
        joined into a single string first.

    Returns
    -------
    list
        Reference patterns found in the cell
    '''
    cell_type = cell.get('cell_type') or ''
    source = cell.get('source') or ''
    # The notebook JSON schema allows source to be a list of line strings
    if isinstance(source, (list, tuple)):
        source = ''.join(source)

    if not cell_type.startswith('markdown'):
        return []

    if source.startswith('<!--associate:'):
        # invisible after execution: unrendered HTML comment
        body = source[len('<!--associate:'):]
        terminator = '-->'
    else:
        # visible after execution: rendered as a code element within a
        # pre element
        offset = source.find('```')
        if offset < 0:
            return []
        body = source[offset + len('```'):]
        terminator = '```'

    return _collect_reference_lines(body.splitlines(), terminator)


def _collect_reference_lines(lines, terminator):
    '''Returns the usable reference lines that precede the terminator line.'''
    collected = []
    for line in lines:
        if line.startswith(terminator):
            break
        # Trying to go out of the current directory leads to
        # trouble when deploying
        if '../' in line or line.startswith('#'):
            continue
        # Clean out blank references
        if line.strip():
            collected.append(line)
    return collected
def expand_references(root_path, references):
    """Expands a set of reference patterns by evaluating them against the
    given root directory. Expansions are performed against patterns
    expressed in the same manner as in gitignore
    (https://git-scm.com/docs/gitignore).

    Patterns without a directory separator are evaluated as shell globs in
    root_path. Patterns with a separator are matched against every file
    found by walking root_path: a trailing separator matches everything
    under that directory prefix, a single '**' matches any path with the
    given prefix and suffix, and anything else is matched with fnmatch.
    A pattern starting with '!' removes its matches from the result, no
    matter how many other patterns matched the same file. Empty or
    whitespace-only references are ignored.

    NOTE: Temporarily changes the current working directory when called
    with at least one separator-free pattern. The previous working
    directory is restored even if globbing fails.

    Parameters
    ----------
    root_path: str
        Assumed root directory for the patterns
    references: list
        Reference patterns from get_reference_patterns expressed with
        forward-slash directory separators

    Returns
    -------
    set
        Filename strings relative to the root path
    """
    simple_patterns = []
    walk_patterns = []
    for ref in references:
        if not ref or not ref.strip():
            continue
        # Use normpath to convert to platform specific slashes, but be sure
        # to retain a trailing slash which normpath pulls off
        pattern = os.path.normpath(ref)
        if ref.endswith('/'):
            pattern += os.sep
        if os.sep in pattern:
            walk_patterns.append(pattern)
        else:
            simple_patterns.append(pattern)

    globbed = []
    negations = []

    if simple_patterns:
        # simple shell globs, evaluated relative to root_path
        cwd = os.getcwd()
        os.chdir(root_path)
        try:
            for pattern in simple_patterns:
                if pattern.startswith('!'):
                    negations.extend(glob.glob(pattern[1:]))
                else:
                    globbed.extend(glob.glob(pattern))
        finally:
            os.chdir(cwd)

    if walk_patterns:
        # Walk the tree once and reuse the listing for every pattern
        candidates = _list_relative_files(root_path)
        for pattern in walk_patterns:
            is_negation = pattern.startswith('!')
            testpattern = pattern[1:] if is_negation else pattern
            matches = [path for path in candidates
                       if _matches_walk_pattern(path, testpattern)]
            if is_negation:
                negations.extend(matches)
            else:
                globbed.extend(matches)

    # Set difference drops a negated file even if several patterns matched it
    return set(globbed) - set(negations)


def _list_relative_files(root_path):
    """Returns the path, relative to root_path, of every file beneath it."""
    relative_files = []
    for dirpath, _, filenames in os.walk(root_path):
        # relpath is immune to a trailing separator on root_path
        rel_dir = os.path.relpath(dirpath, root_path)
        if rel_dir == os.curdir:
            rel_dir = ''
        relative_files.extend(
            os.path.join(rel_dir, filename) for filename in filenames)
    return relative_files


def _matches_walk_pattern(path, testpattern):
    """Tells whether a relative file path matches a separator-bearing pattern."""
    if testpattern.endswith(os.sep):
        # directory prefix
        return path.startswith(testpattern)
    if '**' in testpattern:
        # path wildcard; only a single '**' is supported
        ends = testpattern.split('**')
        return (len(ends) == 2
                and path.startswith(ends[0])
                and path.endswith(ends[1]))
    # segments should be respected
    return fnmatch.fnmatch(path, testpattern)
def copy_filelist(src, dst, src_relative_filenames):
    """Copies each listed file from src into the same relative location
    under dst.

    Entries that are not regular files in src are skipped. When an entry
    lives in a subdirectory, that subdirectory is created under dst first
    and an "already exists" error from doing so is ignored. Directories
    are only created for files that are copied, so empty directories in
    src are never reproduced in dst.

    Parameters
    ----------
    src: str
        Root of the source directory
    dst: str
        Root of the destination directory
    src_relative_filenames: list
        Filenames relative to src
    """
    for relative_name in src_relative_filenames:
        source_file = os.path.join(src, relative_name)
        # Nothing to do for entries that are not files in src
        if not os.path.isfile(source_file):
            continue
        subdir = os.path.dirname(relative_name)
        if subdir:
            # The destination needs the same parent directory
            try:
                os.makedirs(os.path.join(dst, subdir))
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise exc
        shutil.copy2(source_file, os.path.join(dst, relative_name))