165 lines
5.0 KiB
Python
165 lines
5.0 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
pygments.formatters.other
|
||
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
|
|
||
|
Other formatters: NullFormatter, RawTokenFormatter, TestcaseFormatter.
|
||
|
|
||
|
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
|
||
|
:license: BSD, see LICENSE for details.
|
||
|
"""
|
||
|
|
||
|
from pygments.formatter import Formatter
|
||
|
from pygments.util import get_choice_opt
|
||
|
from pygments.token import Token
|
||
|
from pygments.console import colorize
|
||
|
|
||
|
__all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']
|
||
|
|
||
|
|
||
|
class NullFormatter(Formatter):
    """
    Pass the token text through to the output without any formatting.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        """
        Write the text of every token in *tokensource* to *outfile*.

        Token types are ignored.  When ``self.encoding`` is set, each piece
        of text is encoded with it before being written; otherwise the text
        is written unchanged.
        """
        codec = self.encoding
        for _ttype, text in tokensource:
            outfile.write(text.encode(codec) if codec else text)
|
||
|
|
||
|
|
||
|
class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
    be converted to a token stream with the `RawTokenLexer`, described in the
    :doc:`lexer list <lexers>`.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color.  If
        set but with no value, defaults to ``'red'``.

        .. versionadded:: 0.11

    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        """
        Read the ``compress`` and ``error_color`` options.

        Raises ``ValueError`` when ``error_color`` names a color that
        ``colorize`` rejects with a ``KeyError``.
        """
        Formatter.__init__(self, **options)
        # We ignore self.encoding if it is set, since it gets set for lexer
        # and formatter if given with -Oencoding on the command line.
        # The RawTokenFormatter outputs only ASCII. Override here.
        self.encoding = 'ascii'  # let pygments.format() do the right thing
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        # The option given without a value arrives as True: use the default.
        if self.error_color is True:
            self.error_color = 'red'
        if self.error_color is not None:
            # Probe the color name once up front so that a bad name fails
            # here rather than in the middle of formatting.
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError("Invalid color %r specified" %
                                 self.error_color)

    def format(self, tokensource, outfile):
        """
        Write one ``tokentype<TAB>repr(value)`` line per token to *outfile*.

        *outfile* must accept bytes; a ``TypeError`` is raised otherwise.
        Depending on ``self.compress`` the lines are written plain, gzip
        compressed or bz2 compressed.  When ``self.error_color`` is set,
        lines for ``Token.Error`` tokens are wrapped with ``colorize``.
        The compressed stream is finalized and *outfile* is flushed at the
        end; *outfile* itself is never closed.
        """
        # Fail early, before any compressor is set up, on a text-mode file.
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')
        if self.compress == 'gz':
            import gzip
            raw_outfile = outfile
            gzfile = gzip.GzipFile('', 'wb', 9, raw_outfile)

            def write(text):
                gzfile.write(text.encode())

            def flush():
                # GzipFile.flush() alone never emits the gzip trailer
                # (CRC32 + size), which leaves a truncated stream.  close()
                # writes it and, because the file object was passed in,
                # leaves raw_outfile open for the caller.
                gzfile.close()
                raw_outfile.flush()
        elif self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text.encode()))

            def flush():
                # Emit whatever the compressor still buffers, then flush.
                outfile.write(compressor.flush())
                outfile.flush()
        else:
            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush

        if self.error_color:
            for ttype, value in tokensource:
                line = "%s\t%r\n" % (ttype, value)
                if ttype is Token.Error:
                    write(colorize(self.error_color, line))
                else:
                    write(line)
        else:
            # Separate loop so the common case skips the per-token check.
            for ttype, value in tokensource:
                write("%s\t%r\n" % (ttype, value))
        flush()
|
||
|
|
||
|
|
||
|
# Source templates that TestcaseFormatter wraps around the generated token
# list.  TESTCASE_BEFORE takes the lexed text as its single %r argument.
# The leading whitespace inside the literals is significant:
# TestcaseFormatter.format emits each token entry indented by 12 spaces, so
# the ``def`` sits at 4 and its body at 8 to yield a correctly nested
# snippet.
TESTCASE_BEFORE = u'''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''
TESTCASE_AFTER = u'''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''
|
||
|
|
||
|
|
||
|
class TestcaseFormatter(Formatter):
    """
    Render a token stream as source code for a new lexer testcase.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        """
        Set up the formatter; only no encoding or ``'utf-8'`` is accepted.

        Raises ``ValueError`` for any other encoding.
        """
        Formatter.__init__(self, **options)
        if self.encoding not in (None, 'utf-8'):
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        """
        Write a testcase built from *tokensource* to *outfile*.

        The output is ``TESTCASE_BEFORE`` filled with the concatenated token
        text, one ``(tokentype, value)`` entry per token, then
        ``TESTCASE_AFTER``.  Text is written as one string when
        ``self.encoding`` is ``None``; otherwise each of the three parts is
        written separately as UTF-8 bytes.  *outfile* is flushed afterwards.
        """
        pad = ' ' * 12
        # Materialize the stream: it is walked twice, once for the raw text
        # and once for the token entries.
        pairs = [(ttype, text) for ttype, text in tokensource]

        fragment = u''.join([text for _ttype, text in pairs])
        entries = u''.join(['%s(%s, %r),\n' % (pad, ttype, text)
                            for ttype, text in pairs])
        header = TESTCASE_BEFORE % (fragment,)
        footer = TESTCASE_AFTER

        if self.encoding is None:
            outfile.write(header + entries + footer)
        else:
            for part in (header, entries, footer):
                outfile.write(part.encode('utf-8'))
        outfile.flush()
|