142 lines
4.0 KiB
Python
142 lines
4.0 KiB
Python
# vim:fileencoding=utf-8:noet
|
||
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||
|
||
import codecs
|
||
|
||
from powerline.lint.markedjson.error import MarkedError, Mark, NON_PRINTABLE_RE
|
||
from powerline.lib.unicode import unicode
|
||
|
||
|
||
# This module contains abstractions for the input stream. You don’t have to
# look any further: there is no pretty code here.
|
||
|
||
|
||
class ReaderError(MarkedError):
	'''Error raised by Reader when the input stream contains invalid data

	Reader raises it for bytes that cannot be decoded and for characters
	that are not allowed. It adds nothing to MarkedError; it exists so that
	reader failures can be told apart by type.
	'''
|
||
|
||
|
||
class Reader(object):
	'''Decoding, position-tracking reader over a byte stream

	Reader:
	- decodes the raw stream data as UTF-8 into a unicode string,
	- checks that the decoded characters are in the allowed range,
	- adds '\\0' to the end of the data once the stream is exhausted.

	Reader accepts
	- a file-like object whose ``read`` method returns a byte string.

	Attributes of note:
	- ``buffer``/``pointer``: decoded characters not yet dropped and the
	  cursor inside them.
	- ``full_buffer``/``full_pointer``: everything decoded so far and the
	  cursor inside it (used for building marks).
	- ``index``, ``line``, ``column``: zero-based position of the cursor.

	It is neither pretty nor fast.
	'''

	def __init__(self, stream):
		'''Read from ``stream`` and decode enough data to serve one character

		:param stream:
			File-like object. Its ``read`` method is called with a negative
			size; its ``name`` attribute (if any) is used in marks.
		'''
		self.stream = stream
		self.name = getattr(stream, 'name', '<file>')
		self.stream_pointer = 0
		self.eof = False
		self.buffer = ''
		self.pointer = 0
		self.full_buffer = unicode('')
		self.full_pointer = 0
		self.raw_buffer = None
		self.raw_decode = codecs.utf_8_decode
		self.encoding = 'utf-8'
		self.index = 0
		self.line = 0
		self.column = 0

		# Pull raw data until at least two bytes are available or the stream
		# reports EOF, then decode the first chunk.
		while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
			self.update_raw()
		self.update(1)

	def peek(self, index=0):
		'''Return the character ``index`` positions after the cursor

		Does not move the cursor. Decodes more data when the buffer is too
		short; raises IndexError if the character is still not available.
		'''
		try:
			return self.buffer[self.pointer + index]
		except IndexError:
			self.update(index + 1)
			return self.buffer[self.pointer + index]

	def prefix(self, length=1):
		'''Return up to ``length`` characters starting at the cursor

		Does not move the cursor. The result is shorter than ``length`` when
		fewer characters remain.
		'''
		if self.pointer + length >= len(self.buffer):
			self.update(length)
		return self.buffer[self.pointer:self.pointer + length]

	def update_pointer(self, length):
		'''Advance the cursor by ``length`` characters of ``self.buffer``

		Updates ``pointer``, ``full_pointer``, ``index``, ``line`` and
		``column``. ``length`` must be a non-negative number not exceeding
		the amount of buffered characters after the cursor.
		'''
		while length:
			ch = self.buffer[self.pointer]
			self.pointer += 1
			self.full_pointer += 1
			self.index += 1
			if ch == '\n':
				self.line += 1
				self.column = 0
			else:
				self.column += 1
			length -= 1

	def forward(self, length=1):
		'''Move the cursor ``length`` characters ahead

		Makes sure one character past the new position is buffered first.
		'''
		if self.pointer + length + 1 >= len(self.buffer):
			self.update(length + 1)
		self.update_pointer(length)

	def get_mark(self):
		'''Return a Mark describing the current cursor position'''
		return Mark(self.name, self.line, self.column, self.full_buffer, self.full_pointer)

	def check_printable(self, data):
		'''Raise ReaderError if ``data`` contains a disallowed character

		:param data:
			Freshly decoded text to be checked against NON_PRINTABLE_RE.

		On failure the cursor is advanced by the offset of the first match
		within ``data`` before the mark for the error is taken.
		'''
		match = NON_PRINTABLE_RE.search(data)
		if match:
			self.update_pointer(match.start())
			raise ReaderError(
				'while reading from stream', None,
				'found special characters which are not allowed',
				self.get_mark()
			)

	def update(self, length):
		'''Make sure at least ``length`` characters are buffered at the cursor

		Drops already consumed characters from ``self.buffer``, then reads
		and decodes raw data until enough characters are available or the
		stream ends. At the end of the stream '\\0' is appended and
		``raw_buffer`` is set to None, which turns later calls into no-ops.

		:raises ReaderError:
			If the raw data cannot be decoded or contains characters that
			are not allowed.
		'''
		if self.raw_buffer is None:
			return
		self.buffer = self.buffer[self.pointer:]
		self.pointer = 0
		while len(self.buffer) < length:
			if not self.eof:
				self.update_raw()
			try:
				data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof)
			except UnicodeDecodeError as exc:
				raise self._decode_error(exc.start)
			self.buffer += data
			self.full_buffer += data
			self.raw_buffer = self.raw_buffer[converted:]
			self.check_printable(data)
			if self.eof:
				self.buffer += '\0'
				self.raw_buffer = None
				break

	def _decode_error(self, start):
		'''Build the ReaderError for the undecodable byte at ``raw_buffer[start]``

		Decodes and buffers the valid data preceding the offending byte,
		appends a ``<N>`` placeholder (N is the decimal byte value) to
		``full_buffer`` and moves the cursor so that the line and column of
		the mark refer to the end of the valid data.
		'''
		# Slice instead of indexing: indexing a byte string yields an int on
		# Python 3 and a one-character string on Python 2, while ord() of a
		# one-byte slice works on both.
		byte = ord(self.raw_buffer[start:start + 1])
		data, converted = self.raw_decode(self.raw_buffer[:start], 'strict', self.eof)
		self.buffer += data
		# Mark pointers index full_buffer, so the position must be counted in
		# characters: it is where the placeholder is about to be inserted.
		position = len(self.full_buffer) + len(data)
		self.full_buffer += data + '<' + str(byte) + '>'
		self.raw_buffer = self.raw_buffer[converted:]
		# Stop on the last successfully decoded character. Counting buffered
		# characters (not raw bytes) and clamping at zero keeps
		# update_pointer() inside the buffer even when the bad byte is the
		# very first one or follows multibyte characters.
		self.update_pointer(max(len(self.buffer) - self.pointer - 1, 0))
		return ReaderError(
			'while reading from stream', None,
			'found character #x%04x that cannot be decoded by UTF-8 codec' % byte,
			Mark(self.name, self.line, self.column, self.full_buffer, position)
		)

	def update_raw(self, size=-1):
		'''Read raw data from the stream and append it to ``raw_buffer``

		:param size:
			Passed to ``stream.read``. Must be negative (was 4096 once).

		Sets ``eof`` when the stream returns no data.
		'''
		# WARNING: reading the whole stream at once. To change this behaviour
		# to the former reading of N characters at once one must make sure
		# that reading never ends at a partial unicode character.
		assert size < 0, 'partial reads are not supported'
		data = self.stream.read(size)
		if self.raw_buffer is None:
			self.raw_buffer = data
		else:
			self.raw_buffer += data
		self.stream_pointer += len(data)
		if not data:
			self.eof = True
|