duplicati/Tools/Commandline/RestoreFromPython/ijson.py

288 lines
9.4 KiB
Python

# ijson 2.3
# Copyright (c) 2010, Ivan Sagalaev
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name "ijson" nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# This software is provided by the regents and contributors ``as is'' and any
# express or implied warranties, including, but not limited to, the implied
# warranties of merchantability and fitness for a particular purpose are
# disclaimed. in no event shall the regents and contributors be liable for any
# direct, indirect, incidental, special, exemplary, or consequential damages
# (including, but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however caused and
# on any theory of liability, whether in contract, strict liability, or tort
# (including negligence or otherwise) arising in any way out of the use of this
# software, even if advised of the possibility of such damage.
from __future__ import unicode_literals
import decimal
import re
import sys
from codecs import getreader
class JSONError(Exception):
    """Base exception for all parsing errors."""
class IncompleteJSONError(JSONError):
    """Raised when the parser can't read expected data from a stream."""
class ObjectBuilder(object):
    """Assemble a Python object from a sequence of (event, value) pairs.

    Feed every event through ``event()``; the top-level object is stored in
    the ``value`` attribute as soon as its first event has been handled.
    Maps become dicts, arrays become lists, and every other event's value
    is stored as-is.
    """

    def __init__(self):
        def store_root(obj):
            self.value = obj

        # Stack of one-argument callables; the top one stores a value into
        # whichever container is currently being filled.
        self.containers = [store_root]

    def event(self, event, value):
        """Apply a single parser event to the object under construction."""
        if event == 'map_key':
            # Remember the key; the map's setter reads it when the value comes.
            self.key = value
            return
        if event in ('end_array', 'end_map'):
            self.containers.pop()
            return
        if event == 'start_map':
            new_map = {}
            self.containers[-1](new_map)

            def put(item):
                new_map[self.key] = item

            self.containers.append(put)
        elif event == 'start_array':
            new_list = []
            self.containers[-1](new_list)
            self.containers.append(new_list.append)
        else:
            # Scalar: hand it to the innermost open container.
            self.containers[-1](value)
def parse_impl(basic_events):
    """Annotate basic (event, value) pairs with their location prefix.

    Yields ``(prefix, event, value)`` tuples. The prefix is the dot-joined
    path of map keys leading to the event, with the literal ``'item'``
    standing in for every array level.
    """
    stack = []
    for event, value in basic_events:
        if event in ('end_map', 'end_array'):
            # Closing events are reported at the level of their container.
            stack.pop()
            prefix = '.'.join(stack)
        elif event == 'map_key':
            # The key is reported at the map's own level, then replaces the
            # previous key (or the initial placeholder) on the stack.
            prefix = '.'.join(stack[:-1])
            stack[-1] = value
        else:
            prefix = '.'.join(stack)
            if event == 'start_map':
                stack.append(None)  # placeholder until the first map_key
            elif event == 'start_array':
                stack.append('item')
        yield prefix, event, value
def items_impl(prefixed_events, prefix):
    """Yield every complete object found under *prefix*.

    *prefixed_events* is an iterable of ``(prefix, event, value)`` tuples.
    Scalars located at *prefix* are yielded directly; maps and arrays are
    first assembled with ``ObjectBuilder``. If the event stream ends while
    a container is still being assembled, the generator simply stops.
    """
    stream = iter(prefixed_events)
    container_starts = ('start_map', 'start_array')
    try:
        for current, event, value in stream:
            if current != prefix:
                continue
            if event not in container_starts:
                yield value
                continue
            builder = ObjectBuilder()
            closing = event.replace('start', 'end')
            # Feed the builder until the matching end event at this prefix.
            while (current, event) != (prefix, closing):
                builder.event(event, value)
                current, event, value = next(stream)
            yield builder.value
    except StopIteration:
        pass
def number(str_value):
    """Convert a numeric lexeme into an ``int`` or a ``decimal.Decimal``.

    Returns an ``int`` when the value is integral (including forms such as
    ``'1e2'`` or ``'2.0'``), otherwise a ``decimal.Decimal``.

    Raises ``decimal.InvalidOperation`` when *str_value* is not a finite
    number. ``Decimal`` itself accepts ``'nan'``, ``'snan'`` and ``'inf'``,
    which are not JSON numbers and would otherwise make ``int()`` fail with
    ``ValueError``/``OverflowError``; they are rejected here with the same
    exception type as any other malformed number.
    """
    value = decimal.Decimal(str_value)
    if not value.is_finite():
        raise decimal.InvalidOperation(
            'Not a finite number: %r' % (str_value,)
        )
    int_value = int(value)
    if int_value == value:
        return int_value
    return value
class UnexpectedSymbol(JSONError):
    """Parsing error carrying the offending symbol and its position."""

    def __init__(self, symbol, pos):
        message = 'Unexpected symbol %r at %d' % (symbol, pos)
        super().__init__(message)
# Default size passed to each read() call on the input stream.
BUFSIZE = 16 * 1024
# One lexeme: either a run of lowercase letters, digits, 'E', '.', '+' and
# '-' (numbers and bare words such as null/true/false), or any single
# non-whitespace character (punctuation, or the opening quote of a string).
LEXEME_RE = re.compile(r'[a-z0-9eE\.\+-]+|\S')
def Lexer(f, buf_size=BUFSIZE):
    """Split a JSON stream into lexemes.

    Generator yielding ``(position, lexeme)`` tuples. ``position`` is the
    offset of the lexeme counted from the start of the text that was read,
    and ``lexeme`` is either a complete quoted string (quotes included), a
    run of number/bare-word characters, or a single non-whitespace
    character.

    f -- file-like object with ``read(size)``. If it returns bytes it is
         wrapped in a UTF-8 decoding reader.
    buf_size -- size passed to every ``read()`` call.

    Raises ``IncompleteJSONError`` if the input ends inside a string.

    The part of the buffer that has already been yielded is dropped before
    more data is appended. The buffer therefore holds only the lexeme in
    progress plus the newest chunk, instead of growing with the input.
    """
    if isinstance(f.read(0), bytetype):
        f = getreader('utf-8')(f)
    buf = f.read(buf_size)
    pos = 0        # index in buf where the next search starts
    discarded = 0  # number of characters already dropped from buf's front
    while True:
        match = LEXEME_RE.search(buf, pos)
        if match is None:
            # Nothing but whitespace is left in buf: replace it wholesale.
            data = f.read(buf_size)
            if not data:
                break
            discarded += len(buf)
            buf = data
            pos = 0
            continue

        lexeme = match.group()
        if lexeme == '"':
            pos = match.start()
            start = pos + 1
            while True:
                try:
                    end = buf.index('"', start)
                except ValueError:
                    # No closing quote yet: fetch more text.
                    data = f.read(buf_size)
                    if not data:
                        raise IncompleteJSONError('Incomplete string lexeme')
                    # Keep only the unfinished string and re-base indices.
                    buf = buf[pos:] + data
                    discarded += pos
                    start -= pos
                    pos = 0
                    continue
                # Count the backslashes directly before the quote; an odd
                # count means the quote itself is escaped.
                escpos = end - 1
                while buf[escpos] == '\\':
                    escpos -= 1
                if (end - escpos) % 2 == 0:
                    start = end + 1
                else:
                    break
            yield discarded + pos, buf[pos:end + 1]
            pos = end + 1
        else:
            # A lexeme touching the end of buf may continue in the next
            # chunk, so extend the buffer and match again.
            while match.end() == len(buf):
                data = f.read(buf_size)
                if not data:
                    break
                # Drop the consumed prefix before appending.
                buf = buf[pos:] + data
                discarded += pos
                pos = 0
                match = LEXEME_RE.search(buf, pos)
                lexeme = match.group()
            yield discarded + match.start(), lexeme
            pos = match.end()
def unescape(s):
    """Decode the backslash escapes in the contents of a JSON string.

    *s* is the text between the quotes of a string lexeme. Returns *s*
    itself when it contains no backslash.

    ``\\uXXXX`` escapes are turned into the corresponding character. A high
    surrogate immediately followed by an escaped low surrogate (for example
    ``\\ud83d\\ude00``) is combined into the single code point it encodes;
    an unpaired surrogate is kept as-is. ``\\b \\f \\n \\r \\t`` map to
    their control characters and any other escaped character stands for
    itself.
    """
    if '\\' not in s:
        return s

    controls = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    parts = []  # collected pieces, joined once at the end
    start = 0
    length = len(s)
    while start < length:
        pos = s.find('\\', start)
        if pos == -1:
            parts.append(s[start:])
            break
        parts.append(s[start:pos])
        pos += 1  # now at the character after the backslash
        esc = s[pos]
        if esc == 'u':
            code = int(s[pos + 1:pos + 5], 16)
            pos += 4  # now at the last hex digit
            if 0xD800 <= code <= 0xDBFF and s[pos + 1:pos + 3] == '\\u':
                low = int(s[pos + 3:pos + 7], 16)
                if 0xDC00 <= low <= 0xDFFF:
                    # Merge the UTF-16 surrogate pair into one code point.
                    code = (
                        0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                    )
                    pos += 6
            parts.append(chr(code))
        else:
            parts.append(controls.get(esc, esc))
        start = pos + 1
    return ''.join(parts)
def parse_value(lexer, symbol=None, pos=0):
    """Yield the basic events for one JSON value.

    lexer -- iterator of ``(pos, symbol)`` tuples.
    symbol, pos -- an already-read lexeme and its position; when *symbol*
                   is None the next lexeme is taken from *lexer*.

    Raises ``UnexpectedSymbol`` for a lexeme that is not a valid value and
    ``IncompleteJSONError`` when *lexer* runs out of lexemes.
    """
    literals = {
        'null': ('null', None),
        'true': ('boolean', True),
        'false': ('boolean', False),
    }
    try:
        if symbol is None:
            pos, symbol = next(lexer)
        if symbol in literals:
            yield literals[symbol]
        elif symbol == '[':
            yield from parse_array(lexer)
        elif symbol == '{':
            yield from parse_object(lexer)
        elif symbol[0] == '"':
            # Strip the surrounding quotes before decoding escapes.
            yield ('string', unescape(symbol[1:-1]))
        else:
            # Anything else has to be a number.
            try:
                yield ('number', number(symbol))
            except decimal.InvalidOperation:
                raise UnexpectedSymbol(symbol, pos)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
def parse_array(lexer):
    """Yield the basic events for an array whose ``[`` was already consumed.

    Emits ``('start_array', None)``, the events of every element, then
    ``('end_array', None)``. Raises ``UnexpectedSymbol`` when elements are
    not separated by ``,`` and ``IncompleteJSONError`` when *lexer* ends
    before the closing ``]``.
    """
    yield ('start_array', None)
    try:
        pos, symbol = next(lexer)
        more_elements = symbol != ']'
        while more_elements:
            yield from parse_value(lexer, symbol, pos)
            pos, symbol = next(lexer)
            if symbol == ']':
                more_elements = False
            elif symbol == ',':
                # Whatever follows the comma is parsed as the next element.
                pos, symbol = next(lexer)
            else:
                raise UnexpectedSymbol(symbol, pos)
        yield ('end_array', None)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
def parse_object(lexer):
    """Yield the basic events for an object whose ``{`` was already consumed.

    Emits ``('start_map', None)``, then for every member a
    ``('map_key', key)`` event followed by the events of its value, and
    finally ``('end_map', None)``. Raises ``UnexpectedSymbol`` when a key is
    not a string or a ``:`` / ``,`` separator is missing, and
    ``IncompleteJSONError`` when *lexer* ends before the closing ``}``.
    """
    yield ('start_map', None)
    try:
        pos, symbol = next(lexer)
        more_members = symbol != '}'
        while more_members:
            if symbol[0] != '"':
                raise UnexpectedSymbol(symbol, pos)
            yield ('map_key', unescape(symbol[1:-1]))
            pos, symbol = next(lexer)
            if symbol != ':':
                raise UnexpectedSymbol(symbol, pos)
            yield from parse_value(lexer, None, pos)
            pos, symbol = next(lexer)
            if symbol == '}':
                more_members = False
            elif symbol == ',':
                # Whatever follows the comma is checked as the next key.
                pos, symbol = next(lexer)
            else:
                raise UnexpectedSymbol(symbol, pos)
        yield ('end_map', None)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
def basic_parse(file, buf_size=BUFSIZE):
    """Yield basic ``(event, value)`` pairs for the JSON document in *file*.

    file -- file-like object handed to ``Lexer``.
    buf_size -- read size handed to ``Lexer``.

    Raises ``JSONError('Additional data')`` if any lexeme follows the
    top-level value.
    """
    lexer = iter(Lexer(file, buf_size))
    yield from parse_value(lexer)
    # Exactly one top-level value is allowed; any further lexeme is an error.
    exhausted = object()
    if next(lexer, exhausted) is not exhausted:
        raise JSONError('Additional data')
def parse(file, buf_size=BUFSIZE):
    """Return an iterator of ``(prefix, event, value)`` tuples for *file*.

    The basic events of ``basic_parse`` are passed through ``parse_impl``.
    """
    basic_events = basic_parse(file, buf_size=buf_size)
    return parse_impl(basic_events)
def items(file, prefix, buf_size=BUFSIZE):
    """Return an iterator over the objects located at *prefix* in *file*.

    file -- file-like object containing the JSON document.
    prefix -- location of the wanted objects, in the dotted form produced
              by ``parse`` (``'item'`` stands for an array level).
    buf_size -- read size forwarded to ``parse``; defaults to ``BUFSIZE``,
                which is what was always used before this parameter existed.
    """
    return items_impl(parse(file, buf_size=buf_size), prefix)
def b2s(b):
    """Decode the UTF-8 encoded bytes *b* and return the resulting text."""
    text = b.decode('utf-8')
    return text
# This module requires Python 3 or newer.
assert sys.version_info[0] >= 3
# Lexer compares the type of f.read(0) against this to decide whether the
# input has to be wrapped in a UTF-8 decoding reader.
bytetype = bytes