# Mirror of https://github.com/duplicati/duplicati.git
# (synced 2025-11-28 11:30:24 +08:00; 288 lines, 9.4 KiB, Python)
# ijson 2.3
|
|
# Copyright (c) 2010, Ivan Sagalaev
|
|
# All rights reserved.
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# * Neither the name "ijson" nor the names of its contributors
|
|
# may be used to endorse or promote products derived from this software
|
|
# without specific prior written permission.
|
|
#
|
|
# This software is provided by the regents and contributors ``as is'' and any
|
|
# express or implied warranties, including, but not limited to, the implied
|
|
# warranties of merchantability and fitness for a particular purpose are
|
|
# disclaimed. in no event shall the regents and contributors be liable for any
|
|
# direct, indirect, incidental, special, exemplary, or consequential damages
|
|
# (including, but not limited to, procurement of substitute goods or services;
|
|
# loss of use, data, or profits; or business interruption) however caused and
|
|
# on any theory of liability, whether in contract, strict liability, or tort
|
|
# (including negligence or otherwise) arising in any way out of the use of this
|
|
# software, even if advised of the possibility of such damage.
|
|
|
|
from __future__ import unicode_literals
|
|
import decimal
|
|
import re
|
|
import sys
|
|
from codecs import getreader
|
|
|
|
class JSONError(Exception):
    """Base exception for all parsing errors."""
|
|
|
|
class IncompleteJSONError(JSONError):
    """Raised when the parser can't read expected data from a stream."""
|
|
|
|
class ObjectBuilder(object):
    """Assemble a Python object from a sequence of (event, value) pairs.

    ``containers`` is a stack of one-argument callables; the top of the stack
    stores the next value into whichever container is currently open. The
    bottom entry stores the finished top-level object in ``self.value``.
    """

    def __init__(self):
        def store_root(value):
            self.value = value

        self.containers = [store_root]

    def event(self, event, value):
        """Feed one event into the builder.

        'map_key' remembers the key for the next value, 'start_map' and
        'start_array' open a new container, 'end_map' and 'end_array' close
        the current one, and any other event stores ``value`` as a scalar.
        """
        if event == 'map_key':
            self.key = value
            return

        if event in ('end_array', 'end_map'):
            self.containers.pop()
            return

        store = self.containers[-1]

        if event == 'start_map':
            new_map = {}
            store(new_map)

            def store_member(member):
                # self.key is read at call time, i.e. the most recent map_key.
                new_map[self.key] = member

            self.containers.append(store_member)
        elif event == 'start_array':
            new_list = []
            store(new_list)
            self.containers.append(new_list.append)
        else:
            store(value)
|
|
|
|
def parse_impl(basic_events):
    """Annotate basic (event, value) pairs with a dotted path prefix.

    Yields (prefix, event, value) triples. The prefix is the '.'-joined path
    of map keys leading to the event, with 'item' standing for array
    elements. Container start/end events carry the prefix of the container
    itself, and a 'map_key' event carries the prefix of its enclosing map.
    """
    path = []
    for event, value in basic_events:
        if event in ('end_map', 'end_array'):
            # Leave the container first so the prefix names the container.
            path.pop()
            prefix = '.'.join(path)
        elif event == 'map_key':
            prefix = '.'.join(path[:-1])
            path[-1] = value
        else:
            # Scalars and container starts are reported at the current path.
            prefix = '.'.join(path)
            if event == 'start_map':
                # Placeholder that the first map_key will overwrite.
                path.append(None)
            elif event == 'start_array':
                path.append('item')

        yield prefix, event, value
|
|
|
|
def items_impl(prefixed_events, prefix):
    """Yield every complete value found under ``prefix``.

    ``prefixed_events`` is an iterable of (prefix, event, value) triples.
    Scalars whose prefix equals ``prefix`` are yielded directly; maps and
    arrays are rebuilt with ObjectBuilder and yielded once their matching end
    event is seen. If the events run out in the middle of a container, the
    generator finishes without yielding the partial object.
    """
    events = iter(prefixed_events)
    for current, event, value in events:
        if current != prefix:
            continue

        if event not in ('start_map', 'start_array'):
            yield value
            continue

        closing = event.replace('start', 'end')
        builder = ObjectBuilder()
        while (current, event) != (prefix, closing):
            builder.event(event, value)
            try:
                current, event, value = next(events)
            except StopIteration:
                # Input ended inside the container: stop quietly.
                return
        yield builder.value
|
|
|
|
def number(str_value):
    """Convert a numeric lexeme to an int or a decimal.Decimal.

    Returns an ``int`` when the value is integral (so '1.0' and '1e2' become
    ints) and a ``decimal.Decimal`` otherwise.

    Raises ``decimal.InvalidOperation`` when ``str_value`` is not a finite
    number. ``Decimal`` itself accepts spellings such as 'nan', 'inf' and
    'infinity'; without the explicit check below, ``int()`` would fail on
    them with ValueError/OverflowError instead of the exception that callers
    handle for malformed numbers.
    """
    parsed = decimal.Decimal(str_value)
    if not parsed.is_finite():
        raise decimal.InvalidOperation(
            'Not a finite number: %r' % (str_value,)
        )
    as_int = int(parsed)
    if as_int == parsed:
        return as_int
    return parsed
|
|
|
|
class UnexpectedSymbol(JSONError):
    """Raised when the parser meets a lexeme it does not expect.

    The exception message contains the repr of the offending symbol and the
    position that was passed in.
    """

    def __init__(self, symbol, pos):
        message = 'Unexpected symbol %r at %d' % (symbol, pos)
        super(UnexpectedSymbol, self).__init__(message)
|
|
|
|
# Default size passed to f.read() each time the lexer needs more input.
BUFSIZE = 16 * 1024
# A lexeme is either a run of lowercase letters, digits, 'E', '.', '+' and '-'
# (covers numbers and the bare words null/true/false) or any single
# non-whitespace character (punctuation, or the opening quote of a string).
LEXEME_RE = re.compile(r'[a-z0-9eE\.\+-]+|\S')
|
|
|
|
def Lexer(f, buf_size=BUFSIZE):
    """Split the JSON text read from ``f`` into lexemes.

    Yields ``(position, lexeme)`` tuples, where ``position`` is the offset of
    the lexeme from the start of the decoded input. String lexemes are
    yielded with their surrounding double quotes and with escape sequences
    left untouched.

    ``f`` must provide ``read(size)``. If ``f.read(0)`` returns bytes, the
    stream is wrapped in a UTF-8 reader first. ``buf_size`` is the size
    passed to every ``read`` call.

    Raises ``IncompleteJSONError`` when the input ends inside a string.

    Whenever more input has to be appended to the buffer, the part that has
    already been yielded is dropped, so the buffer holds at most the current
    lexeme plus one freshly read chunk rather than growing with the input.
    """
    if isinstance(f.read(0), bytetype):
        f = getreader('utf-8')(f)
    buf = f.read(buf_size)
    pos = 0
    # Number of characters already dropped from the front of ``buf``;
    # added to buffer indexes to report absolute positions.
    discarded = 0
    while True:
        match = LEXEME_RE.search(buf, pos)
        if match is None:
            # Only whitespace (or nothing) is left: replace the buffer.
            data = f.read(buf_size)
            if not data:
                break
            discarded += len(buf)
            buf = data
            pos = 0
            continue

        lexeme = match.group()
        if lexeme == '"':
            pos = match.start()
            start = pos + 1
            while True:
                end = buf.find('"', start)
                if end == -1:
                    data = f.read(buf_size)
                    if not data:
                        raise IncompleteJSONError('Incomplete string lexeme')
                    # Everything up to the end of the old buffer has been
                    # searched already, so continue right at the new data.
                    # The backslash count below still looks backwards into
                    # the retained part of the string.
                    resume_at = len(buf) - pos
                    buf = buf[pos:] + data
                    discarded += pos
                    pos = 0
                    start = resume_at
                    continue
                # A quote preceded by an odd number of backslashes is escaped
                # and does not terminate the string.
                escpos = end - 1
                while buf[escpos] == '\\':
                    escpos -= 1
                if (end - escpos) % 2 == 0:
                    start = end + 1
                else:
                    break
            yield discarded + pos, buf[pos:end + 1]
            pos = end + 1
        else:
            # A lexeme touching the end of the buffer may continue in the
            # next chunk: extend the buffer and match again.
            while match.end() == len(buf):
                data = f.read(buf_size)
                if not data:
                    break
                begin = match.start()
                buf = buf[begin:] + data
                discarded += begin
                match = LEXEME_RE.search(buf, 0)
                lexeme = match.group()
            yield discarded + match.start(), lexeme
            pos = match.end()
|
|
|
|
def unescape(s):
    """Decode the backslash escapes in the body of a JSON string.

    ``s`` is the text between the quotes. ``\\b \\f \\n \\r \\t`` map to
    their control characters, ``\\uXXXX`` maps to the character with that
    code, and any other escaped character stands for itself. A high
    surrogate escape immediately followed by a low surrogate escape (e.g.
    ``\\ud83d\\ude00``) is combined into the single character it encodes;
    an unpaired surrogate is kept as is.

    Returns ``s`` itself when it contains no backslash.

    Raises ``ValueError`` if the hex digits of a ``\\u`` escape are invalid
    and ``IndexError`` if ``s`` ends with a lone backslash.
    """
    if '\\' not in s:
        return s

    simple = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
    parts = []
    start = 0
    length = len(s)
    while start < length:
        pos = s.find('\\', start)
        if pos == -1:
            parts.append(s[start:])
            break
        parts.append(s[start:pos])
        pos += 1
        esc = s[pos]
        if esc == 'u':
            code = int(s[pos + 1:pos + 5], 16)
            pos += 4  # now on the last hex digit
            if 0xD800 <= code <= 0xDBFF and s[pos + 1:pos + 3] == '\\u':
                low = int(s[pos + 3:pos + 7], 16)
                if 0xDC00 <= low <= 0xDFFF:
                    # Merge the UTF-16 surrogate pair into one code point.
                    code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                    pos += 6
            parts.append(chr(code))
        else:
            parts.append(simple.get(esc, esc))
        start = pos + 1
    return ''.join(parts)
|
|
|
|
def parse_value(lexer, symbol=None, pos=0):
    """Yield the (event, value) pairs for a single JSON value.

    ``lexer`` is an iterator of (position, lexeme) tuples. When ``symbol`` is
    None the first lexeme of the value is taken from ``lexer``; otherwise
    ``symbol`` and ``pos`` describe a lexeme that was already consumed.

    Raises ``UnexpectedSymbol`` for a lexeme that is not a valid value and
    ``IncompleteJSONError`` when the lexer is exhausted too early.
    """
    literals = {
        'null': ('null', None),
        'true': ('boolean', True),
        'false': ('boolean', False),
    }
    try:
        if symbol is None:
            pos, symbol = next(lexer)

        if symbol in literals:
            yield literals[symbol]
        elif symbol == '[':
            for array_event in parse_array(lexer):
                yield array_event
        elif symbol == '{':
            for object_event in parse_object(lexer):
                yield object_event
        elif symbol[0] == '"':
            # Strip the surrounding quotes before decoding escapes.
            yield ('string', unescape(symbol[1:-1]))
        else:
            # Anything else has to be a number.
            try:
                yield ('number', number(symbol))
            except decimal.InvalidOperation:
                raise UnexpectedSymbol(symbol, pos)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
|
|
|
|
def parse_array(lexer):
    """Yield the events of a JSON array whose '[' was already consumed.

    Emits 'start_array', the events of each element, then 'end_array'.

    Raises ``UnexpectedSymbol`` when something other than ',' or ']' follows
    an element, and ``IncompleteJSONError`` when the lexer runs out before
    the closing ']'.
    """
    yield ('start_array', None)
    try:
        pos, symbol = next(lexer)
        expecting_element = symbol != ']'
        while expecting_element:
            for element_event in parse_value(lexer, symbol, pos):
                yield element_event
            pos, symbol = next(lexer)
            if symbol == ']':
                expecting_element = False
            elif symbol == ',':
                # The lexeme after a comma is always parsed as a value, so a
                # trailing comma is reported by parse_value.
                pos, symbol = next(lexer)
            else:
                raise UnexpectedSymbol(symbol, pos)
        yield ('end_array', None)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
|
|
|
|
def parse_object(lexer):
    """Yield the events of a JSON object whose '{' was already consumed.

    Emits 'start_map', then a 'map_key' event followed by the value's events
    for every member, then 'end_map'.

    Raises ``UnexpectedSymbol`` when a key is not a string, when ':' is
    missing after a key, or when something other than ',' or '}' follows a
    member. Raises ``IncompleteJSONError`` when the lexer runs out before
    the closing '}'.
    """
    yield ('start_map', None)
    try:
        pos, symbol = next(lexer)
        expecting_member = symbol != '}'
        while expecting_member:
            if symbol[0] != '"':
                raise UnexpectedSymbol(symbol, pos)
            # Strip the surrounding quotes before decoding escapes.
            yield ('map_key', unescape(symbol[1:-1]))

            pos, symbol = next(lexer)
            if symbol != ':':
                raise UnexpectedSymbol(symbol, pos)

            for member_event in parse_value(lexer, None, pos):
                yield member_event

            pos, symbol = next(lexer)
            if symbol == '}':
                expecting_member = False
            elif symbol == ',':
                pos, symbol = next(lexer)
            else:
                raise UnexpectedSymbol(symbol, pos)
        yield ('end_map', None)
    except StopIteration:
        raise IncompleteJSONError('Incomplete JSON data')
|
|
|
|
def basic_parse(file, buf_size=BUFSIZE):
    """Yield (event, value) pairs for the single JSON document in ``file``.

    ``buf_size`` is handed to the lexer as its read size.

    Raises ``JSONError`` if any lexeme follows the top-level value.
    """
    lexer = iter(Lexer(file, buf_size))
    for basic_event in parse_value(lexer):
        yield basic_event

    # The lexer yields tuples, so None is a safe "exhausted" marker.
    if next(lexer, None) is not None:
        raise JSONError('Additional data')
|
|
|
|
def parse(file, buf_size=BUFSIZE):
    """Return an iterator of (prefix, event, value) triples for ``file``.

    ``buf_size`` is forwarded to basic_parse.
    """
    basic_events = basic_parse(file, buf_size=buf_size)
    return parse_impl(basic_events)
|
|
|
|
def items(file, prefix, buf_size=BUFSIZE):
    """Return an iterator over the values found under ``prefix`` in ``file``.

    ``prefix`` is a dotted path as produced by parse ('' for the top-level
    value, 'item' for array elements). ``buf_size`` is forwarded to parse so
    the read size can be tuned here just as in parse and basic_parse; it
    defaults to BUFSIZE, which keeps existing two-argument calls unchanged.
    """
    return items_impl(parse(file, buf_size=buf_size), prefix)
|
|
|
|
def b2s(b):
    """Decode the UTF-8 encoded bytes ``b`` and return the resulting text."""
    text = b.decode('utf-8')
    return text
|
|
|
|
# This module supports Python 3 only (the check is skipped when Python runs
# with assertions disabled, e.g. -O).
assert sys.version_info[0] >= 3
# Type that Lexer compares the result of f.read(0) against to detect a byte
# stream that needs UTF-8 decoding.
bytetype = bytes
|