226 lines
7.2 KiB
Python
226 lines
7.2 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2020 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Provides ddl preprocessing for the Spanner ddl."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import logging
|
|
|
|
from googlecloudsdk.core import exceptions
|
|
|
|
|
|
class DDLSyntaxError(exceptions.Error):
  """Error raised when the DDL text handed to the parser is malformed."""
|
|
|
|
|
|
class DDLParser:
  """Parser for splitting ddl statements preserving GoogleSQL string literals.

  DDLParser has a list of modes. If a mode is active, control is given to that
  mode. If no mode is active, the parser tries to enter the first mode in the
  list that matches at the current position. The parser itself handles
  splitting statements upon ';'.

  During parsing, a DDL has the following parts:
    * parts that have been processed: emitted or skipped.
    * followed by a buffer that has been matched by the current mode (or by
      the parser when no mode is active), which will be emitted or skipped.
      Its start index is mode_start_index_.
    * followed by the next character, indexed by next_index_, which could
      direct the parser to enter or exit a mode.
    * followed by the unprocessed characters.

  DDLParser:
    * acts as a default mode.
    * provides utilities used by DDLParserMode to drive the parsing.
  """

  def __init__(self, ddl):
    """Sets up the parser state for one DDL text.

    Args:
      ddl: the DDL text to split into statements.
    """
    self.ddl_ = ddl
    # Index of the current character to process.
    self.next_index_ = 0
    # Mode the parser is in now; None while the parser acts as default mode.
    self.mode_ = None
    # Start index of the buffer that has been matched by a mode or the parser.
    self.mode_start_index_ = 0
    # List of modes. The first one that the parser could enter wins in case of
    # conflict, so the triple-quote modes must precede the single-quote ones.
    self.modes_ = [
        # Comment: skipped up to (not including) the end of the line.
        self.SkippingMode('--', ['\n', '\r']),
        # For all the string modes below, we need to escape \\. If we don't,
        # \\" will trigger mode exiting.
        # Triple double quote.
        # We need to escape \", or \""" will trigger mode exiting.
        self.PreservingMode('"""', ['"""'], ['\\"', '\\\\']),
        # Triple single quote.
        # We need to escape \', or \''' will trigger mode exiting.
        self.PreservingMode("'''", ["'''"], ["\\'", '\\\\']),
        # Single double quote.
        self.PreservingMode('"', ['"'], ['\\"', '\\\\']),
        # Single single quote.
        self.PreservingMode("'", ["'"], ["\\'", '\\\\']),
        # Single back quote.
        self.PreservingMode('`', ['`'], ['\\`', '\\\\']),
    ]
    # A list of statements. A statement is a list of ddl fragments.
    self.statements_ = []
    self.StartNewStatement()
    self.logger_ = logging.getLogger('SpannerDDLParser')

  def SkippingMode(self, enter_seq, exit_seqs):
    """Builds a mode whose matched text is dropped from the output."""
    return DDLParserMode(self, enter_seq, exit_seqs, None, True)

  def PreservingMode(self, enter_seq, exit_seqs, escape_sequences):
    """Builds a mode whose matched text is emitted verbatim."""
    return DDLParserMode(self, enter_seq, exit_seqs, escape_sequences, False)

  def IsEof(self):
    """Returns True once every character of the DDL has been consumed."""
    # '>=' rather than '==' so that an index that ever moves past the end
    # still terminates parsing instead of reading out of range.
    return self.next_index_ >= len(self.ddl_)

  def Advance(self, l):
    """Moves the current index forward by l characters."""
    self.next_index_ += l

  def StartNewStatement(self):
    """Opens a fresh, empty statement that later fragments are added to."""
    self.ddl_parts_ = []
    self.statements_.append(self.ddl_parts_)

  def EmitBuffer(self):
    """Appends the buffered text to the current statement and resets it."""
    if self.mode_start_index_ >= self.next_index_:
      # Buffer is empty.
      return
    self.ddl_parts_.append(
        self.ddl_[self.mode_start_index_:self.next_index_])
    self.SkipBuffer()
    self.logger_.debug('emitted: %s', self.ddl_parts_[-1])

  def SkipBuffer(self):
    """Discards the buffered text by moving the buffer start to next_index_."""
    self.mode_start_index_ = self.next_index_

  def EnterMode(self, mode):
    """Makes mode the active mode."""
    self.logger_.debug('enter mode: %s at index: %d',
                       mode.enter_seq_, self.next_index_)
    self.mode_ = mode

  def ExitMode(self):
    """Leaves the active mode; the parser becomes the default mode again."""
    self.logger_.debug('exit mode: %s at index: %d',
                       self.mode_.enter_seq_, self.next_index_)
    self.mode_ = None

  def StartsWith(self, s):
    """Returns whether the unprocessed text starts with s."""
    # Compare in place at next_index_; slicing the remaining text first would
    # copy it on every call and make the whole parse quadratic.
    return self.ddl_.startswith(s, self.next_index_)

  def Process(self):
    """Process the DDL.

    Returns:
      A list of statement strings, one per ';'-separated statement. Statements
      that produced no fragment are dropped, and so is a final statement made
      only of whitespace.

    Raises:
      DDLSyntaxError: if the text ends while a non-skipping mode (a string
        literal or back-quoted name) is still open.
    """
    while not self.IsEof():
      # Delegate to active mode if we have any.
      if self.mode_:
        self.mode_.Process()
        continue
      # Check statement break.
      if self.ddl_[self.next_index_] == ';':
        self.EmitBuffer()
        self.StartNewStatement()
        # Step over the ';' and drop it from the buffer so it is not emitted.
        self.Advance(1)
        self.SkipBuffer()
        continue
      # If we could enter any mode.
      for m in self.modes_:
        if m.TryEnter():
          self.EnterMode(m)
          break
      else:
        # No mode is found, consume the character.
        self.Advance(1)

    # At the end of parsing, we close the unclosed mode.
    if self.mode_ is not None:
      m = self.mode_
      if not m.is_to_skip_:
        raise DDLSyntaxError(
            'Unclosed %s start at index: %d, %s' %
            (m.enter_seq_, self.mode_start_index_, self.ddl_))
      # An unterminated skipping mode simply runs to the end of the text.
      m.Exit()
    else:
      self.EmitBuffer()
    self.logger_.debug('ddls: %s', self.statements_)
    res = [''.join(frags) for frags in self.statements_ if frags]
    # See https://stackoverflow.com/q/67857941
    if res and res[-1].isspace():
      return res[:-1]
    return res
|
|
|
|
|
|
class DDLParserMode:
  """A mode in DDLParser.

  A mode has one entering sequence, a list of exit sequences and an optional
  list of escape sequences. A mode could be:
    * skipping (e.g. comments), which skips the matched text.
    * non-skipping (e.g. strings), which emits the matched text.
  """

  def __init__(self, parser, enter_seq, exit_seqs, escape_sequences,
               is_to_skip):
    """Creates a mode bound to a parser.

    Args:
      parser: the parser driving this mode; it must provide StartsWith,
        Advance, EmitBuffer, SkipBuffer and ExitMode.
      enter_seq: text that starts the mode.
      exit_seqs: list of texts, any of which ends the mode.
      escape_sequences: list of texts that are consumed as a unit while the
        mode is active so they cannot be mistaken for an exit sequence, or
        None when the mode has no escapes.
      is_to_skip: True to drop the matched text, False to emit it.
    """
    self.parser_ = parser
    self.enter_seq_ = enter_seq
    self.exit_seqs_ = exit_seqs
    self.escape_sequences_ = escape_sequences
    self.is_to_skip_ = is_to_skip

  def TryEnter(self):
    """Tries to enter into the mode.

    Returns:
      True if the text at the parser's current index starts with the entering
      sequence. In that case the parser's pending buffer has been emitted and
      the entering sequence consumed into the new buffer.
    """
    matched = self.parser_.StartsWith(self.enter_seq_)
    if matched:
      # Flush what precedes the mode first, so that the entering sequence
      # becomes the start of this mode's own buffer.
      self.parser_.EmitBuffer()
      self.parser_.Advance(len(self.enter_seq_))
    return matched

  def Exit(self):
    """Skips or emits the matched buffer, then leaves the mode."""
    if self.is_to_skip_:
      self.parser_.SkipBuffer()
    else:
      self.parser_.EmitBuffer()
    self.parser_.ExitMode()

  def FindExitSeqence(self):
    """Finds a matching exit sequence.

    Returns:
      The first exit sequence found at the parser's current index, or None.
    """
    for s in self.exit_seqs_:
      if self.parser_.StartsWith(s):
        return s
    return None

  def Process(self):
    """Process the ddl at the current parser index."""
    # Put escape sequence into buffer.
    for seq in self.escape_sequences_ or ():
      if self.parser_.StartsWith(seq):
        # Advance by the length of the matched escape itself, not by the
        # number of configured escape sequences.
        self.parser_.Advance(len(seq))
        return
    # Check if we should exit the current mode.
    exit_seq = self.FindExitSeqence()
    if not exit_seq:
      self.parser_.Advance(1)
      return

    # Before exit, put exit_seq into buffer for non skipping mode. A skipping
    # mode leaves its exit sequence unconsumed for the parser to handle.
    if not self.is_to_skip_:
      self.parser_.Advance(len(exit_seq))
    self.Exit()
|
|
|
|
|
|
def PreprocessDDLWithParser(ddl_text):
  """Runs a DDLParser over the given DDL text.

  Args:
    ddl_text: the DDL text handed to DDLParser.

  Returns:
    The result of DDLParser(ddl_text).Process().
  """
  parser = DDLParser(ddl_text)
  return parser.Process()
|