117 lines
3.7 KiB
Python
117 lines
3.7 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2015 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Code to clean up and transform the JSON description of a dataflow.
|
|
|
|
Example clean-ups:
|
|
|
|
1. Dictionaries representing primitives with a schema will be converted to the
|
|
primitive:
|
|
Ex: { '@type': "http://schema.org/Text", 'value': "Hello" } becomes "Hello"
|
|
2. Fields that are unlikely to be human consumable may be hidden.
|
|
Ex: The serialized_fn field will be hidden, since humans are unlikely to try
|
|
to read the serialized Java object.
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import six
|
|
# Step property keys that are dropped from the cleaned-up output.
_EXCLUDED_PROPERTIES = {'serialized_fn'}

# Maps a schema '@type' URL to a function that reads the matching primitive
# field out of a value proto.
_VALUE_RETRIEVERS = {
    'http://schema.org/Boolean': lambda value: value.boolean_value,
    'http://schema.org/Text': lambda value: value.string_value,
}
|
|
|
|
|
|
def _ExtractStep(step_msg):
  """Builds a plain dict describing a Step message.

  Args:
    step_msg: A Step message.

  Returns:
    A dict holding the step's 'kind', 'name' and cleaned-up 'properties'.
    Properties whose key is listed in _EXCLUDED_PROPERTIES are left out.
  """
  raw_properties = step_msg.properties
  # A step without properties simply yields an empty properties dict.
  entries = raw_properties.additionalProperties if raw_properties else ()
  cleaned = {
      entry.key: _ExtractValue(entry.value)
      for entry in entries
      if entry.key not in _EXCLUDED_PROPERTIES
  }

  return {
      'kind': step_msg.kind,
      'name': step_msg.name,
      'properties': cleaned,
  }
|
|
|
|
|
|
def _ExtractDecoratedObject(proto):
  """Unwraps the proto representation of a JSON object.

  Args:
    proto: A protocol representation of a JSON object.

  Returns:
    If the object carries an '@type' that has an entry in _VALUE_RETRIEVERS,
    the primitive read from its 'value' property (or an explanatory string
    when 'value' is absent). Otherwise a dict of the object's properties,
    each one cleaned up with _ExtractValue.
  """
  by_key = {prop.key: prop.value for prop in proto.object_value.properties}

  type_proto = by_key.get('@type')
  retriever = None
  if type_proto:
    retriever = _VALUE_RETRIEVERS.get(type_proto.string_value)

  if retriever:
    # If we have a retriever, we can throw away everything except the value,
    # and convert it to a more reasonable type. This is important since it
    # cleans up the printed representation significantly.
    try:
      return retriever(by_key['value'])
    except KeyError:
      return 'Missing value for type [{0}] in proto [{1}]'.format(
          type_proto.string_value, proto)

  # No @type means this wasn't an object-wrapped leaf.
  # No retriever means that this was created "by us", so we just want to
  # output the properties. We leave the @type around since it has semantic
  # value.
  return {key: _ExtractValue(val) for key, val in by_key.items()}
|
|
|
|
|
|
def _ExtractValue(proto):
  """Converts a JSON value proto into a plain Python value.

  Args:
    proto: A proto representation of a JSON value. Its object_value,
      array_value and string_value fields are consulted, in that order.

  Returns:
    The cleaned-up object for an object value, a list of cleaned-up entries
    for an array value, the string itself for a string value (including the
    empty string), or a 'No decoding provided for: ...' message otherwise.
  """
  # Values are weird, because we actually wrap JSON objects around real
  # JSON values.
  if proto.object_value:
    return _ExtractDecoratedObject(proto)
  if proto.array_value:
    return [_ExtractValue(v) for v in proto.array_value.entries]

  # Compare against None instead of relying on truthiness, so that an empty
  # string is returned as-is rather than reported as undecodable.
  # NOTE(review): assumes an unset string_value is None -- confirm against
  # the message definition.
  if proto.string_value is not None:
    return proto.string_value

  return 'No decoding provided for: {0}'.format(proto)
|
|
|
|
|
|
def ExtractSteps(job):
  """Produces the cleaned-up dictionary for every step of a job.

  Args:
    job: A Job message.

  Returns:
    A list with one cleaned-up step dictionary per entry of job.steps, in
    the same order.
  """
  cleaned_steps = []
  for step_msg in job.steps:
    cleaned_steps.append(_ExtractStep(step_msg))
  return cleaned_steps
|