135 lines
4.2 KiB
Python
135 lines
4.2 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2020 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Utilities for interacting with streaming logs."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import copy
|
|
|
|
from apitools.base.py import encoding
|
|
from googlecloudsdk.command_lib.logs import stream
|
|
import six
|
|
|
|
# Display format for the dictionaries produced by this module: a `value(...)`
# projection over the 'severity', 'timestamp', 'task_name' and 'message' keys,
# with the timestamp rendered through a date() transform in the local zone.
# NOTE(review): presumably consumed by the calling command as its output
# format; nothing in this module reads it.
LOG_FORMAT = (
    'value('
    'severity,'
    'timestamp.date("%Y-%m-%d %H:%M:%S %z",tz="LOCAL"), '
    'task_name,'
    'message'
    ')'
)

# Forwarded to stream.LogFetcher as its `continue_interval` argument.
_CONTINUE_INTERVAL = 10
|
|
|
|
|
|
def StreamLogs(name, continue_function, polling_interval, task_name,
               allow_multiline):
  """Streams the logs of a job as dictionaries.

  Builds the log filters for the job, hands them to a stream.LogFetcher and
  converts every fetched entry into one or more dictionaries.

  Args:
    name: string id of the entity.
    continue_function: One-arg function that takes in the number of empty polls
      and outputs a boolean to decide if we should keep polling or not. It is
      forwarded to the fetcher as `continue_func`.
    polling_interval: amount of time to sleep between each poll.
    task_name: String name of task. When falsy, logs are not restricted to a
      single task.
    allow_multiline: Tells us if logs with multiline messages are okay or not.
      When falsy, a multi-line message is emitted as one dictionary per line.

  Returns:
    A generator of log dictionaries with the keys 'severity', 'timestamp',
    'task_name' and 'message'.
  """
  filters = _LogFilters(name, task_name=task_name)
  fetcher = stream.LogFetcher(
      filters=filters,
      polling_interval=polling_interval,
      continue_interval=_CONTINUE_INTERVAL,
      continue_func=continue_function)
  entries = fetcher.YieldLogs()
  return _SplitMultiline(entries, allow_multiline)
|
|
|
|
|
|
def _LogFilters(name, task_name):
  """Builds the filter expressions used to fetch the logs of one job.

  Args:
    name: string id of the entity; matched against resource.labels.job_id.
    task_name: String name of task. When falsy, no task restriction is added.

  Returns:
    A new list of filter strings to be passed to the logging API.
  """
  job_filters = [
      'resource.type="ml_job"',
      'resource.labels.job_id="{0}"'.format(name),
  ]
  if not task_name:
    return job_filters
  task_filter = 'resource.labels.task_name="{0}"'.format(task_name)
  return job_filters + [task_filter]
|
|
|
|
|
|
def _SplitMultiline(log_generator, allow_multiline=False):
  """Converts log entries to dictionaries, optionally one per message line.

  Args:
    log_generator: iterator that returns an ml log entry on each step.
    allow_multiline: Tells us if logs with multiline messages are okay or not.
      When truthy, each entry yields exactly one dictionary with its message
      left intact.

  Yields:
    Log dictionaries. Unless allow_multiline is set, every dictionary carries
    a single-line 'message'; an entry with an empty message still yields one
    dictionary whose message is ''.
  """
  for entry in log_generator:
    entry_dict = _EntryToDict(entry)
    if allow_multiline:
      yield entry_dict
      continue
    # splitlines() returns [] for an empty message; fall back to a single
    # empty line so that the entry is not dropped from the output.
    for line in entry_dict['message'].splitlines() or ['']:
      # Each emitted line gets its own independent copy of the entry fields.
      line_dict = copy.deepcopy(entry_dict)
      line_dict['message'] = line
      yield line_dict
|
|
|
|
|
|
def _EntryToDict(log_entry):
  """Converts a log entry to a dictionary.

  Args:
    log_entry: log entry object; its severity, timestamp, resource,
      jsonPayload and textPayload attributes are read.

  Returns:
    A dict with the keys:
      'severity': name of the entry's severity, or 'DEFAULT' when it is unset.
      'timestamp': the entry's timestamp, unchanged.
      'task_name': task name from the resource labels, or 'unknown_task'.
      'message': text of the entry. Taken from the 'message' field of
        jsonPayload when a JSON payload is present, otherwise from
        textPayload; '' when neither provides a message.
  """
  output = {
      'severity': log_entry.severity.name if log_entry.severity else 'DEFAULT',
      'timestamp': log_entry.timestamp,
      'task_name': _GetTaskName(log_entry),
  }
  message = ''
  # A JSON payload takes precedence: textPayload is only consulted when no
  # JSON payload exists, even if the JSON payload carries no message.
  if log_entry.jsonPayload is not None:
    # 'message' contains a free-text message that we want to pull out of the
    # JSON.
    json_message = _ToDict(log_entry.jsonPayload).get('message')
    if json_message:
      # JSON values are not necessarily strings (numbers, objects, lists).
      # Coerce to text so the result is always a string, as it already is for
      # textPayload, instead of failing later on string operations.
      message = six.text_type(json_message)
  elif log_entry.textPayload is not None:
    message = six.text_type(log_entry.textPayload)
  output['message'] = message
  return output
|
|
|
|
|
|
def _GetTaskName(log_entry):
  """Returns the task name recorded in the entry's resource labels.

  Args:
    log_entry: log entry object whose `resource.labels` may hold 'task_name'.

  Returns:
    The 'task_name' label value, or 'unknown_task' when the entry has no
    resource or the label is missing or empty.
  """
  resource = log_entry.resource
  if not resource:
    return 'unknown_task'
  task_name = _ToDict(resource.labels).get('task_name')
  return task_name if task_name else 'unknown_task'
|
|
|
|
|
|
def _ToDict(message):
  """Returns a dictionary view of a message.

  Args:
    message: a dict, an apitools message, or a falsy value such as None.

  Returns:
    A new empty dict for any falsy input, the input itself when it is a
    non-empty dict, and otherwise the result of encoding.MessageToDict.
  """
  if not message:
    return {}
  return (message if isinstance(message, dict)
          else encoding.MessageToDict(message))
|