feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*- #
# Copyright 2017 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for ml speech."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.GA,
                    base.ReleaseTrack.BETA,
                    base.ReleaseTrack.ALPHA)
class Speech(base.Group):
  """Use Google Cloud Speech to get transcripts of audio.

  Use Google Cloud Speech to get transcripts of recorded audio, and to
  get information about long-running speech recognition operations. For
  audio under 60 seconds, use:

    $ {command} recognize

  For audio 60 seconds or longer, use:

    $ {command} recognize-long-running

  For more information about the Google Cloud Speech API, refer to this
  guide: https://cloud.google.com/speech/docs/
  """

View File

@@ -0,0 +1,253 @@
examples:
recognize: |
To get a transcript of an audio file 'my-recording.wav':
$ {command} 'my-recording.wav' --language-code=en-US
To get a transcript of an audio file in bucket 'gs://bucket/myaudio' with a
custom sampling rate and encoding that uses hints and filters profanity:
$ {command} 'gs://bucket/myaudio' --language-code=es-ES --sample-rate=2200 --hints=Bueno --encoding=OGG_OPUS --filter-profanity
args_v1:
- api_field: audio
arg_name: audio
help_text: |
The location of the audio file to transcribe. Must be a local path or a
Google Cloud Storage URL (in the format gs://bucket/object).
is_positional: true
processor: googlecloudsdk.command_lib.ml.speech.util:GetAudioHook:version=v1
args_v1p1beta1:
- api_field: audio
arg_name: audio
help_text: |
The location of the audio file to transcribe. Must be a local path or a
Google Cloud Storage URL (in the format gs://bucket/object).
is_positional: true
processor: googlecloudsdk.command_lib.ml.speech.util:GetAudioHook:version=v1p1beta1
- api_field: config.enableWordConfidence
arg_name: include-word-confidence
help_text: |
Include a list of words and the confidence for those words in the top
result.
- group:
params:
- api_field: config.diarizationSpeakerCount
arg_name: diarization-speaker-count
type: int
help_text: |
Estimated number of speakers in the conversation being recognized.
- api_field: config.enableSpeakerDiarization
arg_name: enable-speaker-diarization
help_text: |
Enable speaker detection for each recognized word in the top
alternative of the recognition result using an integer speaker_tag
provided in the WordInfo.
type: bool
required: true
- api_field: config.alternativeLanguageCodes
arg_name: additional-language-codes
metavar: language_code
repeated: true
help_text: |
The BCP-47 language tags of other languages that the speech may be
in. Up to 3 can be provided.
If alternative languages are listed, recognition result will contain
recognition in the most likely language detected including the main
language-code.
args_v1p1beta1_alpha_track: # available only in 'gcloud alpha'
- arg_name: enable-automatic-punctuation
api_field: config.enableAutomaticPunctuation
help_text: |
Adds punctuation to recognition result hypotheses.
- group:
help_text: Description of audio data to be recognized.
params:
- arg_name: interaction-type
api_field: config.metadata.interactionType
help_text: |
The interaction type of the conversation.
choices:
- arg_value: discussion
enum_value: DISCUSSION
help_text: Multiple people in a conversation or discussion.
- arg_value: phone-call
enum_value: PHONE_CALL
help_text: A phone-call or video-conference in which two or more people, who are not in the same room, are actively participating.
- arg_value: voicemail
enum_value: VOICEMAIL
help_text: A recorded message intended for another person to listen to.
- arg_value: professionally-produced
enum_value: PROFESSIONALLY_PRODUCED
help_text: Professionally produced audio (eg. TV Show, Podcast).
- arg_value: voice-search
enum_value: VOICE_SEARCH
help_text: Transcribe spoken questions and queries into text.
- arg_value: voice-command
enum_value: VOICE_COMMAND
help_text: Transcribe voice commands, such as for controlling a device.
- arg_value: dictation
enum_value: DICTATION
help_text: Transcribe speech to text to create a written document, such as a text-message, email or report.
- arg_name: naics-code
api_field: config.metadata.industryNaicsCodeOfAudio
type: int
help_text: |
The industry vertical to which this speech recognition request most closely applies.
- arg_name: microphone-distance
api_field: config.metadata.microphoneDistance
help_text: |
The distance at which the audio device is placed to record the conversation.
choices:
- arg_value: nearfield
enum_value: NEARFIELD
help_text: The speaker is within 1 meter of the microphone.
- arg_value: midfield
enum_value: MIDFIELD
help_text: The speaker is within 3 meters of the microphone.
- arg_value: farfield
enum_value: FARFIELD
help_text: The speaker is more than 3 meters away from the microphone.
- arg_name: original-media-type
api_field: config.metadata.originalMediaType
help_text: |
The media type of the original audio conversation.
choices:
- arg_value: audio
enum_value: AUDIO
help_text: The speech data is an audio recording.
- arg_value: video
enum_value: VIDEO
help_text: The speech data originally recorded on a video.
- arg_name: recording-device-type
api_field: config.metadata.recordingDeviceType
help_text: |
The device type through which the original audio was recorded on.
choices:
- arg_value: smartphone
enum_value: SMARTPHONE
help_text: Speech was recorded on a smartphone.
- arg_value: pc
enum_value: PC
help_text: Speech was recorded using a personal computer or tablet.
- arg_value: phone-line
enum_value: PHONE_LINE
help_text: Speech was recorded over a phone line.
- arg_value: vehicle
enum_value: VEHICLE
help_text: Speech was recorded in a vehicle.
- arg_value: outdoor
enum_value: OTHER_OUTDOOR_DEVICE
help_text: Speech was recorded outdoors.
- arg_value: indoor
enum_value: OTHER_INDOOR_DEVICE
help_text: Speech was recorded indoors.
- arg_name: recording-device-name
api_field: config.metadata.recordingDeviceName
help_text: |
The device used to make the recording. Examples: `Nexus 5X`, `Polycom SoundStation IP 6000`
- arg_name: original-mime-type
api_field: config.metadata.originalMimeType
help_text: |
Mime type of the original audio file. Examples: `audio/m4a`, `audio/mp3`.
- arg_name: audio-topic
api_field: config.metadata.audioTopic
help_text: |
Description of the content, e.g. "Recordings of federal supreme court hearings from 2012".
args:
- group:
help_text: Audio channel settings.
params:
- arg_name: separate-channel-recognition
api_field: config.enableSeparateRecognitionPerChannel
required: true
default: false
help_text: |
Recognition result will contain a `channel_tag` field to state which channel that
result belongs to. If this is not true, only the first channel will be recognized.
- arg_name: audio-channel-count
api_field: config.audioChannelCount
required: true
type: int
help_text: |
The number of channels in the input audio data. Set this for
separate-channel-recognition. Valid values are:
1) LINEAR16 and FLAC: `1`-`8`.
2) OGG_OPUS: `1`-`254`.
3) MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE: only `1`.
- group:
mutex: true
required: true
params:
- api_field: config.languageCode
arg_name: language-code
help_text: |
The language of the supplied audio as a BCP-47
(https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example:
"en-US". See https://cloud.google.com/speech/docs/languages for a list
of the currently supported language codes.
- api_field: config.languageCode
arg_name: language
hidden: true
action:
deprecated:
warn: Flag {flag_name} is deprecated. Use --language-code instead.
help_text: |
The language of the supplied audio as a BCP-47
(https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example:
"en-US". See https://cloud.google.com/speech/docs/languages for a list
of the currently supported language codes.
- api_field: config.speechContexts.phrases
arg_name: hints
default: []
help_text: |
A list of strings containing word and phrase "hints" so that the speech
recognition is more likely to recognize them. This can be used to
improve the accuracy for specific words and phrases, for example, if
specific commands are typically spoken by the user. This can also be
used to add additional words to the vocabulary of the recognizer. See
https://cloud.google.com/speech/limits#content.
- api_field: config.maxAlternatives
arg_name: max-alternatives
default: 1
help_text: |
Maximum number of recognition hypotheses to be returned. The server
may return fewer than max_alternatives. Valid values are 0-30. A value
of 0 or 1 will return a maximum of one.
- api_field: config.profanityFilter
arg_name: filter-profanity
help_text: |
If True, the server will attempt to filter out profanities, replacing
all but the initial character in each filtered word with asterisks,
e.g. ```f***```.
- api_field: config.encoding
arg_name: encoding
default: encoding-unspecified
help_text: |
The type of encoding of the file. Required if the file format is not
WAV or FLAC.
- api_field: config.sampleRateHertz
arg_name: sample-rate
help_text: |
The sample rate in Hertz. For best results, set the sampling rate of
the audio source to 16000 Hz. If that's not possible, use the native
sample rate of the audio source (instead of re-sampling).
- api_field: config.enableWordTimeOffsets
arg_name: include-word-time-offsets
help_text: |
If True, the top result includes a list of words with the start and
end time offsets (timestamps) for those words. If False, no word-level
time offset information is returned.

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for ml speech locations operations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
@base.Hidden
class Locations(base.Group):
  """Manage Google Cloud Speech-to-text locations."""

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text locations describe command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags_v2
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Describe(base.Command):
  """Obtain details about Speech-to-text location.

  Fetches the location named on the command line through the Speech v2
  client and returns it for display.
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddLocationPositionalArgToParser(parser)

  def Run(self, args):
    """Look up and return the requested location resource."""
    location = args.CONCEPTS.location.Parse()
    speech_client = client.SpeechV2Client()
    return speech_client.GetLocation(location)

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text locations list command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class List(base.ListCommand):
  """List Speech-to-text locations."""

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    # Makes `--uri` output the location resource names.
    parser.display_info.AddUriFunc(lambda location: location.name)

  def Run(self, args):
    """Return the (optionally filtered and limited) list of locations."""
    speech_client = client.SpeechV2Client()
    return speech_client.ListLocations(
        filter_str=args.filter, limit=args.limit, page_size=args.page_size
    )

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*- #
# Copyright 2017 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for ml speech operations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
class Operations(base.Group):
  """Interact with Google Cloud Speech operations.

  Get information about Google Cloud Speech long-running operations.
  """

View File

@@ -0,0 +1,22 @@
- help_text:
brief: Get description of a long-running speech recognition operation.
description: |
Get information about a long-running speech recognition operation.
examples: |
To fetch details for the operation '12345':
$ {command} 12345
request:
collection: speech.operations
api_version: v1
resource_method_params:
name: '{operationsId}'
arguments:
resource:
help_text: The ID of the operation to describe.
spec: !REF googlecloudsdk.command_lib.ml.speech.resources:operation
output:
format: json

View File

@@ -0,0 +1,28 @@
- help_text:
brief: Poll long-running speech recognition operation until it completes.
description: |
Poll a long-running speech recognition operation until it completes. When
the operation is complete, this command will display the results of the
transcription.
examples: |
To wait for the result of operation '12345':
$ {command} 12345
request:
collection: speech.operations
api_version: v1
arguments:
resource:
help_text: The ID of the operation to wait for.
spec: !REF googlecloudsdk.command_lib.ml.speech.resources:operation
async:
collection: speech.operations
operation_get_method_params:
name: operationsId
result_attribute: response
output:
format: json

View File

@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Recognize speech in provided audio."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.util import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags
from googlecloudsdk.command_lib.ml.speech import util
@base.ReleaseTracks(base.ReleaseTrack.GA)
class RecognizeGA(base.Command):
  """Get transcripts of short (less than 60 seconds) audio from an audio file."""

  detailed_help = {
      'DESCRIPTION':
          """\
Get a transcript of an audio file that is less than 60 seconds. You can use
an audio file that is on your local drive or a Google Cloud Storage URL.
If the audio is longer than 60 seconds, you will get an error. Please use
`{parent_command} recognize-long-running` instead.
""",
      'EXAMPLES':
          """\
To get a transcript of an audio file 'my-recording.wav':
$ {command} 'my-recording.wav' --language-code=en-US
To get a transcript of an audio file in bucket 'gs://bucket/myaudio' with a
custom sampling rate and encoding that uses hints and filters profanity:
$ {command} 'gs://bucket/myaudio' \\
--language-code=es-ES --sample-rate=2200 --hints=Bueno \\
--encoding=OGG_OPUS --filter-profanity
""",
      'API REFERENCE':
          """\
This command uses the speech/v1 API. The full documentation for this API
can be found at: https://cloud.google.com/speech-to-text/docs/quickstart-protocol
"""
  }

  API_VERSION = 'v1'

  # Shared flag -> RecognitionConfig mapper; the Beta and Alpha subclasses
  # reuse this same instance to register their extra flags.
  flags_mapper = flags.RecognizeArgsToRequestMapper()

  @classmethod
  def Args(cls, parser):
    """Register the recognize flags for this command's API version."""
    parser.display_info.AddFormat('json')
    cls.flags_mapper.AddRecognizeArgsToParser(parser, cls.API_VERSION)

  def MakeRequest(self, args, messages):
    """Build a RecognizeRequest (audio + config) from the parsed args."""
    return messages.RecognizeRequest(
        audio=util.GetRecognitionAudioFromPath(args.audio, self.API_VERSION),
        config=self.flags_mapper.MakeRecognitionConfig(args, messages))

  def Run(self, args):
    """Run 'ml speech recognize'.

    Args:
      args: argparse.Namespace, The arguments that this command was invoked
        with.

    Returns:
      The response from the synchronous Recognize API call.
    """
    client = apis.GetClientInstance(util.SPEECH_API, self.API_VERSION)
    # Stored on self so Epilog can reference the request after display.
    self._request = self.MakeRequest(args, client.MESSAGES_MODULE)
    return client.speech.Recognize(self._request)

  def Epilog(self, unused_resources_were_displayed):
    """Print a follow-up link (if applicable) for the issued request."""
    util.MaybePrintSttUiLink(self._request)
@base.ReleaseTracks(base.ReleaseTrack.BETA)
class RecognizeBeta(RecognizeGA):
  # Reuse the GA help text verbatim.
  __doc__ = RecognizeGA.__doc__

  # Shallow copy so the module-level 'API REFERENCE' override for this class
  # does not mutate the GA class's help.
  detailed_help = RecognizeGA.detailed_help.copy()

  API_VERSION = 'v1p1beta1'

  @classmethod
  def Args(cls, parser):
    """Register the GA flags plus the beta-only recognize flags."""
    # Explicit two-argument super keeps the classmethod chain working on
    # Python 2 as well (note the __future__ imports at the top of the file).
    super(RecognizeBeta, RecognizeBeta).Args(parser)
    cls.flags_mapper.AddBetaRecognizeArgsToParser(parser)

  def MakeRequest(self, args, messages):
    """Build the GA request, then layer on beta-only config fields."""
    request = super(RecognizeBeta, self).MakeRequest(args, messages)
    self.flags_mapper.UpdateBetaArgsInRecognitionConfig(args, request.config)
    return request
# Replace the copied GA help so the beta command documents the v1p1beta1 API.
RecognizeBeta.detailed_help['API REFERENCE'] = """\
This command uses the speech/v1p1beta1 API. The full documentation for this API
can be found at: https://cloud.google.com/speech-to-text/docs/quickstart-protocol
"""
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class RecognizeAlpha(RecognizeBeta):
  # Reuse the beta help text verbatim.
  __doc__ = RecognizeBeta.__doc__

  # Alpha currently targets the same API version as beta.
  API_VERSION = 'v1p1beta1'

  @classmethod
  def Args(cls, parser):
    """Register the beta flags plus the alpha-only recognize flags."""
    super(RecognizeAlpha, RecognizeAlpha).Args(parser)
    cls.flags_mapper.AddAlphaRecognizeArgsToParser(parser, cls.API_VERSION)

  def MakeRequest(self, args, messages):
    """Build the beta request, then layer on alpha-only config fields."""
    request = super(RecognizeAlpha, self).MakeRequest(args, messages)
    self.flags_mapper.UpdateAlphaArgsInRecognitionConfig(args, request.config)
    return request

View File

@@ -0,0 +1,160 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Recognize speech in provided audio."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.util import apis
from googlecloudsdk.api_lib.util import waiter
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags
from googlecloudsdk.command_lib.ml.speech import util
# As per https://cloud.google.com/speech-to-text/docs/basics, async recognition
# accepts audios of 480 minutes in duration. Since recognition can run up to 3x
# the audio length setting timeout at that limit.
OPERATION_TIMEOUT_MS = 3 * 480 * 60 * 1000  # 3 x 480 minutes, in milliseconds.
@base.ReleaseTracks(base.ReleaseTrack.GA)
class RecognizeLongRunningGA(base.Command):
  """Get transcripts of longer audio from an audio file."""

  detailed_help = {
      'DESCRIPTION':
          """\
Get a transcript of audio up to 80 minutes in length. If the audio is
under 60 seconds, you may also use `{parent_command} recognize` to
analyze it.
""",
      'EXAMPLES':
          """\
To block the command from completing until analysis is finished, run:
$ {command} AUDIO_FILE --language-code=LANGUAGE_CODE --sample-rate=SAMPLE_RATE
You can also receive an operation as the result of the command by running:
$ {command} AUDIO_FILE --language-code=LANGUAGE_CODE --sample-rate=SAMPLE_RATE --async
This will return information about an operation. To get information about the
operation, run:
$ {parent_command} operations describe OPERATION_ID
To poll the operation until it's complete, run:
$ {parent_command} operations wait OPERATION_ID
""",
      'API REFERENCE':
          """\
This command uses the speech/v1 API. The full documentation for this API
can be found at: https://cloud.google.com/speech-to-text/docs/quickstart-protocol
""",
  }

  API_VERSION = 'v1'

  # Shared flag -> RecognitionConfig mapper; the Beta and Alpha subclasses
  # reuse this same instance to register their extra flags.
  flags_mapper = flags.RecognizeArgsToRequestMapper()

  @classmethod
  def Args(cls, parser):
    """Register the recognize flags plus the LRO-specific flags."""
    parser.display_info.AddFormat('json')
    cls.flags_mapper.AddRecognizeArgsToParser(parser, cls.API_VERSION)
    # LRO specific flags.
    base.ASYNC_FLAG.AddToParser(parser)
    parser.add_argument(
        '--output-uri',
        type=util.ValidateOutputUri,
        help='Location to which the results should be written. Must be a '
        'Google Cloud Storage URI.')

  def MakeRequest(self, args, messages):
    """Build a LongRunningRecognizeRequest from the parsed args."""
    request = messages.LongRunningRecognizeRequest(
        audio=util.GetRecognitionAudioFromPath(args.audio, self.API_VERSION),
        config=self.flags_mapper.MakeRecognitionConfig(args, messages))
    # Only attach an output config when the user asked for GCS output.
    if args.output_uri is not None:
      request.outputConfig = messages.TranscriptOutputConfig(
          gcsUri=args.output_uri)
    return request

  def Run(self, args):
    """Run 'ml speech recognize'.

    Args:
      args: argparse.Namespace, The arguments that this command was invoked
        with.

    Returns:
      The long-running operation when --async is given, otherwise the
      operation's result once polling completes.
    """
    client = apis.GetClientInstance(util.SPEECH_API, self.API_VERSION)
    # Stored on self so Epilog can reference the request after display.
    self._request = self.MakeRequest(args, client.MESSAGES_MODULE)
    operation = client.speech.Longrunningrecognize(self._request)
    if args.async_:
      return operation
    return waiter.WaitFor(
        waiter.CloudOperationPollerNoResources(client.operations, lambda x: x),
        operation.name,
        'Waiting for [{}] to complete. This may take several minutes.'.format(
            operation.name),
        wait_ceiling_ms=OPERATION_TIMEOUT_MS)

  def Epilog(self, unused_resources_were_displayed):
    """Print a follow-up link (if applicable) for the issued request."""
    util.MaybePrintSttUiLink(self._request)
@base.ReleaseTracks(base.ReleaseTrack.BETA)
class RecognizeLongRunningBeta(RecognizeLongRunningGA):
  # Reuse the GA help text verbatim.
  __doc__ = RecognizeLongRunningGA.__doc__

  # Shallow copy so the module-level 'API REFERENCE' override for this class
  # does not mutate the GA class's help.
  detailed_help = RecognizeLongRunningGA.detailed_help.copy()

  API_VERSION = 'v1p1beta1'

  @classmethod
  def Args(cls, parser):
    """Register the GA flags plus the beta-only recognize flags."""
    # Explicit two-argument super keeps the classmethod chain working on
    # Python 2 as well (note the __future__ imports at the top of the file).
    super(RecognizeLongRunningBeta, RecognizeLongRunningBeta).Args(parser)
    cls.flags_mapper.AddBetaRecognizeArgsToParser(parser)

  def MakeRequest(self, args, messages):
    """Build the GA request, then layer on beta-only config fields."""
    request = super(RecognizeLongRunningBeta, self).MakeRequest(args, messages)
    self.flags_mapper.UpdateBetaArgsInRecognitionConfig(args, request.config)
    return request
# Replace the copied GA help so the beta command documents the v1p1beta1 API.
RecognizeLongRunningBeta.detailed_help['API REFERENCE'] = """\
This command uses the speech/v1p1beta1 API. The full documentation for this API
can be found at: https://cloud.google.com/speech-to-text/docs/quickstart-protocol
"""
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class RecognizeLongRunningAlpha(RecognizeLongRunningBeta):
  # Reuse the beta help text verbatim.
  __doc__ = RecognizeLongRunningBeta.__doc__

  # Alpha currently targets the same API version as beta.
  API_VERSION = 'v1p1beta1'

  @classmethod
  def Args(cls, parser):
    """Register the beta flags plus the alpha-only recognize flags."""
    super(RecognizeLongRunningAlpha, RecognizeLongRunningAlpha).Args(parser)
    cls.flags_mapper.AddAlphaRecognizeArgsToParser(parser, cls.API_VERSION)

  def MakeRequest(self, args, messages):
    """Build the beta request, then layer on alpha-only config fields."""
    request = super(RecognizeLongRunningAlpha, self).MakeRequest(args, messages)
    self.flags_mapper.UpdateAlphaArgsInRecognitionConfig(args, request.config)
    return request

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for ml speech operations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Recognizers(base.Group):
  """Manage Google Cloud Speech-to-text recognizers."""

View File

@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers create command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flag_validations
from googlecloudsdk.command_lib.ml.speech import flags_v2
from googlecloudsdk.core import log
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Create(base.Command):
  """Create a speech-to-text recognizer."""

  def ValidateCreateRecognizerFlags(self, args):
    """Validates create flags."""
    flag_validations.ValidateSpeakerDiarization(args)
    flag_validations.ValidateDecodingConfig(args)

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddAllFlagsToParser(
        parser, require_base_recognizer_attributes=True, use_store_true=True
    )

  def Run(self, args):
    """Create the recognizer, waiting on the operation unless --async is set.

    Args:
      args: argparse.Namespace, The arguments that this command was invoked
        with.

    Returns:
      The create operation when run with --async, otherwise the created
      recognizer resource once the operation completes.
    """
    recognizer = args.CONCEPTS.recognizer.Parse()
    self.ValidateCreateRecognizerFlags(args)
    speech_client = client.SpeechV2Client()
    # NOTE(review): args.async_ is read here but no ASYNC_FLAG is added in
    # Args above — presumably AddAllFlagsToParser registers it; confirm.
    is_async = args.async_
    # Build the base config from model/languages, then layer on the decoding
    # settings and ASR features parsed from the flags. Each initializer
    # returns a pair whose second element is unused here.
    recognition_config, _ = speech_client.InitializeRecognitionConfig(
        args.model, args.language_codes
    )
    recognition_config, _ = speech_client.InitializeDecodingConfigFromArgs(
        recognition_config,
        args,
    )
    recognition_config.features, _ = (
        speech_client.InitializeASRFeaturesFromArgs(args)
    )
    operation = speech_client.CreateRecognizer(
        recognizer,
        args.display_name,
        args.model,
        args.language_codes,
        recognition_config,
    )
    if is_async:
      log.CreatedResource(
          operation.name, kind='speech recognizer', is_async=True
      )
      return operation
    resource = speech_client.WaitForRecognizerOperation(
        location=recognizer.Parent().Name(),
        operation_ref=speech_client.GetOperationRef(operation),
        message='waiting for recognizer [{}] to be created'.format(
            recognizer.RelativeName()
        ),
    )
    log.CreatedResource(resource.name, kind='speech recognizer')
    return resource

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers delete command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags_v2
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Delete(base.Command):
  """Delete a Speech-to-text recognizer."""

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddRecognizerArgToParser(parser)
    base.ASYNC_FLAG.AddToParser(parser)
    base.ASYNC_FLAG.SetDefault(parser, False)

  def Run(self, args):
    """Delete the recognizer, waiting on the operation unless --async is set."""
    recognizer_ref = args.CONCEPTS.recognizer.Parse()
    speech_api = client.SpeechV2Client()

    delete_op = speech_api.DeleteRecognizer(recognizer_ref)
    if args.async_:
      # Hand the caller the operation without waiting for it to finish.
      log.DeletedResource(
          delete_op.name, kind='speech recognizer', is_async=True)
      return delete_op

    result = speech_api.WaitForRecognizerOperation(
        location=recognizer_ref.Parent().Name(),
        operation_ref=speech_api.GetOperationRef(delete_op),
        message='waiting for recognizer [{}] to be deleted'.format(
            recognizer_ref.RelativeName()))
    log.DeletedResource(result.name, kind='speech recognizer')
    return result

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers describe command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags_v2
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Describe(base.Command):
  # Renamed from the copy-pasted `Create`: this module is the describe
  # command (see the module docstring); calliope derives the CLI command name
  # from the module, so the class name only needs to match its semantics.
  """Obtain details about a Speech-to-text recognizer."""

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddRecognizerArgToParser(parser)

  def Run(self, args):
    """Fetches the recognizer named on the command line.

    Args:
      args: argparse.Namespace, the parsed command-line arguments.

    Returns:
      The Recognizer resource returned by the Speech-to-text v2 API.
    """
    recognizer = args.CONCEPTS.recognizer.Parse()
    speech_client = client.SpeechV2Client()
    return speech_client.GetRecognizer(recognizer)

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers list command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flags_v2
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class List(base.ListCommand):
  # Renamed from the copy-pasted `Create`: this module is the list command
  # (see the module docstring) and subclasses base.ListCommand.
  """List Speech-to-text recognizers."""

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddLocationArgToParser(parser)
    # Default table output: one row per recognizer, timestamps in local time,
    # language codes joined into a single comma-separated cell.
    parser.display_info.AddFormat(
        'table(name.segment(-1):label=NAME,'
        'createTime.date(tz=LOCAL),'
        'updateTime.date(tz=LOCAL),'
        'model,'
        'language_codes.join(sep=","))')

  def Run(self, args):
    """Lists recognizers in the given location, honoring --limit.

    Args:
      args: argparse.Namespace, the parsed command-line arguments.

    Returns:
      An iterable of Recognizer resources.
    """
    location = args.CONCEPTS.location.Parse()
    speech_client = client.SpeechV2Client()
    return speech_client.ListRecognizers(location, limit=args.limit)

View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers run short audio command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flag_validations
from googlecloudsdk.command_lib.ml.speech import flags_v2
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class RunBatch(base.Command):
  # Docstring fixed: batch recognition reads audio from a Cloud Storage URI,
  # not a "gcloud uri".
  """Get transcripts of long (more than 60 seconds) audio from a Cloud Storage URI."""

  def ValidateRunBatchFlags(self, args):
    """Validates run batch flags."""
    flag_validations.ValidateDecodingConfig(args)
    flag_validations.ValidateAudioSource(args, batch=True)

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddRecognizeRequestFlagsToParser(parser, add_async_flag=True)

  def Run(self, args):
    """Issues a BatchRecognize request for the given recognizer and audio.

    Args:
      args: argparse.Namespace, the parsed command-line arguments.

    Returns:
      The long-running operation if --async was given, otherwise the result
      of the batch recognize operation once it completes.
    """
    resource = args.CONCEPTS.recognizer.Parse()
    speech_client = client.SpeechV2Client()
    self.ValidateRunBatchFlags(args)

    # Build the per-request recognition config. Each helper threads the
    # update mask through, accumulating the field paths that were set.
    recognition_config_update_mask = []
    recognition_config, recognition_config_update_mask = (
        speech_client.InitializeRecognitionConfig(
            args.model, args.language_codes, recognition_config_update_mask
        )
    )
    recognition_config, recognition_config_update_mask = (
        speech_client.InitializeDecodingConfigFromArgs(
            recognition_config,
            args,
            default_to_auto_decoding_config=True,
            update_mask=recognition_config_update_mask,
        )
    )
    recognition_config.features, recognition_config_update_mask = (
        speech_client.InitializeASRFeaturesFromArgs(
            args, update_mask=recognition_config_update_mask
        )
    )
    if args.hint_phrases or args.hint_phrase_sets:
      # Speech adaptation is only attached when hints were supplied.
      recognition_config.adaptation, recognition_config_update_mask = (
          speech_client.InitializeAdaptationConfigFromArgs(
              args, update_mask=recognition_config_update_mask
          )
      )
    operation = speech_client.RunBatch(
        resource,
        args.audio,
        recognition_config,
        update_mask=recognition_config_update_mask,
    )
    if args.async_:
      return operation
    return speech_client.WaitForBatchRecognizeOperation(
        location=resource.Parent().Name(),
        operation_ref=operation.name,
        message='waiting for batch recognize to finish',
    )

View File

@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers run short audio command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flag_validations
from googlecloudsdk.command_lib.ml.speech import flags_v2
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class RunShort(base.Command):
  """Get transcripts of short (less than 60 seconds) audio from an audio file."""

  def ValidateRunShortFlags(self, args):
    """Validates run short flags."""
    flag_validations.ValidateDecodingConfig(args)
    flag_validations.ValidateAudioSource(args)

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddRecognizeRequestFlagsToParser(parser)

  def Run(self, args):
    """Issues a synchronous Recognize request for the given audio.

    Args:
      args: argparse.Namespace, the parsed command-line arguments.

    Returns:
      The recognition response from the Speech-to-text v2 API.
    """
    recognizer_ref = args.CONCEPTS.recognizer.Parse()
    api_client = client.SpeechV2Client()
    self.ValidateRunShortFlags(args)

    # Build the per-request recognition config, accumulating the field paths
    # that were explicitly set into an update mask as we go.
    update_mask = []
    config, update_mask = api_client.InitializeRecognitionConfig(
        args.model, args.language_codes, update_mask)
    config, update_mask = api_client.InitializeDecodingConfigFromArgs(
        config,
        args,
        default_to_auto_decoding_config=True,
        update_mask=update_mask)
    config.features, update_mask = api_client.InitializeASRFeaturesFromArgs(
        args, update_mask=update_mask)
    if args.hint_phrases or args.hint_phrase_sets:
      # Speech adaptation is only attached when hints were supplied.
      config.adaptation, update_mask = (
          api_client.InitializeAdaptationConfigFromArgs(
              args, update_mask=update_mask))
    return api_client.RunShort(recognizer_ref, args.audio, config, update_mask)

View File

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cloud Speech-to-text recognizers update command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.ml.speech import client
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.ml.speech import flag_validations
from googlecloudsdk.command_lib.ml.speech import flags_v2
from googlecloudsdk.core import log
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class Update(base.Command):
  """Update a Speech-to-text recognizer."""

  def ValidateUpdateRecognizerFlags(self, args):
    """Validates update flags."""
    flag_validations.ValidateDecodingConfig(args)

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    flags_v2.AddAllFlagsToParser(parser)

  def Run(self, args):
    """Updates the recognizer named on the command line.

    Args:
      args: argparse.Namespace, the parsed command-line arguments.

    Returns:
      The long-running operation if --async was given, otherwise the
      resource returned once the update operation completes.
    """
    recognizer_ref = args.CONCEPTS.recognizer.Parse()
    api_client = client.SpeechV2Client()
    self.ValidateUpdateRecognizerFlags(args)

    # Build the new recognition config, accumulating the field paths that
    # were explicitly set into an update mask as we go.
    update_mask = []
    config, update_mask = api_client.InitializeRecognitionConfig(
        args.model, args.language_codes, update_mask)
    config, update_mask = api_client.InitializeDecodingConfigFromArgs(
        config, args, update_mask=update_mask)
    config.features, update_mask = api_client.InitializeASRFeaturesFromArgs(
        args, update_mask=update_mask)

    # The config lives under the recognizer's default_recognition_config
    # field, so qualify every collected field path before issuing the update.
    update_mask = [
        'default_recognition_config.' + field for field in update_mask
    ]

    update_operation = api_client.UpdateRecognizer(
        recognizer_ref,
        args.display_name,
        args.model,
        args.language_codes,
        config,
        update_mask=update_mask,
    )
    if args.async_:
      # Fire-and-forget: report the pending operation and let the caller poll.
      log.UpdatedResource(
          update_operation.name, kind='speech recognizer', is_async=True
      )
      return update_operation

    # Block until the long-running update finishes, then report the result.
    updated = api_client.WaitForRecognizerOperation(
        location=recognizer_ref.Parent().Name(),
        operation_ref=api_client.GetOperationRef(update_operation),
        message='waiting for recognizer [{}] to be updated'.format(
            recognizer_ref.RelativeName()
        ),
    )
    log.UpdatedResource(updated.name, kind='speech recognizer')
    return updated