feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,35 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1 import publisher
from google.cloud.pubsub_v1 import subscriber
from google.pubsub_v1.services import schema_service
# Convenience alias: re-export the publisher client at the package root,
# borrowing the wrapped client's docstring verbatim.
class PublisherClient(publisher.Client):
    __doc__ = publisher.Client.__doc__
# Convenience alias: re-export the subscriber client at the package root,
# borrowing the wrapped client's docstring verbatim.
class SubscriberClient(subscriber.Client):
    __doc__ = subscriber.Client.__doc__
# Convenience alias: re-export the generated schema-service client at the
# package root, borrowing the wrapped client's docstring verbatim.
class SchemaServiceClient(schema_service.client.SchemaServiceClient):
    __doc__ = schema_service.client.SchemaServiceClient.__doc__
__all__ = ("types", "PublisherClient", "SubscriberClient", "SchemaServiceClient")

View File

@@ -0,0 +1,71 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import functools
def add_methods(source_class, blacklist=()):
    """Add wrapped versions of the `api` member's methods to the class.

    Any methods passed in `blacklist` are not added.
    Additionally, any methods explicitly defined on the wrapped class are
    not added.

    Args:
        source_class: The class (typically a generated GAPIC client) whose
            public callables should be wrapped and re-exposed.
        blacklist: An iterable of method names to skip.

    Returns:
        A class decorator that adds the wrapped methods to the decorated
        class. The decorated class is expected to expose the underlying
        client instance as an ``api`` attribute.
    """

    def wrap(wrapped_fx, lookup_fx):
        """Wrap a GAPIC method; preserve its name and docstring."""
        # If this is a static or class method, then we do *not*
        # send self as the first argument.
        #
        # For instance methods, we need to send self.api rather
        # than self, since that is where the actual methods were declared.
        if isinstance(lookup_fx, (classmethod, staticmethod)):
            fx = lambda *a, **kw: wrapped_fx(*a, **kw)  # noqa
            return staticmethod(functools.wraps(wrapped_fx)(fx))
        else:
            fx = lambda self, *a, **kw: wrapped_fx(self.api, *a, **kw)  # noqa
            return functools.wraps(wrapped_fx)(fx)

    def _raw_class_attribute(name):
        """Return the raw (possibly decorated) class attribute for ``name``.

        Walk the MRO explicitly: ``dir()`` reports methods inherited from
        base classes, but ``source_class.__dict__`` only contains attributes
        defined directly on the class, so a plain ``__dict__[name]`` lookup
        would raise ``KeyError`` for inherited public methods.
        """
        for klass in source_class.__mro__:
            if name in klass.__dict__:
                return klass.__dict__[name]
        raise AttributeError(name)

    def actual_decorator(cls):
        # Reflectively iterate over most of the methods on the source class
        # (the GAPIC) and make wrapped versions available on this client.
        for name in dir(source_class):
            # Ignore all private and magic methods.
            if name.startswith("_"):
                continue

            # Ignore anything on our blacklist.
            if name in blacklist:
                continue

            # Retrieve the attribute, and ignore it if it is not callable.
            attr = getattr(source_class, name)
            if not callable(attr):
                continue

            # Add a wrapper method to this object. The raw class attribute
            # (not the bound method) is needed so that static and class
            # methods can be detected.
            lookup_fx = _raw_class_attribute(name)
            fx = wrap(attr, lookup_fx)
            setattr(cls, name, fx)

        # Return the augmented class.
        return cls

    # Simply return the actual decorator; this is returned from this method
    # and actually used to decorate the class.
    return actual_decorator

View File

@@ -0,0 +1,20 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from concurrent.futures import TimeoutError
__all__ = ("TimeoutError",)

View File

@@ -0,0 +1,56 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import concurrent.futures
from typing import Any, NoReturn, Optional
import google.api_core.future
class Future(concurrent.futures.Future, google.api_core.future.Future):
    """Represents the eventual outcome of an asynchronous Pub/Sub action.

    Instances of this class are handed back by other methods in this
    library to let callers track the status of asynchronous Pub/Sub calls;
    user code should never construct one directly.
    """

    def running(self) -> bool:
        """Return ``True`` while the associated Pub/Sub action is still pending."""
        return not self.done()

    def set_running_or_notify_cancel(self) -> NoReturn:
        # This hook is part of the concurrent.futures executor protocol,
        # which never drives instances of this class.
        raise NotImplementedError(
            "Only used by executors from `concurrent.futures` package."
        )

    def set_result(self, result: Any):
        """Record ``result`` as the outcome of the associated work.

        Intended solely for internal use by this library and its unit
        tests; do not call this method from application code.
        """
        return super().set_result(result=result)

    def set_exception(self, exception: Optional[BaseException]):
        """Record ``exception`` as the outcome of the associated work.

        Intended solely for internal use by this library and its unit
        tests; do not call this method from application code.
        """
        return super().set_exception(exception=exception)

View File

@@ -0,0 +1,286 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.pubsub.v1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/empty.proto";
option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.PubSub.V1";
option go_package = "google.golang.org/genproto/googleapis/pubsub/v1;pubsub";
option java_multiple_files = true;
option java_outer_classname = "SchemaProto";
option java_package = "com.google.pubsub.v1";
option php_namespace = "Google\\Cloud\\PubSub\\V1";
option ruby_package = "Google::Cloud::PubSub::V1";
// Service for doing schema-related operations.
service SchemaService {
  option (google.api.default_host) = "pubsub.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/pubsub";

  // Creates a schema.
  rpc CreateSchema(CreateSchemaRequest) returns (Schema) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/schemas"
      body: "schema"
    };
    option (google.api.method_signature) = "parent,schema,schema_id";
  }

  // Gets a schema.
  rpc GetSchema(GetSchemaRequest) returns (Schema) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/schemas/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists schemas in a project.
  rpc ListSchemas(ListSchemasRequest) returns (ListSchemasResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*}/schemas"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes a schema.
  rpc DeleteSchema(DeleteSchemaRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/schemas/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Validates a schema.
  rpc ValidateSchema(ValidateSchemaRequest) returns (ValidateSchemaResponse) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/schemas:validate"
      body: "*"
    };
    option (google.api.method_signature) = "parent,schema";
  }

  // Validates a message against a schema.
  // Note: no method_signature option is declared for this RPC.
  rpc ValidateMessage(ValidateMessageRequest)
      returns (ValidateMessageResponse) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/schemas:validateMessage"
      body: "*"
    };
  }
}
// A schema resource.
message Schema {
  option (google.api.resource) = {
    type: "pubsub.googleapis.com/Schema"
    pattern: "projects/{project}/schemas/{schema}"
  };

  // Possible schema definition types.
  enum Type {
    // Default value. This value is unused.
    TYPE_UNSPECIFIED = 0;

    // A Protocol Buffer schema definition.
    PROTOCOL_BUFFER = 1;

    // An Avro schema definition.
    AVRO = 2;
  }

  // Required. Name of the schema.
  // Format is `projects/{project}/schemas/{schema}`.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // The type of the schema definition.
  Type type = 2;

  // The definition of the schema. This should contain a string representing
  // the full definition of the schema that is a valid schema definition of
  // the type specified in `type`.
  string definition = 3;
}
// Request for the CreateSchema method.
message CreateSchemaRequest {
  // Required. The name of the project in which to create the schema.
  // Format is `projects/{project-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "pubsub.googleapis.com/Schema"
    }
  ];

  // Required. The schema object to create.
  //
  // This schema's `name` parameter is ignored. The schema object returned
  // by CreateSchema will have a `name` made using the given `parent` and
  // `schema_id`.
  Schema schema = 2 [(google.api.field_behavior) = REQUIRED];

  // The ID to use for the schema, which will become the final component of
  // the schema's resource name.
  //
  // See https://cloud.google.com/pubsub/docs/admin#resource_names for resource
  // name constraints.
  string schema_id = 3;
}
// View of Schema object fields to be returned by GetSchema and ListSchemas.
enum SchemaView {
  // The default / unset value.
  // The API will default to the BASIC view.
  SCHEMA_VIEW_UNSPECIFIED = 0;

  // Include the name and type of the schema, but not the definition.
  BASIC = 1;

  // Include all Schema object fields.
  FULL = 2;
}
// Request for the GetSchema method.
message GetSchemaRequest {
  // Required. The name of the schema to get.
  // Format is `projects/{project}/schemas/{schema}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "pubsub.googleapis.com/Schema" }
  ];

  // The set of fields to return in the response. If not set, returns a Schema
  // with `name` and `type`, but not `definition`. Set to `FULL` to retrieve all
  // fields.
  SchemaView view = 2;
}
// Request for the `ListSchemas` method.
message ListSchemasRequest {
  // Required. The name of the project in which to list schemas.
  // Format is `projects/{project-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // The set of Schema fields to return in the response. If not set, returns
  // Schemas with `name` and `type`, but not `definition`. Set to `FULL` to
  // retrieve all fields.
  SchemaView view = 2;

  // Maximum number of schemas to return.
  int32 page_size = 3;

  // The value returned by the last `ListSchemasResponse`; indicates that
  // this is a continuation of a prior `ListSchemas` call, and that the
  // system should return the next page of data.
  string page_token = 4;
}
// Response for the `ListSchemas` method.
message ListSchemasResponse {
  // The resulting schemas.
  repeated Schema schemas = 1;

  // If not empty, indicates that there may be more schemas that match the
  // request; this value should be passed in a new `ListSchemasRequest`.
  string next_page_token = 2;
}
// Request for the `DeleteSchema` method.
message DeleteSchemaRequest {
  // Required. Name of the schema to delete.
  // Format is `projects/{project}/schemas/{schema}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "pubsub.googleapis.com/Schema" }
  ];
}
// Request for the `ValidateSchema` method.
message ValidateSchemaRequest {
  // Required. The name of the project in which to validate schemas.
  // Format is `projects/{project-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The schema object to validate.
  Schema schema = 2 [(google.api.field_behavior) = REQUIRED];
}
// Response for the `ValidateSchema` method.
// Intentionally empty: a successful response indicates the schema is valid.
message ValidateSchemaResponse {}
// Request for the `ValidateMessage` method.
message ValidateMessageRequest {
  // Required. The name of the project in which to validate schemas.
  // Format is `projects/{project-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // The schema to validate the message against: either a reference to an
  // existing schema resource, or an inline ad-hoc schema.
  oneof schema_spec {
    // Name of the schema against which to validate.
    //
    // Format is `projects/{project}/schemas/{schema}`.
    string name = 2 [
      (google.api.resource_reference) = { type: "pubsub.googleapis.com/Schema" }
    ];

    // Ad-hoc schema against which to validate.
    Schema schema = 3;
  }

  // Message to validate against the provided `schema_spec`.
  bytes message = 4;

  // The encoding expected for messages.
  Encoding encoding = 5;
}
// Response for the `ValidateMessage` method.
// Intentionally empty: a successful response indicates the message is valid.
message ValidateMessageResponse {}
// Possible encoding types for messages.
enum Encoding {
  // Unspecified.
  ENCODING_UNSPECIFIED = 0;

  // JSON encoding.
  JSON = 1;

  // Binary encoding, as defined by the schema type. For some schema types,
  // binary encoding may not be available.
  BINARY = 2;
}

View File

@@ -0,0 +1,20 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from google.cloud.pubsub_v1.publisher.client import Client
__all__ = ("Client",)

View File

@@ -0,0 +1,170 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import abc
import enum
import typing
from typing import Optional, Sequence
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud import pubsub_v1
from google.cloud.pubsub_v1 import types
from google.pubsub_v1 import types as gapic_types
class Batch(metaclass=abc.ABCMeta):
    """Abstract base class for Pub/Sub publish batching.

    Most users are well served by the thread-based
    :class:`~.pubsub_v1.publisher.batch.thread.Batch`; this interface exists
    so that advanced users who need a different concurrency model can supply
    their own implementation via the ``batch_class`` argument to
    :class:`~.pubsub_v1.client.PublisherClient`.

    Batching works as follows: when the
    :class:`~.pubsub_v1.publisher.client.Client` is asked to publish a
    message, it routes the message to the currently open batch for the
    topic, creating a fresh batch when none is open. A newly created batch
    starts a timer counting down to the maximum allowed latency; the batch
    commits when that timer expires, or earlier if the message-count or
    byte-size threshold is reached first.
    """

    def __len__(self):
        """Return how many messages the batch currently holds."""
        return len(self.messages)

    @staticmethod
    @abc.abstractmethod
    def make_lock():  # pragma: NO COVER
        """Create and return a lock in the chosen concurrency model.

        Returns:
            ContextManager: A newly created lock.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def messages(self) -> Sequence["gapic_types.PubsubMessage"]:  # pragma: NO COVER
        """Return the messages currently held by the batch.

        Returns:
            The messages currently in the batch.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def size(self) -> int:  # pragma: NO COVER
        """Return the combined size of everything currently in the batch.

        This includes the overhead of the ``PublishRequest`` wrapper that is
        sent to the backend, not just the messages themselves.

        Returns:
            int: The total size of all of the messages currently
                in the batch (including the request overhead), in bytes.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def settings(self) -> "types.BatchSettings":  # pragma: NO COVER
        """Return the batch settings.

        Returns:
            The batch settings. These are considered immutable once the batch
            has been opened.
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def status(self) -> "BatchStatus":  # pragma: NO COVER
        """Return the current status of this batch.

        Returns:
            The status of this batch. All statuses are human-readable,
            all-lowercase strings. The ones represented in the
            :class:`BaseBatch.Status` enum are special, but other statuses
            are permitted.
        """
        raise NotImplementedError

    # NOTE: not decorated as abstract, so pre-existing subclasses that never
    # defined `cancel` remain instantiable.
    def cancel(
        self, cancellation_reason: "BatchCancellationReason"
    ) -> None:  # pragma: NO COVER
        """Complete all pending futures with an exception.

        Must be invoked before publishing begins (i.e. while the batch is
        still accepting messages).

        Args:
            cancellation_reason:
                The reason why this batch has been cancelled.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def publish(
        self, message: "gapic_types.PubsubMessage"
    ) -> Optional["pubsub_v1.publisher.futures.Future"]:  # pragma: NO COVER
        """Publish a single message.

        Adds the message to this batch; it will be sent once the batch
        either accumulates enough messages or enough time elapses. This is
        invoked by :meth:`~.PublisherClient.publish`.

        Args:
            message: The Pub/Sub message.

        Returns:
            An object conforming to the :class:`concurrent.futures.Future`
            interface, or :data:`None` to signal that the batch cannot
            accept a message.
        """
        raise NotImplementedError
class BatchStatus(str, enum.Enum):
    """An enum-like class representing valid statuses for a batch."""

    # The batch is open and can receive new messages.
    ACCEPTING_MESSAGES = "accepting messages"
    # A commit has been requested, but sending has not yet begun.
    STARTING = "starting"
    # The publish request is being sent to the backend.
    IN_PROGRESS = "in progress"
    # The batch was cancelled or the publish request failed.
    ERROR = "error"
    # All messages in the batch were published successfully.
    SUCCESS = "success"
class BatchCancellationReason(str, enum.Enum):
    """An enum-like class representing reasons why a batch was cancelled."""

    # Used when ordered delivery is in effect and an earlier message with
    # the same ordering key failed; the value doubles as the error message.
    PRIOR_ORDERED_MESSAGE_FAILED = (
        "Batch cancelled because prior ordered message for the same key has "
        "failed. This batch has been cancelled to avoid out-of-order publish."
    )
    # Used when the publisher client is stopped before the batch commits.
    CLIENT_STOPPED = "Batch cancelled because the publisher client has been stopped."

View File

@@ -0,0 +1,409 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import logging
import threading
import time
import typing
from typing import Any, Callable, List, Optional, Sequence
import google.api_core.exceptions
from google.api_core import gapic_v1
from google.cloud.pubsub_v1.publisher import exceptions
from google.cloud.pubsub_v1.publisher import futures
from google.cloud.pubsub_v1.publisher._batch import base
from google.pubsub_v1 import types as gapic_types
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud import pubsub_v1
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1.publisher import Client as PublisherClient
from google.pubsub_v1.services.publisher.client import OptionalRetry
_LOGGER = logging.getLogger(__name__)
_CAN_COMMIT = (base.BatchStatus.ACCEPTING_MESSAGES, base.BatchStatus.STARTING)
_SERVER_PUBLISH_MAX_BYTES = 10 * 1000 * 1000 # max accepted size of PublishRequest
_raw_proto_pubbsub_message = gapic_types.PubsubMessage.pb()
class Batch(base.Batch):
"""A batch of messages.
The batch is the internal group of messages which are either awaiting
publication or currently in progress.
A batch is automatically created by the PublisherClient when the first
message to be published is received; subsequent messages are added to
that batch until the process of actual publishing _starts_.
Once this occurs, any new messages sent to :meth:`publish` open a new
batch.
If you are using this library, you most likely do not need to instantiate
batch objects directly; they will be created for you. If you want to
change the actual batching settings, see the ``batching`` argument on
:class:`~.pubsub_v1.PublisherClient`.
Any properties or methods on this class which are not defined in
:class:`~.pubsub_v1.publisher.batch.BaseBatch` should be considered
implementation details.
Args:
client:
The publisher client used to create this batch.
topic:
The topic. The format for this is ``projects/{project}/topics/{topic}``.
settings:
The settings for batch publishing. These should be considered immutable
once the batch has been opened.
batch_done_callback:
Callback called when the response for a batch publish has been received.
Called with one boolean argument: successfully published or a permanent
error occurred. Temporary errors are not surfaced because they are retried
at a lower level.
commit_when_full:
Whether to commit the batch when the batch is full.
commit_retry:
    Designation of what errors, if any, should be retried when committing
    the batch. If not provided, a default retry is used.
commit_timeout:
    The timeout to apply when committing the batch. If not provided, a default
    timeout is used.
"""
    def __init__(
        self,
        client: "PublisherClient",
        topic: str,
        settings: "types.BatchSettings",
        batch_done_callback: Optional[Callable[[bool], Any]] = None,
        commit_when_full: bool = True,
        commit_retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        commit_timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    ):
        """Initialize the batch; see the class docstring for argument details."""
        self._client = client
        self._topic = topic
        self._settings = settings
        self._batch_done_callback = batch_done_callback
        self._commit_when_full = commit_when_full

        # Guards the mutable, cross-thread state below.
        self._state_lock = threading.Lock()
        # These members are all communicated between threads; ensure that
        # any writes to them use the "state lock" to remain atomic.
        # _futures list should remain unchanged after batch
        # status changed from ACCEPTING_MESSAGES to any other
        # in order to avoid race conditions
        self._futures: List[futures.Future] = []
        self._messages: List[gapic_types.PubsubMessage] = []
        self._status = base.BatchStatus.ACCEPTING_MESSAGES

        # The initial size is not zero, we need to account for the size overhead
        # of the PublishRequest message itself.
        self._base_request_size = gapic_types.PublishRequest(topic=topic)._pb.ByteSize()
        self._size = self._base_request_size

        self._commit_retry = commit_retry
        self._commit_timeout = commit_timeout
@staticmethod
def make_lock() -> threading.Lock:
"""Return a threading lock.
Returns:
A newly created lock.
"""
return threading.Lock()
@property
def client(self) -> "PublisherClient":
"""A publisher client."""
return self._client
@property
def messages(self) -> Sequence[gapic_types.PubsubMessage]:
"""The messages currently in the batch."""
return self._messages
@property
def settings(self) -> "types.BatchSettings":
"""Return the batch settings.
Returns:
The batch settings. These are considered immutable once the batch has
been opened.
"""
return self._settings
@property
def size(self) -> int:
"""Return the total size of all of the messages currently in the batch.
The size includes any overhead of the actual ``PublishRequest`` that is
sent to the backend.
Returns:
The total size of all of the messages currently in the batch (including
the request overhead), in bytes.
"""
return self._size
@property
def status(self) -> base.BatchStatus:
"""Return the status of this batch.
Returns:
The status of this batch. All statuses are human-readable, all-lowercase
strings.
"""
return self._status
    def cancel(self, cancellation_reason: base.BatchCancellationReason) -> None:
        """Complete pending futures with an exception.

        This method must be called before publishing starts (ie: while the
        batch is still accepting messages.)

        Args:
            cancellation_reason:
                The reason why this batch has been cancelled.
        """
        with self._state_lock:
            assert (
                self._status == base.BatchStatus.ACCEPTING_MESSAGES
            ), "Cancel should not be called after sending has started."

            # Fail all pending futures first, then flip the status so that no
            # further messages are accepted.
            exc = RuntimeError(cancellation_reason.value)
            for future in self._futures:
                future.set_exception(exc)
            self._status = base.BatchStatus.ERROR
    def commit(self) -> None:
        """Actually publish all of the messages on the active batch.

        .. note::

            This method is non-blocking. It opens a new thread, which calls
            :meth:`_commit`, which does block.

        This synchronously sets the batch status to "starting", and then opens
        a new thread, which handles actually sending the messages to Pub/Sub.

        If the current batch is **not** accepting messages, this method
        does nothing.
        """
        # Set the status to "starting" synchronously, to ensure that
        # this batch will necessarily not accept new messages.
        with self._state_lock:
            if self._status == base.BatchStatus.ACCEPTING_MESSAGES:
                self._status = base.BatchStatus.STARTING
            else:
                # Another thread already started (or finished) the commit.
                return

        self._start_commit_thread()
def _start_commit_thread(self) -> None:
"""Start a new thread to actually handle the commit."""
# NOTE: If the thread is *not* a daemon, a memory leak exists due to a CPython issue.
# https://github.com/googleapis/python-pubsub/issues/395#issuecomment-829910303
# https://github.com/googleapis/python-pubsub/issues/395#issuecomment-830092418
commit_thread = threading.Thread(
name="Thread-CommitBatchPublisher", target=self._commit, daemon=True
)
commit_thread.start()
    def _commit(self) -> None:
        """Actually publish all of the messages on the active batch.

        This moves the batch out from being the active batch to an in progress
        batch on the publisher, and then the batch is discarded upon
        completion.

        .. note::

            This method blocks. The :meth:`commit` method is the non-blocking
            version, which calls this one.
        """
        with self._state_lock:
            if self._status in _CAN_COMMIT:
                self._status = base.BatchStatus.IN_PROGRESS
            else:
                # If, in the intervening period between when this method was
                # called and now, the batch started to be committed, or
                # completed a commit, then no-op at this point.
                _LOGGER.debug(
                    "Batch is already in progress or has been cancelled, "
                    "exiting commit"
                )
                return

        # Once in the IN_PROGRESS state, no other thread can publish additional
        # messages or initiate a commit (those operations become a no-op), thus
        # it is safe to release the state lock here. Releasing the lock avoids
        # blocking other threads in case api.publish() below takes a long time
        # to complete.
        # https://github.com/googleapis/google-cloud-python/issues/8036

        # Sanity check: If there are no messages, no-op.
        if not self._messages:
            _LOGGER.debug("No messages to publish, exiting commit")
            self._status = base.BatchStatus.SUCCESS
            return

        # Begin the request to publish these messages.
        # Log how long the underlying request takes.
        start = time.time()

        batch_transport_succeeded = True
        try:
            # Performs retries for errors defined by the retry configuration.
            response = self._client._gapic_publish(
                topic=self._topic,
                messages=self._messages,
                retry=self._commit_retry,
                timeout=self._commit_timeout,
            )
        except google.api_core.exceptions.GoogleAPIError as exc:
            # We failed to publish, even after retries, so set the exception on
            # all futures and exit.
            self._status = base.BatchStatus.ERROR

            for future in self._futures:
                future.set_exception(exc)

            batch_transport_succeeded = False
            if self._batch_done_callback is not None:
                # Failed to publish batch.
                self._batch_done_callback(batch_transport_succeeded)

            _LOGGER.exception("Failed to publish %s messages.", len(self._futures))
            return

        end = time.time()
        _LOGGER.debug("gRPC Publish took %s seconds.", end - start)

        if len(response.message_ids) == len(self._futures):
            # Iterate over the futures on the queue and return the response
            # IDs. We are trusting that there is a 1:1 mapping, and raise
            # an exception if not.
            self._status = base.BatchStatus.SUCCESS
            for message_id, future in zip(response.message_ids, self._futures):
                future.set_result(message_id)
        else:
            # Sanity check: If the number of message IDs is not equal to
            # the number of futures I have, then something went wrong.
            self._status = base.BatchStatus.ERROR
            exception = exceptions.PublishError(
                "Some messages were not successfully published."
            )

            for future in self._futures:
                future.set_exception(exception)

            # Unknown error -> batch failed to be correctly transported.
            batch_transport_succeeded = False

            _LOGGER.error(
                "Only %s of %s messages were published.",
                len(response.message_ids),
                len(self._futures),
            )

        if self._batch_done_callback is not None:
            self._batch_done_callback(batch_transport_succeeded)
def publish(
    self, message: gapic_types.PubsubMessage
) -> Optional["pubsub_v1.publisher.futures.Future"]:
    """Publish a single message.

    Add the given message to this object; this will cause it to be
    published once the batch either has enough messages or a sufficient
    period of time has elapsed. If the batch is full or the commit is
    already in progress, the method does not do anything.

    This method is called by :meth:`~.PublisherClient.publish`.

    Args:
        message: The Pub/Sub message.

    Returns:
        An object conforming to the :class:`~concurrent.futures.Future` interface
        or :data:`None`. If :data:`None` is returned, that signals that the batch
        cannot accept a message.

    Raises:
        pubsub_v1.publisher.exceptions.MessageTooLargeError: If publishing
            the ``message`` would exceed the max size limit on the backend.
    """
    # Coerce the type, just in case.
    if not isinstance(message, gapic_types.PubsubMessage):
        # For performance reasons, the message should be constructed by directly
        # using the raw protobuf class, and only then wrapping it into the
        # higher-level PubsubMessage class.
        vanilla_pb = _raw_proto_pubbsub_message(**message)
        message = gapic_types.PubsubMessage.wrap(vanilla_pb)

    future = None

    with self._state_lock:
        # A batch in ERROR state must never be published to again; this is a
        # programming error, hence the assert rather than an exception.
        assert (
            self._status != base.BatchStatus.ERROR
        ), "Publish after stop() or publish error."

        if self.status != base.BatchStatus.ACCEPTING_MESSAGES:
            return None

        # Wrap the message in a PublishRequest to measure the *serialized*
        # size contribution of this message (includes field overhead).
        size_increase = gapic_types.PublishRequest(
            messages=[message]
        )._pb.ByteSize()

        # Hard limit: even a single-message request must not exceed the
        # backend's maximum request size.
        if (self._base_request_size + size_increase) > _SERVER_PUBLISH_MAX_BYTES:
            err_msg = (
                "The message being published would produce too large a publish "
                "request that would exceed the maximum allowed size on the "
                "backend ({} bytes).".format(_SERVER_PUBLISH_MAX_BYTES)
            )
            raise exceptions.MessageTooLargeError(err_msg)

        new_size = self._size + size_increase
        new_count = len(self._messages) + 1

        # Soft limit: the user-configured batch size, capped by the server max.
        size_limit = min(self.settings.max_bytes, _SERVER_PUBLISH_MAX_BYTES)
        overflow = new_size > size_limit or new_count >= self.settings.max_messages

        # Accept the message if the batch is empty (so a lone message that
        # fits the hard limit always gets through) or if it does not overflow.
        if not self._messages or not overflow:

            # Store the actual message in the batch's message queue.
            self._messages.append(message)
            self._size = new_size

            # Track the future on this batch (so that the result of the
            # future can be set).
            future = futures.Future()
            self._futures.append(future)

    # Try to commit, but it must be **without** the lock held, since
    # ``commit()`` will try to obtain the lock.
    if self._commit_when_full and overflow:
        self.commit()

    return future
def _set_status(self, status: base.BatchStatus) -> None:
    # Directly overwrite the batch status without any state-transition checks.
    # Normal transitions happen inside ``publish()``/``_commit()``; this hook
    # exists for internal/test use.
    self._status = status

View File

@@ -0,0 +1,82 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import abc
import typing
from google.api_core import gapic_v1
from google.pubsub_v1 import types as gapic_types
if typing.TYPE_CHECKING: # pragma: NO COVER
from concurrent import futures
from google.pubsub_v1.services.publisher.client import OptionalRetry
class Sequencer(metaclass=abc.ABCMeta):
    """The base class for sequencers for Pub/Sub publishing. A sequencer
    sequences messages to be published.

    Concrete implementations decide how messages are grouped into batches
    (see the ordered and unordered sequencer modules in this package).
    """

    @abc.abstractmethod
    def is_finished(self) -> bool:  # pragma: NO COVER
        """Whether the sequencer is finished and should be cleaned up.

        Returns:
            bool: Whether the sequencer is finished and should be cleaned up.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def unpause(self) -> None:  # pragma: NO COVER
        """Unpauses this sequencer.

        Raises:
            RuntimeError:
                If called when the sequencer has not been paused.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def publish(
        self,
        message: gapic_types.PubsubMessage,
        retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        timeout: gapic_types.TimeoutType = gapic_v1.method.DEFAULT,
    ) -> "futures.Future":  # pragma: NO COVER
        """Publish message for this ordering key.

        Args:
            message:
                The Pub/Sub message.
            retry:
                The retry settings to apply when publishing the message.
            timeout:
                The timeout to apply when publishing the message.

        Returns:
            A class instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface. The future might return
            immediately with a
            `pubsub_v1.publisher.exceptions.PublishToPausedOrderingKeyException`
            if the ordering key is paused.  Otherwise, the future tracks the
            lifetime of the message publish.

        Raises:
            RuntimeError:
                If called after this sequencer has been stopped, either by
                a call to stop() or after all batches have been published.
        """
        raise NotImplementedError

View File

@@ -0,0 +1,338 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum
import collections
import threading
import typing
from typing import Deque, Iterable, Sequence
from google.api_core import gapic_v1
from google.cloud.pubsub_v1.publisher import futures
from google.cloud.pubsub_v1.publisher import exceptions
from google.cloud.pubsub_v1.publisher._sequencer import base as sequencer_base
from google.cloud.pubsub_v1.publisher._batch import base as batch_base
from google.pubsub_v1 import types as gapic_types
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1.publisher import _batch
from google.cloud.pubsub_v1.publisher.client import Client as PublisherClient
from google.pubsub_v1.services.publisher.client import OptionalRetry
class _OrderedSequencerStatus(str, enum.Enum):
    """An enum-like class representing valid statuses for an OrderedSequencer.

    Subclassing ``str`` keeps members comparable to (and serializable as)
    plain strings.

    Starting state: ACCEPTING_MESSAGES
    Valid transitions:
      ACCEPTING_MESSAGES -> PAUSED (on permanent error)
      ACCEPTING_MESSAGES -> STOPPED  (when user calls stop() explicitly)
      ACCEPTING_MESSAGES -> FINISHED (all batch publishes finish normally)

      PAUSED -> ACCEPTING_MESSAGES  (when user unpauses)
      PAUSED -> STOPPED (when user calls stop() explicitly)

      STOPPED -> FINISHED (user stops client and the one remaining batch finishes
                           publish)
      STOPPED -> PAUSED (stop() commits one batch, which fails permanently)

      FINISHED -> ACCEPTING_MESSAGES (publish happens while waiting for cleanup)
      FINISHED -> STOPPED (when user calls stop() explicitly)

    Illegal transitions:
      PAUSED -> FINISHED (since all batches are cancelled on pause, there should
                          not be any that finish normally. paused sequencers
                          should not be cleaned up because their presence
                          indicates that the ordering key needs to be resumed)
      STOPPED -> ACCEPTING_MESSAGES (no way to make a user-stopped sequencer
                                     accept messages again. this is okay since
                                     stop() should only be called on shutdown.)
      FINISHED -> PAUSED (no messages remain in flight, so they can't cause a
                          permanent error and pause the sequencer)
    """

    # Accepting publishes and/or waiting for result of batch publish
    ACCEPTING_MESSAGES = "accepting messages"
    # Permanent error occurred. User must unpause this sequencer to resume
    # publishing. This is done to maintain ordering.
    PAUSED = "paused"
    # No more publishes allowed. There may be an outstanding batch that will
    # call the _batch_done_callback when it's done (success or error.)
    STOPPED = "stopped"
    # No more work to do. Waiting to be cleaned-up. A publish will transform
    # this sequencer back into the normal accepting-messages state.
    FINISHED = "finished"
class OrderedSequencer(sequencer_base.Sequencer):
    """Sequences messages into batches ordered by an ordering key for one topic.

    A sequencer always has at least one batch in it, unless paused or stopped.
    When no batches remain, the |publishes_done_callback| is called so the
    client can perform cleanup.

    Public methods are thread-safe.

    Args:
        client:
            The publisher client used to create this sequencer.
        topic:
            The topic. The format for this is ``projects/{project}/topics/{topic}``.
        ordering_key:
            The ordering key for this sequencer.
    """

    def __init__(self, client: "PublisherClient", topic: str, ordering_key: str):
        self._client = client
        self._topic = topic
        self._ordering_key = ordering_key
        # Guards the variables below
        self._state_lock = threading.Lock()
        # Batches ordered from first (head/left) to last (right/tail).
        # Invariant: always has at least one batch after the first publish,
        # unless paused or stopped.
        self._ordered_batches: Deque["_batch.thread.Batch"] = collections.deque()
        # See _OrderedSequencerStatus for valid state transitions.
        self._state = _OrderedSequencerStatus.ACCEPTING_MESSAGES

    def is_finished(self) -> bool:
        """Whether the sequencer is finished and should be cleaned up.

        Returns:
            Whether the sequencer is finished and should be cleaned up.
        """
        with self._state_lock:
            return self._state == _OrderedSequencerStatus.FINISHED

    def stop(self) -> None:
        """Permanently stop this sequencer.

        This differs from pausing, which may be resumed. Immediately commits
        the first batch and cancels the rest.

        Raises:
            RuntimeError:
                If called after stop() has already been called.
        """
        with self._state_lock:
            if self._state == _OrderedSequencerStatus.STOPPED:
                raise RuntimeError("Ordered sequencer already stopped.")
            self._state = _OrderedSequencerStatus.STOPPED
            if self._ordered_batches:
                # Give only the first batch the chance to finish.
                self._ordered_batches[0].commit()

                # Cancel the rest of the batches and remove them from the deque
                # of batches.
                while len(self._ordered_batches) > 1:
                    # Pops from the tail until it leaves only the head in the
                    # deque.
                    batch = self._ordered_batches.pop()
                    batch.cancel(batch_base.BatchCancellationReason.CLIENT_STOPPED)

    def commit(self) -> None:
        """Commit the first batch, if unpaused.

        If paused or no batches exist, this method does nothing.

        Raises:
            RuntimeError:
                If called after stop() has already been called.
        """
        with self._state_lock:
            if self._state == _OrderedSequencerStatus.STOPPED:
                raise RuntimeError("Ordered sequencer already stopped.")
            if self._state != _OrderedSequencerStatus.PAUSED and self._ordered_batches:
                # It's okay to commit the same batch more than once. The
                # operation is idempotent.
                self._ordered_batches[0].commit()

    def _batch_done_callback(self, success: bool) -> None:
        """Deal with completion of a batch.

        Called when a batch has finished publishing, with either a success
        or a failure. (Temporary failures are retried infinitely when
        ordering keys are enabled.)
        """
        ensure_cleanup_and_commit_timer_runs = False
        with self._state_lock:
            assert self._state != _OrderedSequencerStatus.PAUSED, (
                "This method should not be called after pause() because "
                "pause() should have cancelled all of the batches."
            )
            assert self._state != _OrderedSequencerStatus.FINISHED, (
                "This method should not be called after all batches have been "
                "finished."
            )

            # Message futures for the batch have been completed (either with a
            # result or an exception) already, so remove the batch.
            self._ordered_batches.popleft()

            if success:
                if len(self._ordered_batches) == 0:
                    # Mark this sequencer as finished.
                    # If new messages come in for this ordering key and this
                    # sequencer hasn't been cleaned up yet, it will go back
                    # into accepting-messages state. Otherwise, the client
                    # must create a new OrderedSequencer.
                    self._state = _OrderedSequencerStatus.FINISHED
                    # Ensure cleanup thread runs at some point.
                    ensure_cleanup_and_commit_timer_runs = True
                elif len(self._ordered_batches) == 1:
                    # Wait for messages and/or commit timeout
                    # Ensure there's actually a commit timer thread that'll commit
                    # after a delay.
                    ensure_cleanup_and_commit_timer_runs = True
                else:
                    # If there is more than one batch, we know that the next batch
                    # must be full and, therefore, ready to be committed.
                    self._ordered_batches[0].commit()
            else:
                # Unrecoverable error detected
                self._pause()

        # Called outside the lock; the client method takes its own lock.
        if ensure_cleanup_and_commit_timer_runs:
            self._client.ensure_cleanup_and_commit_timer_runs()

    def _pause(self) -> None:
        """Pause this sequencer: set state to paused, cancel all batches, and
        clear the list of ordered batches.

        _state_lock must be taken before calling this method.
        """
        assert (
            self._state != _OrderedSequencerStatus.FINISHED
        ), "Pause should not be called after all batches have finished."
        self._state = _OrderedSequencerStatus.PAUSED
        for batch in self._ordered_batches:
            batch.cancel(
                batch_base.BatchCancellationReason.PRIOR_ORDERED_MESSAGE_FAILED
            )
        self._ordered_batches.clear()

    def unpause(self) -> None:
        """Unpause this sequencer.

        Raises:
            RuntimeError:
                If called when the ordering key has not been paused.
        """
        with self._state_lock:
            if self._state != _OrderedSequencerStatus.PAUSED:
                raise RuntimeError("Ordering key is not paused.")
            self._state = _OrderedSequencerStatus.ACCEPTING_MESSAGES

    def _create_batch(
        self,
        commit_retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        commit_timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    ) -> "_batch.thread.Batch":
        """Create a new batch using the client's batch class and other stored
        settings.

        Args:
            commit_retry:
                The retry settings to apply when publishing the batch.
            commit_timeout:
                The timeout to apply when publishing the batch.
        """
        # commit_when_full=False: ordered batches are committed by this
        # sequencer (via _batch_done_callback), never by the batch itself.
        return self._client._batch_class(
            client=self._client,
            topic=self._topic,
            settings=self._client.batch_settings,
            batch_done_callback=self._batch_done_callback,
            commit_when_full=False,
            commit_retry=commit_retry,
            commit_timeout=commit_timeout,
        )

    def publish(
        self,
        message: gapic_types.PubsubMessage,
        retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    ) -> futures.Future:
        """Publish message for this ordering key.

        Args:
            message:
                The Pub/Sub message.
            retry:
                The retry settings to apply when publishing the message.
            timeout:
                The timeout to apply when publishing the message.

        Returns:
            A class instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface (but not an
            instance of that class). The future might return immediately with a
            PublishToPausedOrderingKeyException if the ordering key is paused.
            Otherwise, the future tracks the lifetime of the message publish.

        Raises:
            RuntimeError:
                If called after this sequencer has been stopped, either by
                a call to stop() or after all batches have been published.
        """
        with self._state_lock:
            if self._state == _OrderedSequencerStatus.PAUSED:
                # Do not raise; hand back a future that already carries the
                # pause exception so the caller's flow stays uniform.
                errored_future = futures.Future()
                exception = exceptions.PublishToPausedOrderingKeyException(
                    self._ordering_key
                )
                errored_future.set_exception(exception)
                return errored_future

            # If waiting to be cleaned-up, convert to accepting messages to
            # prevent this sequencer from being cleaned-up only to have another
            # one with the same ordering key created immediately afterward.
            if self._state == _OrderedSequencerStatus.FINISHED:
                self._state = _OrderedSequencerStatus.ACCEPTING_MESSAGES

            if self._state == _OrderedSequencerStatus.STOPPED:
                raise RuntimeError("Cannot publish on a stopped sequencer.")

            assert (
                self._state == _OrderedSequencerStatus.ACCEPTING_MESSAGES
            ), "Publish is only allowed in accepting-messages state."

            if not self._ordered_batches:
                new_batch = self._create_batch(
                    commit_retry=retry, commit_timeout=timeout
                )
                self._ordered_batches.append(new_batch)

            # Always publish to the newest (tail) batch; a full batch returns
            # None, in which case a fresh batch is appended and retried.
            batch = self._ordered_batches[-1]
            future = batch.publish(message)
            while future is None:
                batch = self._create_batch(commit_retry=retry, commit_timeout=timeout)
                self._ordered_batches.append(batch)
                future = batch.publish(message)

            return future

    # Used only for testing.
    def _set_batch(self, batch: "_batch.thread.Batch") -> None:
        # Replace all batches with a single provided batch (test hook).
        self._ordered_batches = collections.deque([batch])

    # Used only for testing.
    def _set_batches(self, batches: Iterable["_batch.thread.Batch"]) -> None:
        # Replace all batches with the provided iterable (test hook).
        self._ordered_batches = collections.deque(batches)

    # Used only for testing.
    def _get_batches(self) -> Sequence["_batch.thread.Batch"]:
        # Expose the internal deque directly (test hook).
        return self._ordered_batches

View File

@@ -0,0 +1,165 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
from typing import Optional
from google.api_core import gapic_v1
from google.cloud.pubsub_v1.publisher._sequencer import base
from google.pubsub_v1 import types as gapic_types
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1.publisher import _batch
from google.cloud.pubsub_v1.publisher import futures
from google.cloud.pubsub_v1.publisher.client import Client as PublisherClient
from google.pubsub_v1.services.publisher.client import OptionalRetry
from google.cloud.pubsub_v1 import types
class UnorderedSequencer(base.Sequencer):
    """Sequences messages into batches for one topic without any ordering.

    Public methods are NOT thread-safe.
    """

    def __init__(self, client: "PublisherClient", topic: str):
        self._client = client
        self._topic = topic
        # The one live batch, or None when no batch is currently open.
        self._current_batch: Optional["_batch.thread.Batch"] = None
        # Once True, every public method (except is_finished) refuses to run.
        self._stopped = False

    def is_finished(self) -> bool:
        """Whether the sequencer is finished and should be cleaned up.

        Returns:
            Whether the sequencer is finished and should be cleaned up.
        """
        # TODO: Implement. Not implementing yet because of possible performance
        # impact due to extra locking required. This does mean that
        # UnorderedSequencers don't get cleaned up, but this is the same as
        # previously existing behavior.
        return False

    def stop(self) -> None:
        """Stop the sequencer.

        Subsequent publishes will fail.

        Raises:
            RuntimeError:
                If called after stop() has already been called.
        """
        if self._stopped:
            raise RuntimeError("Unordered sequencer already stopped.")
        # Flush anything still pending before refusing further publishes.
        self.commit()
        self._stopped = True

    def commit(self) -> None:
        """Commit the batch.

        Raises:
            RuntimeError:
                If called after stop() has already been called.
        """
        if self._stopped:
            raise RuntimeError("Unordered sequencer already stopped.")

        batch = self._current_batch
        if batch:
            batch.commit()
            # The committed batch no longer needs tracking; dropping the
            # reference guarantees the next publish() opens a fresh batch.
            self._current_batch = None

    def unpause(self) -> typing.NoReturn:
        """Not relevant for this class."""
        raise NotImplementedError

    def _create_batch(
        self,
        commit_retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        commit_timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    ) -> "_batch.thread.Batch":
        """Create a new batch using the client's batch class and other stored
        settings.

        Args:
            commit_retry:
                The retry settings to apply when publishing the batch.
            commit_timeout:
                The timeout to apply when publishing the batch.
        """
        # commit_when_full=True: with no ordering constraints, a full batch
        # may commit itself immediately.
        return self._client._batch_class(
            client=self._client,
            topic=self._topic,
            settings=self._client.batch_settings,
            batch_done_callback=None,
            commit_when_full=True,
            commit_retry=commit_retry,
            commit_timeout=commit_timeout,
        )

    def publish(
        self,
        message: gapic_types.PubsubMessage,
        retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
        timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    ) -> "futures.Future":
        """Batch message into existing or new batch.

        Args:
            message:
                The Pub/Sub message.
            retry:
                The retry settings to apply when publishing the message.
            timeout:
                The timeout to apply when publishing the message.

        Returns:
            An object conforming to the :class:`~concurrent.futures.Future` interface.
            The future tracks the publishing status of the message.

        Raises:
            RuntimeError:
                If called after stop() has already been called.
            pubsub_v1.publisher.exceptions.MessageTooLargeError: If publishing
                the ``message`` would exceed the max size limit on the backend.
        """
        if self._stopped:
            raise RuntimeError("Unordered sequencer already stopped.")

        batch = self._current_batch
        if batch is None:
            batch = self._create_batch(commit_retry=retry, commit_timeout=timeout)
            self._current_batch = batch

        # batch.publish() may raise MessageTooLargeError; it returns None when
        # the batch was full (and, being commit_when_full, committed itself),
        # in which case a fresh batch takes over. The old batch is forgotten
        # deliberately — it has already been committed.
        future = batch.publish(message)
        while future is None:
            batch = self._create_batch(commit_retry=retry, commit_timeout=timeout)
            self._current_batch = batch
            future = batch.publish(message)

        return future

    # Used only for testing.
    def _set_batch(self, batch: "_batch.thread.Batch") -> None:
        # Replace the current batch directly (test hook).
        self._current_batch = batch

View File

@@ -0,0 +1,514 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import copy
import logging
import os
import threading
import time
import typing
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
import warnings
from google.api_core import gapic_v1
from google.auth.credentials import AnonymousCredentials # type: ignore
from google.oauth2 import service_account # type: ignore
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1.publisher import exceptions
from google.cloud.pubsub_v1.publisher import futures
from google.cloud.pubsub_v1.publisher._batch import thread
from google.cloud.pubsub_v1.publisher._sequencer import ordered_sequencer
from google.cloud.pubsub_v1.publisher._sequencer import unordered_sequencer
from google.cloud.pubsub_v1.publisher.flow_controller import FlowController
from google.pubsub_v1 import gapic_version as package_version
from google.pubsub_v1 import types as gapic_types
from google.pubsub_v1.services.publisher import client as publisher_client
__version__ = package_version.__version__
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud import pubsub_v1
from google.cloud.pubsub_v1.publisher import _batch
from google.pubsub_v1.services.publisher.client import OptionalRetry
from google.pubsub_v1.types import pubsub as pubsub_types
# Module-level logger for the publisher client.
_LOGGER = logging.getLogger(__name__)

# The raw protobuf message class, used to construct messages cheaply before
# wrapping them in the higher-level proto-plus ``PubsubMessage`` type.
# NOTE: the "pubbsub" misspelling in the name is long-standing; this module
# references it in publish paths, so it is kept as-is.
_raw_proto_pubbsub_message = gapic_types.PubsubMessage.pb()

# Either kind of sequencer the client may hold per (topic, ordering_key) pair.
SequencerType = Union[
    ordered_sequencer.OrderedSequencer, unordered_sequencer.UnorderedSequencer
]
class Client(publisher_client.PublisherClient):
"""A publisher client for Google Cloud Pub/Sub.
This creates an object that is capable of publishing messages.
Generally, you can instantiate this client with no arguments, and you
get sensible defaults.
Args:
batch_settings:
The settings for batch publishing.
publisher_options:
The options for the publisher client. Note that enabling message ordering
will override the publish retry timeout to be infinite.
kwargs:
Any additional arguments provided are sent as keyword arguments to the
underlying
:class:`~google.cloud.pubsub_v1.gapic.publisher_client.PublisherClient`.
Generally you should not need to set additional keyword
arguments. Regional endpoints can be set via ``client_options`` that
takes a single key-value pair that defines the endpoint.
Example:
.. code-block:: python
from google.cloud import pubsub_v1
publisher_client = pubsub_v1.PublisherClient(
# Optional
batch_settings = pubsub_v1.types.BatchSettings(
max_bytes=1024, # One kilobyte
max_latency=1, # One second
),
# Optional
publisher_options = pubsub_v1.types.PublisherOptions(
enable_message_ordering=False,
flow_control=pubsub_v1.types.PublishFlowControl(
message_limit=2000,
limit_exceeded_behavior=pubsub_v1.types.LimitExceededBehavior.BLOCK,
),
),
# Optional
client_options = {
"api_endpoint": REGIONAL_ENDPOINT
}
)
"""
def __init__(
    self,
    batch_settings: Union[types.BatchSettings, Sequence] = (),
    publisher_options: Union[types.PublisherOptions, Sequence] = (),
    **kwargs: Any,
):
    assert (
        type(batch_settings) is types.BatchSettings or len(batch_settings) == 0
    ), "batch_settings must be of type BatchSettings or an empty sequence."
    assert (
        type(publisher_options) is types.PublisherOptions
        or len(publisher_options) == 0
    ), "publisher_options must be of type PublisherOptions or an empty sequence."

    # Sanity check: Is our goal to use the emulator?
    # If so, create a grpc insecure channel with the emulator host
    # as the target.
    if os.environ.get("PUBSUB_EMULATOR_HOST"):
        kwargs["client_options"] = {
            "api_endpoint": os.environ.get("PUBSUB_EMULATOR_HOST")
        }
        # The emulator performs no auth; anonymous credentials suffice.
        kwargs["credentials"] = AnonymousCredentials()

    # For a transient failure, retry publishing the message infinitely.
    self.publisher_options = types.PublisherOptions(*publisher_options)
    # Index 0 of PublisherOptions is the enable_message_ordering flag, as the
    # assignment name indicates.
    self._enable_message_ordering = self.publisher_options[0]

    # Add the metrics headers, and instantiate the underlying GAPIC
    # client.
    super().__init__(**kwargs)
    # Cache the transport host; exposed via the ``target`` property.
    self._target = self._transport._host
    self._batch_class = thread.Batch
    self.batch_settings = types.BatchSettings(*batch_settings)

    # The batches on the publisher client are responsible for holding
    # messages. One batch exists for each topic.
    self._batch_lock = self._batch_class.make_lock()
    # (topic, ordering_key) => sequencers object
    self._sequencers: Dict[Tuple[str, str], SequencerType] = {}
    self._is_stopped = False
    # Thread created to commit all sequencers after a timeout.
    self._commit_thread: Optional[threading.Thread] = None

    # The object controlling the message publishing flow
    self._flow_controller = FlowController(self.publisher_options.flow_control)
@classmethod
def from_service_account_file(  # type: ignore[override]
    cls,
    filename: str,
    batch_settings: Union[types.BatchSettings, Sequence] = (),
    **kwargs: Any,
) -> "Client":
    """Build a publisher client authenticated from a service account JSON file.

    Args:
        filename:
            The path to the service account private key JSON file.
        batch_settings:
            The settings for batch publishing.
        kwargs:
            Additional arguments to pass to the constructor.

    Returns:
        A Publisher instance that is the constructed client.
    """
    # Load credentials from disk and hand everything to the constructor;
    # any caller-supplied "credentials" kwarg is intentionally replaced.
    kwargs["credentials"] = service_account.Credentials.from_service_account_file(
        filename
    )
    return cls(batch_settings, **kwargs)

# Alias retained for backwards compatibility.
from_service_account_json = from_service_account_file  # type: ignore[assignment]
@property
def target(self) -> str:
    """Return the target (where the API is).

    Returns:
        The location of the API.
    """
    # Captured from the underlying transport host at construction time.
    return self._target
@property
def api(self):
    """The underlying gapic API client.

    .. versionchanged:: 2.10.0
        Instead of a GAPIC ``PublisherClient`` client instance, this property is a
        proxy object to it with the same interface.

    .. deprecated:: 2.10.0
        Use the GAPIC methods and properties on the client instance directly
        instead of through the :attr:`api` attribute.
    """
    # Fixed typo in the user-facing warning text ("thorugh" -> "through").
    msg = (
        'The "api" property only exists for backward compatibility, access its '
        'attributes directly through the client instance (e.g. "client.foo" '
        'instead of "client.api.foo").'
    )
    warnings.warn(msg, category=DeprecationWarning)
    # super() acts as a proxy exposing the parent (GAPIC) class's interface.
    return super()
def _get_or_create_sequencer(self, topic: str, ordering_key: str) -> SequencerType:
    """Return the sequencer for ``(topic, ordering_key)``, creating it on first use.

    An empty ordering key maps to an unordered sequencer; any non-empty key
    gets its own ordered sequencer.
    """
    sequencer_key = (topic, ordering_key)
    try:
        return self._sequencers[sequencer_key]
    except KeyError:
        pass

    sequencer: SequencerType
    if ordering_key:
        sequencer = ordered_sequencer.OrderedSequencer(self, topic, ordering_key)
    else:
        sequencer = unordered_sequencer.UnorderedSequencer(self, topic)
    self._sequencers[sequencer_key] = sequencer
    return sequencer
def resume_publish(self, topic: str, ordering_key: str) -> None:
    """Resume publish on an ordering key that has had unrecoverable errors.

    Args:
        topic: The topic to publish messages to.
        ordering_key: A string that identifies related messages for which
            publish order should be respected.

    Raises:
        RuntimeError:
            If called after publisher has been stopped by a `stop()` method
            call.
        ValueError:
            If message ordering is not enabled on this client.

    .. note::
        If the topic/ordering key combination has not been seen before by
        this client, the call only logs a debug message; it does not raise.
    """
    with self._batch_lock:
        if self._is_stopped:
            raise RuntimeError("Cannot resume publish on a stopped publisher.")

        if not self._enable_message_ordering:
            raise ValueError(
                "Cannot resume publish on a topic/ordering key if ordering "
                "is not enabled."
            )

        sequencer_key = (topic, ordering_key)
        sequencer = self._sequencers.get(sequencer_key)
        if sequencer is None:
            # Unknown combination: best-effort no-op, surfaced only in logs.
            _LOGGER.debug(
                "Error: The topic/ordering key combination has not "
                "been seen before."
            )
        else:
            sequencer.unpause()
def _gapic_publish(self, *args, **kwargs) -> "pubsub_types.PublishResponse":
    """Call the GAPIC public API directly.

    Bypasses this class's batching ``publish()`` override and invokes the
    parent GAPIC client's ``publish`` RPC; used by batches when committing.
    """
    return super().publish(*args, **kwargs)
def publish(  # type: ignore[override]
    self,
    topic: str,
    data: bytes,
    ordering_key: str = "",
    retry: "OptionalRetry" = gapic_v1.method.DEFAULT,
    timeout: "types.OptionalTimeout" = gapic_v1.method.DEFAULT,
    **attrs: Union[bytes, str],
) -> "pubsub_v1.publisher.futures.Future":
    """Publish a single message.

    .. note::
        Messages in Pub/Sub are blobs of bytes. They are *binary* data,
        not text. You must send data as a bytestring
        (``bytes`` in Python 3; ``str`` in Python 2), and this library
        will raise an exception if you send a text string.

        The reason that this is so important (and why we do not try to
        coerce for you) is because Pub/Sub is also platform independent
        and there is no way to know how to decode messages properly on
        the other side; therefore, encoding and decoding is a required
        exercise for the developer.

    Add the given message to this object; this will cause it to be
    published once the batch either has enough messages or a sufficient
    period of time has elapsed.
    This method may block if LimitExceededBehavior.BLOCK is used in the
    flow control settings.

    Example:
        >>> from google.cloud import pubsub_v1
        >>> client = pubsub_v1.PublisherClient()
        >>> topic = client.topic_path('[PROJECT]', '[TOPIC]')
        >>> data = b'The rain in Wales falls mainly on the snails.'
        >>> response = client.publish(topic, data, username='guido')

    Args:
        topic: The topic to publish messages to.
        data: A bytestring representing the message body. This
            must be a bytestring.
        ordering_key: A string that identifies related messages for which
            publish order should be respected. Message ordering must be
            enabled for this client to use this feature.
        retry:
            Designation of what errors, if any, should be retried. If `ordering_key`
            is specified, the total retry deadline will be changed to "infinity".
            If given, it overrides any retry passed into the client through
            the ``publisher_options`` argument.
        timeout:
            The timeout for the RPC request. Can be used to override any timeout
            passed in through ``publisher_options`` when instantiating the client.

        attrs: A dictionary of attributes to be
            sent as metadata. (These may be text strings or byte strings.)

    Returns:
        A :class:`~google.cloud.pubsub_v1.publisher.futures.Future`
        instance that conforms to Python Standard library's
        :class:`~concurrent.futures.Future` interface (but not an
        instance of that class).

    Raises:
        RuntimeError:
            If called after publisher has been stopped by a `stop()` method
            call.

        pubsub_v1.publisher.exceptions.MessageTooLargeError: If publishing
            the ``message`` would exceed the max size limit on the backend.
    """
    # Sanity check: Is the data being sent as a bytestring?
    # If it is literally anything else, complain loudly about it.
    if not isinstance(data, bytes):
        raise TypeError(
            "Data being published to Pub/Sub must be sent as a bytestring."
        )

    if not self._enable_message_ordering and ordering_key != "":
        raise ValueError(
            "Cannot publish a message with an ordering key when message "
            "ordering is not enabled."
        )

    # Coerce all attributes to text strings.
    # Iterate over a shallow copy since ``attrs`` is mutated in the loop.
    for k, v in copy.copy(attrs).items():
        if isinstance(v, str):
            continue
        if isinstance(v, bytes):
            attrs[k] = v.decode("utf-8")
            continue
        raise TypeError(
            "All attributes being published to Pub/Sub must "
            "be sent as text strings."
        )

    # Create the Pub/Sub message object. For performance reasons, the message
    # should be constructed by directly using the raw protobuf class, and only
    # then wrapping it into the higher-level PubsubMessage class.
    vanilla_pb = _raw_proto_pubbsub_message(
        data=data, ordering_key=ordering_key, attributes=attrs
    )
    message = gapic_types.PubsubMessage.wrap(vanilla_pb)

    # Messages should go through flow control to prevent excessive
    # queuing on the client side (depending on the settings).
    try:
        self._flow_controller.add(message)
    except exceptions.FlowControlLimitError as exc:
        # Over the limit: return an already-failed future rather than raise.
        future = futures.Future()
        future.set_exception(exc)
        return future

    def on_publish_done(future):
        # Release this message's flow-control quota once publishing settles,
        # whether it succeeded or failed.
        self._flow_controller.release(message)

    if retry is gapic_v1.method.DEFAULT:  # if custom retry not passed in
        retry = self.publisher_options.retry

    if timeout is gapic_v1.method.DEFAULT:  # if custom timeout not passed in
        timeout = self.publisher_options.timeout

    with self._batch_lock:
        if self._is_stopped:
            raise RuntimeError("Cannot publish on a stopped publisher.")

        # Set retry timeout to "infinite" when message ordering is enabled.
        # Note that this then also impacts messages added with an empty
        # ordering key.
        if self._enable_message_ordering:
            if retry is gapic_v1.method.DEFAULT:
                # use the default retry for the publish GRPC method as a base
                transport = self._transport
                base_retry = transport._wrapped_methods[transport.publish]._retry
                retry = base_retry.with_deadline(2.0**32)
            else:
                retry = retry.with_deadline(2.0**32)

        # Delegate the publishing to the sequencer.
        sequencer = self._get_or_create_sequencer(topic, ordering_key)
        future = sequencer.publish(message, retry=retry, timeout=timeout)
        future.add_done_callback(on_publish_done)

        # Create a timer thread if necessary to enforce the batching
        # timeout.
        self._ensure_commit_timer_runs_no_lock()

        return future
def ensure_cleanup_and_commit_timer_runs(self) -> None:
"""Ensure a cleanup/commit timer thread is running.
If a cleanup/commit timer thread is already running, this does nothing.
"""
with self._batch_lock:
self._ensure_commit_timer_runs_no_lock()
def _ensure_commit_timer_runs_no_lock(self) -> None:
"""Ensure a commit timer thread is running, without taking
_batch_lock.
_batch_lock must be held before calling this method.
"""
if not self._commit_thread and self.batch_settings.max_latency < float("inf"):
self._start_commit_thread()
def _start_commit_thread(self) -> None:
"""Start a new thread to actually wait and commit the sequencers."""
# NOTE: If the thread is *not* a daemon, a memory leak exists due to a CPython issue.
# https://github.com/googleapis/python-pubsub/issues/395#issuecomment-829910303
# https://github.com/googleapis/python-pubsub/issues/395#issuecomment-830092418
self._commit_thread = threading.Thread(
name="Thread-PubSubBatchCommitter",
target=self._wait_and_commit_sequencers,
daemon=True,
)
self._commit_thread.start()
def _wait_and_commit_sequencers(self) -> None:
"""Wait up to the batching timeout, and commit all sequencers."""
# Sleep for however long we should be waiting.
time.sleep(self.batch_settings.max_latency)
_LOGGER.debug("Commit thread is waking up")
with self._batch_lock:
if self._is_stopped:
return
self._commit_sequencers()
self._commit_thread = None
def _commit_sequencers(self) -> None:
"""Clean up finished sequencers and commit the rest."""
finished_sequencer_keys = [
key
for key, sequencer in self._sequencers.items()
if sequencer.is_finished()
]
for sequencer_key in finished_sequencer_keys:
del self._sequencers[sequencer_key]
for sequencer in self._sequencers.values():
sequencer.commit()
def stop(self) -> None:
"""Immediately publish all outstanding messages.
Asynchronously sends all outstanding messages and
prevents future calls to `publish()`. Method should
be invoked prior to deleting this `Client()` object
in order to ensure that no pending messages are lost.
.. note::
This method is non-blocking. Use `Future()` objects
returned by `publish()` to make sure all publish
requests completed, either in success or error.
Raises:
RuntimeError:
If called after publisher has been stopped by a `stop()` method
call.
"""
with self._batch_lock:
if self._is_stopped:
raise RuntimeError("Cannot stop a publisher already stopped.")
self._is_stopped = True
for sequencer in self._sequencers.values():
sequencer.stop()
# Used only for testing.
def _set_batch(
self, topic: str, batch: "_batch.thread.Batch", ordering_key: str = ""
) -> None:
sequencer = self._get_or_create_sequencer(topic, ordering_key)
sequencer._set_batch(batch)
# Used only for testing.
    def _set_batch_class(self, batch_class: Type) -> None:
        """Test hook: override the class used to create new message batches."""
        self._batch_class = batch_class
# Used only for testing.
def _set_sequencer(
self, topic: str, sequencer: SequencerType, ordering_key: str = ""
) -> None:
sequencer_key = (topic, ordering_key)
self._sequencers[sequencer_key] = sequencer

View File

@@ -0,0 +1,51 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from google.api_core.exceptions import GoogleAPICallError
from google.cloud.pubsub_v1.exceptions import TimeoutError
class PublishError(GoogleAPICallError):
    """A publish API call failed (a ``GoogleAPICallError`` subclass)."""

    pass
class MessageTooLargeError(ValueError):
    """Attempt to publish a message that would exceed the server max size limit.

    A ``ValueError`` subclass, raised before the message is sent.
    """
class PublishToPausedOrderingKeyException(Exception):
    """Raised on an attempt to publish to a paused ordering key.

    An ordering key is paused when an unrecoverable error occurred during the
    publish of a batch for that key. To resume publishing, call the
    ``resumePublish`` method on the publisher ``Client`` object with this
    ordering key.
    """

    def __init__(self, ordering_key: str):
        super().__init__()
        # Remember which key was paused so callers can resume it.
        self.ordering_key = ordering_key
class FlowControlLimitError(Exception):
    """An action resulted in exceeding the flow control limits."""


# Public names exported by this exceptions module.
__all__ = (
    "FlowControlLimitError",
    "MessageTooLargeError",
    "PublishError",
    "TimeoutError",
    "PublishToPausedOrderingKeyException",
)

View File

@@ -0,0 +1,313 @@
# Copyright 2020, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
import logging
import threading
from typing import Dict, Optional, Type
import warnings
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1.publisher import exceptions
_LOGGER = logging.getLogger(__name__)

# The message type tracked by the flow controller (presumably the GAPIC
# ``PubsubMessage`` wrapper -- confirm against ``types``; ignored by the type
# checker because ``types.PubsubMessage`` is resolved dynamically).
MessageType = Type[types.PubsubMessage]  # type: ignore
class _QuantityReservation:
"""A (partial) reservation of quantifiable resources."""
def __init__(self, bytes_reserved: int, bytes_needed: int, has_slot: bool):
self.bytes_reserved = bytes_reserved
self.bytes_needed = bytes_needed
self.has_slot = has_slot
def __repr__(self):
return (
f"{type(self).__name__}("
f"bytes_reserved={self.bytes_reserved}, "
f"bytes_needed={self.bytes_needed}, "
f"has_slot={self.has_slot})"
)
class FlowController(object):
    """A class used to control the flow of messages passing through it.

    Args:
        settings: Desired flow control configuration.
    """

    def __init__(self, settings: types.PublishFlowControl):
        self._settings = settings

        # Load statistics. They represent the number of messages added, but not
        # yet released (and their total size).
        self._message_count = 0
        self._total_bytes = 0

        # A FIFO queue of threads blocked on adding a message that also tracks their
        # reservations of available flow control bytes and message slots.
        # Only relevant if the configured limit exceeded behavior is BLOCK.
        self._waiting: Dict[threading.Thread, _QuantityReservation] = OrderedDict()

        # Running totals of the bytes/slots promised to the waiting threads above.
        self._reserved_bytes = 0
        self._reserved_slots = 0

        # The lock is used to protect all internal state (message and byte count,
        # waiting threads to add, etc.).
        self._operational_lock = threading.Lock()

        # The condition for blocking the flow if capacity is exceeded.
        # It shares _operational_lock, so waiting on it releases that lock.
        self._has_capacity = threading.Condition(lock=self._operational_lock)

    def add(self, message: MessageType) -> None:
        """Add a message to flow control.

        Adding a message updates the internal load statistics, and an action is
        taken if these limits are exceeded (depending on the flow control settings).

        Args:
            message:
                The message entering the flow control.

        Raises:
            :exception:`~pubsub_v1.publisher.exceptions.FlowControlLimitError`:
                Raised when the desired action is
                :attr:`~google.cloud.pubsub_v1.types.LimitExceededBehavior.ERROR` and
                the message would exceed flow control limits, or when the desired action
                is :attr:`~google.cloud.pubsub_v1.types.LimitExceededBehavior.BLOCK` and
                the message would block forever against the flow control limits.
        """
        if self._settings.limit_exceeded_behavior == types.LimitExceededBehavior.IGNORE:
            return

        with self._operational_lock:
            if not self._would_overflow(message):
                self._message_count += 1
                self._total_bytes += message._pb.ByteSize()
                return

            # Adding a message would overflow, react.
            if (
                self._settings.limit_exceeded_behavior
                == types.LimitExceededBehavior.ERROR
            ):
                # Raising an error means rejecting a message, thus we do not
                # add anything to the existing load, but we do report the would-be
                # load if we accepted the message.
                load_info = self._load_info(
                    message_count=self._message_count + 1,
                    total_bytes=self._total_bytes + message._pb.ByteSize(),
                )
                error_msg = "Flow control limits would be exceeded - {}.".format(
                    load_info
                )
                raise exceptions.FlowControlLimitError(error_msg)

            # Only the BLOCK behavior remains at this point.
            assert (
                self._settings.limit_exceeded_behavior
                == types.LimitExceededBehavior.BLOCK
            )

            # Sanity check - if a message exceeds total flow control limits all
            # by itself, it would block forever, thus raise error.
            if (
                message._pb.ByteSize() > self._settings.byte_limit
                or self._settings.message_limit < 1
            ):
                load_info = self._load_info(
                    message_count=1, total_bytes=message._pb.ByteSize()
                )
                error_msg = (
                    "Total flow control limits too low for the message, "
                    "would block forever - {}.".format(load_info)
                )
                raise exceptions.FlowControlLimitError(error_msg)

            current_thread = threading.current_thread()

            while self._would_overflow(message):
                if current_thread not in self._waiting:
                    reservation = _QuantityReservation(
                        bytes_reserved=0,
                        bytes_needed=message._pb.ByteSize(),
                        has_slot=False,
                    )
                    self._waiting[current_thread] = reservation  # Will be placed last.

                _LOGGER.debug(
                    "Blocking until there is enough free capacity in the flow - "
                    "{}.".format(self._load_info())
                )

                # Waiting releases _operational_lock so release() can run and
                # notify us when capacity is redistributed.
                self._has_capacity.wait()

                _LOGGER.debug(
                    "Woke up from waiting on free capacity in the flow - "
                    "{}.".format(self._load_info())
                )

            # Message accepted, increase the load and remove thread stats.
            self._message_count += 1
            self._total_bytes += message._pb.ByteSize()
            self._reserved_bytes -= self._waiting[current_thread].bytes_reserved
            self._reserved_slots -= 1
            del self._waiting[current_thread]

    def release(self, message: MessageType) -> None:
        """Release a message from flow control.

        Args:
            message:
                The message leaving the flow control.
        """
        if self._settings.limit_exceeded_behavior == types.LimitExceededBehavior.IGNORE:
            return

        with self._operational_lock:
            # Releasing a message decreases the load.
            self._message_count -= 1
            self._total_bytes -= message._pb.ByteSize()

            if self._message_count < 0 or self._total_bytes < 0:
                warnings.warn(
                    "Releasing a message that was never added or already released.",
                    category=RuntimeWarning,
                    stacklevel=2,
                )
                # Clamp to zero so one bad release does not corrupt future stats.
                self._message_count = max(0, self._message_count)
                self._total_bytes = max(0, self._total_bytes)

            self._distribute_available_capacity()

            # If at least one thread waiting to add() can be unblocked, wake them up.
            if self._ready_to_unblock():
                _LOGGER.debug("Notifying threads waiting to add messages to flow.")
                self._has_capacity.notify_all()

    def _distribute_available_capacity(self) -> None:
        """Distribute available capacity among the waiting threads in FIFO order.

        The method assumes that the caller has obtained ``_operational_lock``.
        """
        available_slots = (
            self._settings.message_limit - self._message_count - self._reserved_slots
        )
        available_bytes = (
            self._settings.byte_limit - self._total_bytes - self._reserved_bytes
        )

        for reservation in self._waiting.values():
            if available_slots <= 0 and available_bytes <= 0:
                break  # Santa is now empty-handed, better luck next time.

            # Distribute any free slots.
            if available_slots > 0 and not reservation.has_slot:
                reservation.has_slot = True
                self._reserved_slots += 1
                available_slots -= 1

            # Distribute any free bytes.
            if available_bytes <= 0:
                continue

            bytes_still_needed = reservation.bytes_needed - reservation.bytes_reserved

            if bytes_still_needed < 0:  # Sanity check for any internal inconsistencies.
                msg = "Too many bytes reserved: {} / {}".format(
                    reservation.bytes_reserved, reservation.bytes_needed
                )
                warnings.warn(msg, category=RuntimeWarning)
                bytes_still_needed = 0

            can_give = min(bytes_still_needed, available_bytes)
            reservation.bytes_reserved += can_give
            self._reserved_bytes += can_give
            available_bytes -= can_give

    def _ready_to_unblock(self) -> bool:
        """Determine if any of the threads waiting to add a message can proceed.

        The method assumes that the caller has obtained ``_operational_lock``.
        """
        if self._waiting:
            # It's enough to only check the head of the queue, because FIFO
            # distribution of any free capacity.
            first_reservation = next(iter(self._waiting.values()))
            return (
                first_reservation.bytes_reserved >= first_reservation.bytes_needed
                and first_reservation.has_slot
            )
        return False

    def _would_overflow(self, message: MessageType) -> bool:
        """Determine if accepting a message would exceed flow control limits.

        The method assumes that the caller has obtained ``_operational_lock``.

        Args:
            message: The message entering the flow control.
        """
        # A thread that already holds a full reservation is allowed through
        # even if the raw totals would appear to overflow.
        reservation = self._waiting.get(threading.current_thread())

        if reservation:
            enough_reserved = reservation.bytes_reserved >= reservation.bytes_needed
            has_slot = reservation.has_slot
        else:
            enough_reserved = False
            has_slot = False

        bytes_taken = self._total_bytes + self._reserved_bytes + message._pb.ByteSize()
        size_overflow = bytes_taken > self._settings.byte_limit and not enough_reserved

        msg_count_overflow = not has_slot and (
            (self._message_count + self._reserved_slots + 1)
            > self._settings.message_limit
        )

        return size_overflow or msg_count_overflow

    def _load_info(
        self, message_count: Optional[int] = None, total_bytes: Optional[int] = None
    ) -> str:
        """Return the current flow control load information.

        The caller can optionally adjust some of the values to fit its reporting
        needs.

        The method assumes that the caller has obtained ``_operational_lock``.

        Args:
            message_count:
                The value to override the current message count with.
            total_bytes:
                The value to override the current total bytes with.
        """
        if message_count is None:
            message_count = self._message_count

        if total_bytes is None:
            total_bytes = self._total_bytes

        return (
            f"messages: {message_count} / {self._settings.message_limit} "
            f"(reserved: {self._reserved_slots}), "
            f"bytes: {total_bytes} / {self._settings.byte_limit} "
            f"(reserved: {self._reserved_bytes})"
        )

View File

@@ -0,0 +1,83 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import typing
from typing import Any, Callable, Union
from google.cloud.pubsub_v1 import futures
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud import pubsub_v1
class Future(futures.Future):
    """This future object is returned from asynchronous Pub/Sub publishing
    calls.

    Calling :meth:`result` will resolve the future by returning the message
    ID, unless an error occurs.
    """

    def cancel(self) -> bool:
        """Actions in Pub/Sub generally may not be canceled.

        This method always returns ``False``.
        """
        return False

    def cancelled(self) -> bool:
        """Actions in Pub/Sub generally may not be canceled.

        This method always returns ``False``.
        """
        return False

    # Fix: the annotation must admit None, since that is the default value.
    def result(self, timeout: Union[int, float, None] = None) -> str:
        """Return the message ID or raise an exception.

        This blocks until the message has been published successfully and
        returns the message ID unless an exception is raised.

        Args:
            timeout: The number of seconds before this call
                times out and raises TimeoutError. ``None`` (the default)
                means wait indefinitely.

        Returns:
            The message ID.

        Raises:
            concurrent.futures.TimeoutError: If the request times out.
            Exception: For undefined exceptions in the underlying
                call execution.
        """
        return super().result(timeout=timeout)

    # This exists to make the type checkers happy.
    def add_done_callback(
        self, callback: Callable[["pubsub_v1.publisher.futures.Future"], Any]
    ) -> None:
        """Attach a callable that will be called when the future finishes.

        Args:
            callback:
                A callable that will be called with this future as its only
                argument when the future completes or is cancelled. The callable
                will always be called by a thread in the same process in which
                it was added. If the future has already completed or been
                cancelled then the callable will be called immediately. These
                callables are called in the order that they were added.
        """
        return super().add_done_callback(callback)  # type: ignore

View File

@@ -0,0 +1,20 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from google.cloud.pubsub_v1.subscriber.client import Client
__all__ = ("Client",)

View File

@@ -0,0 +1,422 @@
# Copyright 2017, Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
import functools
import itertools
import logging
import math
import time
import threading
import typing
from typing import List, Optional, Sequence, Union
import warnings
from google.api_core.retry import exponential_sleep_generator
from google.cloud.pubsub_v1.subscriber._protocol import helper_threads
from google.cloud.pubsub_v1.subscriber._protocol import requests
from google.cloud.pubsub_v1.subscriber.exceptions import (
AcknowledgeStatus,
)
if typing.TYPE_CHECKING: # pragma: NO COVER
import queue
from google.cloud.pubsub_v1.subscriber._protocol.streaming_pull_manager import (
StreamingPullManager,
)
# Union of every request item type the dispatcher knows how to route.
RequestItem = Union[
    requests.AckRequest,
    requests.DropRequest,
    requests.LeaseRequest,
    requests.ModAckRequest,
    requests.NackRequest,
]

_LOGGER = logging.getLogger(__name__)

# Name of the helper thread that runs the callback-request worker.
_CALLBACK_WORKER_NAME = "Thread-CallbackRequestDispatcher"

_MAX_BATCH_SIZE = 100
"""The maximum number of requests to process and dispatch at a time."""

_MAX_BATCH_LATENCY = 0.01
"""The maximum amount of time in seconds to wait for additional request items
before processing the next batch of requests."""

_ACK_IDS_BATCH_SIZE = 1000
"""The maximum number of ACK IDs to send in a single StreamingPullRequest.
"""

_MIN_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS = 1
"""The time to wait for the first retry of failed acks and modacks when exactly-once
delivery is enabled."""

_MAX_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS = 10 * 60
"""The maximum amount of time in seconds to retry failed acks and modacks when
exactly-once delivery is enabled."""
class Dispatcher(object):
    """Dispatches batched callback requests to the streaming pull manager.

    Args:
        manager: The streaming pull manager that owns the stream and leases.
        queue: The queue that callbacks put request items onto.
    """

    def __init__(self, manager: "StreamingPullManager", queue: "queue.Queue"):
        self._manager = manager
        self._queue = queue
        self._thread: Optional[threading.Thread] = None
        self._operational_lock = threading.Lock()

    def start(self) -> None:
        """Start a thread to dispatch requests queued up by callbacks.

        Spawns a thread to run :meth:`dispatch_callback`.

        Raises:
            ValueError: If the dispatcher is already running.
        """
        with self._operational_lock:
            if self._thread is not None:
                raise ValueError("Dispatcher is already running.")

            worker = helper_threads.QueueCallbackWorker(
                self._queue,
                self.dispatch_callback,
                max_items=_MAX_BATCH_SIZE,
                max_latency=_MAX_BATCH_LATENCY,
            )
            # Create and start the helper thread.
            thread = threading.Thread(name=_CALLBACK_WORKER_NAME, target=worker)
            thread.daemon = True
            thread.start()
            _LOGGER.debug("Started helper thread %s", thread.name)
            self._thread = thread

    def stop(self) -> None:
        """Stop the worker thread, if running, and wait for it to exit."""
        with self._operational_lock:
            if self._thread is not None:
                # Signal the worker to stop by queueing a "poison pill"
                self._queue.put(helper_threads.STOP)
                self._thread.join()

            self._thread = None

    def dispatch_callback(self, items: Sequence[RequestItem]) -> None:
        """Map the callback request to the appropriate gRPC request.

        Args:
            items:
                Queued requests to dispatch.
        """
        lease_requests: List[requests.LeaseRequest] = []
        modack_requests: List[requests.ModAckRequest] = []
        ack_requests: List[requests.AckRequest] = []
        nack_requests: List[requests.NackRequest] = []
        drop_requests: List[requests.DropRequest] = []

        # Deduplicate by ack_id within each request type; duplicates with
        # attached futures still have those futures resolved.
        lease_ids = set()
        modack_ids = set()
        ack_ids = set()
        nack_ids = set()
        drop_ids = set()
        exactly_once_delivery_enabled = self._manager._exactly_once_delivery_enabled()

        for item in items:
            if isinstance(item, requests.LeaseRequest):
                if (
                    item.ack_id not in lease_ids
                ):  # LeaseRequests have no futures to handle.
                    lease_ids.add(item.ack_id)
                    lease_requests.append(item)
            elif isinstance(item, requests.ModAckRequest):
                if item.ack_id in modack_ids:
                    self._handle_duplicate_request_future(
                        exactly_once_delivery_enabled, item
                    )
                else:
                    modack_ids.add(item.ack_id)
                    modack_requests.append(item)
            elif isinstance(item, requests.AckRequest):
                if item.ack_id in ack_ids:
                    self._handle_duplicate_request_future(
                        exactly_once_delivery_enabled, item
                    )
                else:
                    ack_ids.add(item.ack_id)
                    ack_requests.append(item)
            elif isinstance(item, requests.NackRequest):
                if item.ack_id in nack_ids:
                    self._handle_duplicate_request_future(
                        exactly_once_delivery_enabled, item
                    )
                else:
                    nack_ids.add(item.ack_id)
                    nack_requests.append(item)
            elif isinstance(item, requests.DropRequest):
                if (
                    item.ack_id not in drop_ids
                ):  # DropRequests have no futures to handle.
                    drop_ids.add(item.ack_id)
                    drop_requests.append(item)
            else:
                warnings.warn(
                    f'Skipping unknown request item of type "{type(item)}"',
                    category=RuntimeWarning,
                )

        _LOGGER.debug("Handling %d batched requests", len(items))

        if lease_requests:
            self.lease(lease_requests)

        if modack_requests:
            self.modify_ack_deadline(modack_requests)

        # Note: Drop and ack *must* be after lease. It's possible to get both
        # the lease and the ack/drop request in the same batch.
        if ack_requests:
            self.ack(ack_requests)

        if nack_requests:
            self.nack(nack_requests)

        if drop_requests:
            self.drop(drop_requests)

    def _handle_duplicate_request_future(
        self,
        exactly_once_delivery_enabled: bool,
        item: Union[requests.AckRequest, requests.ModAckRequest, requests.NackRequest],
    ) -> None:
        """Resolve the future of a request whose ack_id already appeared in the batch."""
        _LOGGER.debug(
            "This is a duplicate %s with the same ack_id: %s.",
            type(item),
            item.ack_id,
        )
        if item.future:
            if exactly_once_delivery_enabled:
                item.future.set_exception(
                    ValueError(f"Duplicate ack_id for {type(item)}")
                )
            # Futures may be present even with exactly-once delivery
            # disabled, in transition periods after the setting is changed on
            # the subscription.
            else:
                # When exactly-once delivery is NOT enabled, acks/modacks are considered
                # best-effort, so the future should succeed even though this is a duplicate.
                item.future.set_result(AcknowledgeStatus.SUCCESS)

    def ack(self, items: Sequence[requests.AckRequest]) -> None:
        """Acknowledge the given messages.

        Args:
            items: The items to acknowledge.
        """
        # If we got timing information, add it to the histogram.
        for item in items:
            time_to_ack = item.time_to_ack
            if time_to_ack is not None:
                self._manager.ack_histogram.add(time_to_ack)

        # We must potentially split the request into multiple smaller requests
        # to avoid the server-side max request size limit.
        items_gen = iter(items)
        ack_ids_gen = (item.ack_id for item in items)
        total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE))

        for _ in range(total_chunks):
            ack_reqs_dict = {
                req.ack_id: req
                for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE)
            }
            requests_completed, requests_to_retry = self._manager.send_unary_ack(
                ack_ids=list(itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE)),
                ack_reqs_dict=ack_reqs_dict,
            )

            # Remove the completed messages from lease management.
            self.drop(requests_completed)

            # Retry on a separate thread so the dispatcher thread isn't blocked
            # by sleeps.
            if requests_to_retry:
                self._start_retry_thread(
                    "Thread-RetryAcks",
                    functools.partial(self._retry_acks, requests_to_retry),
                )

    def _start_retry_thread(self, thread_name, thread_target):
        """Run *thread_target* on a new daemon thread named *thread_name*."""
        # note: if the thread is *not* a daemon, a memory leak exists due to a cpython issue.
        # https://github.com/googleapis/python-pubsub/issues/395#issuecomment-829910303
        # https://github.com/googleapis/python-pubsub/issues/395#issuecomment-830092418
        retry_thread = threading.Thread(
            name=thread_name,
            target=thread_target,
            daemon=True,
        )
        # The thread finishes when the requests succeed or eventually fail with
        # a back-end timeout error or other permanent failure.
        retry_thread.start()

    def _retry_acks(self, requests_to_retry):
        """Retry failed ack requests with exponential backoff until they resolve."""
        retry_delay_gen = exponential_sleep_generator(
            initial=_MIN_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS,
            maximum=_MAX_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS,
        )
        while requests_to_retry:
            time_to_wait = next(retry_delay_gen)
            # Fix: the original message was a plain string with literal
            # "{len(requests_to_retry)}" braces (missing f-prefix); use lazy
            # %-style logging arguments instead.
            _LOGGER.debug(
                "Retrying %d ack(s) after delay of %s seconds",
                len(requests_to_retry),
                time_to_wait,
            )
            time.sleep(time_to_wait)

            ack_reqs_dict = {req.ack_id: req for req in requests_to_retry}
            requests_completed, requests_to_retry = self._manager.send_unary_ack(
                ack_ids=[req.ack_id for req in requests_to_retry],
                ack_reqs_dict=ack_reqs_dict,
            )
            assert (
                len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE
            ), "Too many requests to be retried."
            # Remove the completed messages from lease management.
            self.drop(requests_completed)

    def drop(
        self,
        items: Sequence[
            Union[requests.AckRequest, requests.DropRequest, requests.NackRequest]
        ],
    ) -> None:
        """Remove the given messages from lease management.

        Args:
            items: The items to drop.
        """
        assert self._manager.leaser is not None
        self._manager.leaser.remove(items)
        ordering_keys = (k.ordering_key for k in items if k.ordering_key)
        self._manager.activate_ordering_keys(ordering_keys)
        self._manager.maybe_resume_consumer()

    def lease(self, items: Sequence[requests.LeaseRequest]) -> None:
        """Add the given messages to lease management.

        Args:
            items: The items to lease.
        """
        assert self._manager.leaser is not None
        self._manager.leaser.add(items)
        self._manager.maybe_pause_consumer()

    def modify_ack_deadline(
        self,
        items: Sequence[requests.ModAckRequest],
        default_deadline: Optional[float] = None,
    ) -> None:
        """Modify the ack deadline for the given messages.

        Args:
            items: The items to modify.
            default_deadline: If set, overrides the per-item deadline seconds.
        """
        # We must potentially split the request into multiple smaller requests
        # to avoid the server-side max request size limit.
        items_gen = iter(items)
        ack_ids_gen = (item.ack_id for item in items)
        deadline_seconds_gen = (item.seconds for item in items)
        total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE))

        for _ in range(total_chunks):
            ack_reqs_dict = {
                req.ack_id: req
                for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE)
            }
            requests_to_retry: List[requests.ModAckRequest]
            if default_deadline is None:
                # no further work needs to be done for `requests_to_retry`
                _, requests_to_retry = self._manager.send_unary_modack(
                    modify_deadline_ack_ids=list(
                        itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE)
                    ),
                    modify_deadline_seconds=list(
                        itertools.islice(deadline_seconds_gen, _ACK_IDS_BATCH_SIZE)
                    ),
                    ack_reqs_dict=ack_reqs_dict,
                    default_deadline=None,
                )
            else:
                _, requests_to_retry = self._manager.send_unary_modack(
                    modify_deadline_ack_ids=itertools.islice(
                        ack_ids_gen, _ACK_IDS_BATCH_SIZE
                    ),
                    modify_deadline_seconds=None,
                    ack_reqs_dict=ack_reqs_dict,
                    default_deadline=default_deadline,
                )
            assert (
                len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE
            ), "Too many requests to be retried."

            # Retry on a separate thread so the dispatcher thread isn't blocked
            # by sleeps.
            if requests_to_retry:
                self._start_retry_thread(
                    "Thread-RetryModAcks",
                    functools.partial(self._retry_modacks, requests_to_retry),
                )

    def _retry_modacks(self, requests_to_retry):
        """Retry failed modack requests with exponential backoff until they resolve."""
        retry_delay_gen = exponential_sleep_generator(
            initial=_MIN_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS,
            maximum=_MAX_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS,
        )
        while requests_to_retry:
            time_to_wait = next(retry_delay_gen)
            # Fix: same missing f-prefix bug as in _retry_acks; use lazy
            # %-style logging arguments instead.
            _LOGGER.debug(
                "Retrying %d modack(s) after delay of %s seconds",
                len(requests_to_retry),
                time_to_wait,
            )
            time.sleep(time_to_wait)

            ack_reqs_dict = {req.ack_id: req for req in requests_to_retry}
            requests_completed, requests_to_retry = self._manager.send_unary_modack(
                modify_deadline_ack_ids=[req.ack_id for req in requests_to_retry],
                modify_deadline_seconds=[req.seconds for req in requests_to_retry],
                ack_reqs_dict=ack_reqs_dict,
            )

    def nack(self, items: Sequence[requests.NackRequest]) -> None:
        """Explicitly deny receipt of messages.

        Args:
            items: The items to deny.
        """
        self.modify_ack_deadline(
            [
                requests.ModAckRequest(
                    ack_id=item.ack_id, seconds=0, future=item.future
                )
                for item in items
            ]
        )
        self.drop(
            [
                requests.DropRequest(
                    ack_id=item.ack_id,
                    byte_size=item.byte_size,
                    ordering_key=item.ordering_key,
                )
                for item in items
            ]
        )

View File

@@ -0,0 +1,77 @@
# Copyright 2018, Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import logging
import threading
import typing
from typing import Optional
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1.subscriber._protocol.streaming_pull_manager import (
StreamingPullManager,
)
_LOGGER = logging.getLogger(__name__)

# Name of the helper thread that sends the heartbeats.
_HEARTBEAT_WORKER_NAME = "Thread-Heartbeater"

# How often to send heartbeats in seconds. Determined as half the period of
# time where the Pub/Sub server will close the stream as inactive, which is
# 60 seconds.
_DEFAULT_PERIOD = 30
class Heartbeater(object):
    """Periodically sends streaming pull heartbeats on a helper thread.

    Args:
        manager: The streaming pull manager to send heartbeats through.
        period: Seconds to wait between heartbeats.
    """

    def __init__(self, manager: "StreamingPullManager", period: int = _DEFAULT_PERIOD):
        self._manager = manager
        self._period = period
        self._stop_event = threading.Event()
        self._thread: Optional[threading.Thread] = None
        self._operational_lock = threading.Lock()

    def heartbeat(self) -> None:
        """Send a streaming pull heartbeat every period until stopped."""
        while not self._stop_event.is_set():
            if self._manager.heartbeat():
                _LOGGER.debug("Sent heartbeat.")
            # Waiting on the event (instead of sleeping) lets stop() wake
            # this thread up immediately.
            self._stop_event.wait(timeout=self._period)

        _LOGGER.debug("%s exiting.", _HEARTBEAT_WORKER_NAME)

    def start(self) -> None:
        """Spawn the heartbeat helper thread.

        Raises:
            ValueError: If the heartbeater is already running.
        """
        with self._operational_lock:
            if self._thread is not None:
                raise ValueError("Heartbeater is already running.")

            self._stop_event.clear()
            helper = threading.Thread(
                name=_HEARTBEAT_WORKER_NAME, target=self.heartbeat
            )
            helper.daemon = True
            helper.start()
            _LOGGER.debug("Started helper thread %s", helper.name)
            self._thread = helper

    def stop(self) -> None:
        """Signal the helper thread to exit and wait for it to finish."""
        with self._operational_lock:
            self._stop_event.set()

            if self._thread is not None:
                # The thread should automatically exit when the consumer is
                # inactive.
                self._thread.join()

            self._thread = None

View File

@@ -0,0 +1,122 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import queue
import time
from typing import Any, Callable, List, Sequence
import uuid
# Public names exported by this module.
__all__ = ("QueueCallbackWorker", "STOP")

_LOGGER = logging.getLogger(__name__)

# Helper thread stop indicator. This could be a sentinel object or None,
# but the sentinel object's ID can change if the process is forked, and
# None has the possibility of a user accidentally killing the helper
# thread.
STOP = uuid.uuid4()
def _get_many(
queue_: queue.Queue, max_items: int = None, max_latency: float = 0
) -> List[Any]:
"""Get multiple items from a Queue.
Gets at least one (blocking) and at most ``max_items`` items
(non-blocking) from a given Queue. Does not mark the items as done.
Args:
queue_: The Queue to get items from.
max_items:
The maximum number of items to get. If ``None``, then all available items
in the queue are returned.
max_latency:
The maximum number of seconds to wait for more than one item from a queue.
This number includes the time required to retrieve the first item.
Returns:
A sequence of items retrieved from the queue.
"""
start = time.time()
# Always return at least one item.
items = [queue_.get()]
while max_items is None or len(items) < max_items:
try:
elapsed = time.time() - start
timeout = max(0, max_latency - elapsed)
items.append(queue_.get(timeout=timeout))
except queue.Empty:
break
return items
class QueueCallbackWorker(object):
    """A helper that executes a callback for items sent in a queue.

    Calls a blocking ``get()`` on the ``queue`` until it encounters
    :attr:`STOP`.

    Args:
        queue:
            A Queue instance, appropriate for crossing the concurrency boundary
            implemented by ``executor``. Items will be popped off (with a blocking
            ``get()``) until :attr:`STOP` is encountered.
        callback:
            A callback that can process items pulled off of the queue. Multiple items
            will be passed to the callback in batches.
        max_items:
            The maximum amount of items that will be passed to the callback at a time.
        max_latency:
            The maximum amount of time in seconds to wait for additional items before
            executing the callback.
    """

    def __init__(
        self,
        queue: queue.Queue,
        callback: Callable[[Sequence[Any]], Any],
        max_items: int = 100,
        max_latency: float = 0,
    ):
        self.queue = queue
        self.max_items = max_items
        self.max_latency = max_latency
        self._callback = callback

    def __call__(self) -> None:
        keep_running = True
        while keep_running:
            batch = _get_many(
                self.queue, max_items=self.max_items, max_latency=self.max_latency
            )

            # A STOP sentinel anywhere in the batch means this is the final
            # iteration: process only the items that precede it, then exit.
            try:
                stop_index = batch.index(STOP)
            except ValueError:
                pass
            else:
                batch = batch[:stop_index]
                keep_running = False

            # Run the callback; log and swallow exceptions so that one bad
            # batch does not kill the worker.
            try:
                self._callback(batch)
            except Exception as exc:
                _LOGGER.exception("Error in queue callback worker: %s", exc)

        _LOGGER.debug("Exiting the QueueCallbackWorker.")

View File

@@ -0,0 +1,159 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Optional, Union
MIN_ACK_DEADLINE = 10
MAX_ACK_DEADLINE = 600


class Histogram(object):
    """Representation of a single histogram.

    The purpose of this class is to store actual ack timing information
    in order to predict how long to renew leases.

    The default implementation uses the 99th percentile of previous ack
    times to implicitly lease messages; however, custom
    :class:`~.pubsub_v1.subscriber._consumer.Consumer` subclasses
    are free to use a different formula.

    The precision of data stored is to the nearest integer. Additionally,
    values outside the range of ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE`` are stored
    as ``MIN_ACK_DEADLINE`` or ``MAX_ACK_DEADLINE``, since these are the boundaries of
    leases in the actual API.
    """

    def __init__(self, data: Optional[Dict[int, int]] = None):
        """Instantiate the histogram.

        Args:
            data:
                The data structure to be used to store the underlying data. The default
                is an empty dictionary. This can be set to a dictionary-like object if
                required (for example, if a special object is needed for concurrency
                reasons).
        """
        # The data is stored as a mapping of value -> number of times that
        # value was added.
        if data is None:
            data = {}
        self._data = data
        # Cached element count; cheaper than summing the dict values on
        # every __len__ call.
        self._len = 0

    def __len__(self) -> int:
        """Return the total number of data points in this histogram.

        This is cached on a separate counter (rather than computing it using
        ``sum([v for v in self._data.values()])``) to optimize lookup.

        Returns:
            The total number of data points in this histogram.
        """
        return self._len

    def __contains__(self, needle: int) -> bool:
        """Return ``True`` if needle is present in the histogram, ``False`` otherwise."""
        return needle in self._data

    def __repr__(self) -> str:
        return "<Histogram: {len} values between {min} and {max}>".format(
            len=len(self), max=self.max, min=self.min
        )

    @property
    def max(self) -> int:
        """Return the maximum value in this histogram.

        If there are no values in the histogram at all, return ``MAX_ACK_DEADLINE``.

        Returns:
            The maximum value in the histogram.
        """
        if not self._data:
            return MAX_ACK_DEADLINE
        # The keys are plain ints, so the builtin is clearer and O(n),
        # unlike the previous sort-based lookup.
        return max(self._data)

    @property
    def min(self) -> int:
        """Return the minimum value in this histogram.

        If there are no values in the histogram at all, return ``MIN_ACK_DEADLINE``.

        Returns:
            The minimum value in the histogram.
        """
        if not self._data:
            return MIN_ACK_DEADLINE
        return min(self._data)

    def add(self, value: Union[int, float]) -> None:
        """Add the value to this histogram.

        Args:
            value:
                The value. Values outside of
                ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE``
                will be raised to ``MIN_ACK_DEADLINE`` or reduced to
                ``MAX_ACK_DEADLINE``.
        """
        # Values are stored to the nearest integer, clamped to the range of
        # ack deadlines the API accepts.
        value = int(value)
        if value < MIN_ACK_DEADLINE:
            value = MIN_ACK_DEADLINE
        elif value > MAX_ACK_DEADLINE:
            value = MAX_ACK_DEADLINE

        # Add the value to the histogram's data dictionary.
        self._data.setdefault(value, 0)
        self._data[value] += 1
        self._len += 1

    def percentile(self, percent: Union[int, float]) -> int:
        """Return the value that is the Nth percentile in the histogram.

        Args:
            percent:
                The percentile being sought. The default consumer implementations
                consistently use ``99``.

        Returns:
            The value corresponding to the requested percentile.
        """
        # Sanity check: Any value over 100 should become 100.
        if percent >= 100:
            percent = 100

        # Determine the actual target number.
        target = len(self) - len(self) * (percent / 100)

        # Iterate over the values in descending order, dropping the target by
        # the number of times each value has been seen. When the target passes
        # 0, return the value we are currently viewing.
        for value in sorted(self._data, reverse=True):
            target -= self._data[value]
            if target < 0:
                return value

        # The only way to get here is if there was no data.
        # In this case, just return the shortest possible deadline.
        return MIN_ACK_DEADLINE

View File

@@ -0,0 +1,262 @@
# Copyright 2017, Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import copy
import logging
import random
import threading
import time
import typing
from typing import Dict, Iterable, Optional, Union
from google.cloud.pubsub_v1.subscriber._protocol.dispatcher import _MAX_BATCH_LATENCY
try:
from collections.abc import KeysView
KeysView[None] # KeysView is only subscriptable in Python 3.9+
except TypeError:
# Deprecated since Python 3.9, thus only use as a fallback in older Python versions
from typing import KeysView
from google.cloud.pubsub_v1.subscriber._protocol import requests
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1.subscriber._protocol.streaming_pull_manager import (
StreamingPullManager,
)
_LOGGER = logging.getLogger(__name__)

# Name of the lease-maintenance helper thread created by Leaser.start().
_LEASE_WORKER_NAME = "Thread-LeaseMaintainer"


class _LeasedMessage(typing.NamedTuple):
    """Bookkeeping record for a single message under lease management."""

    sent_time: float
    """The local time when ACK ID was initially leased in seconds since the epoch."""

    size: int  # Message size in bytes (from the originating LeaseRequest.byte_size).

    ordering_key: Optional[str]  # Ordering key, if the message carries one.
class Leaser(object):
    """Maintains ack-deadline leases for messages held by a streaming pull manager.

    A background thread periodically sends modack requests for every leased
    ack ID so the server does not redeliver in-flight messages, and drops
    messages that have been leased longer than the flow-control limit allows.
    """

    def __init__(self, manager: "StreamingPullManager"):
        self._thread: Optional[threading.Thread] = None
        self._manager = manager

        # a lock used for start/stop operations, protecting the _thread attribute
        self._operational_lock = threading.Lock()

        # A lock ensuring that add/remove operations are atomic and cannot be
        # intertwined. Protects the _leased_messages and _bytes attributes.
        self._add_remove_lock = threading.Lock()

        # Dict of ack_id -> _LeasedMessage
        self._leased_messages: Dict[str, _LeasedMessage] = {}

        self._bytes = 0
        """The total number of bytes consumed by leased messages."""

        self._stop_event = threading.Event()

    @property
    def message_count(self) -> int:
        """The number of leased messages."""
        return len(self._leased_messages)

    @property
    def ack_ids(self) -> KeysView[str]:
        """The ack IDs of all leased messages."""
        return self._leased_messages.keys()

    @property
    def bytes(self) -> int:
        """The total size, in bytes, of all leased messages."""
        return self._bytes

    def add(self, items: Iterable[requests.LeaseRequest]) -> None:
        """Add messages to be managed by the leaser.

        Messages are added with an infinite sent_time; the lease-expiry timer
        for each one is armed later via start_lease_expiry_timer().
        """
        with self._add_remove_lock:
            for item in items:
                # Add the ack ID to the set of managed ack IDs, and increment
                # the size counter.
                if item.ack_id not in self._leased_messages:
                    self._leased_messages[item.ack_id] = _LeasedMessage(
                        sent_time=float("inf"),
                        size=item.byte_size,
                        ordering_key=item.ordering_key,
                    )
                    self._bytes += item.byte_size
                else:
                    _LOGGER.debug("Message %s is already lease managed", item.ack_id)

    def start_lease_expiry_timer(self, ack_ids: Iterable[str]) -> None:
        """Start the lease expiry timer for `items`.

        Args:
            items: Sequence of ack-ids for which to start lease expiry timers.
        """
        with self._add_remove_lock:
            for ack_id in ack_ids:
                lease_info = self._leased_messages.get(ack_id)
                # Lease info might not exist for this ack_id because it has already
                # been removed by remove().
                if lease_info:
                    self._leased_messages[ack_id] = lease_info._replace(
                        sent_time=time.time()
                    )

    def remove(
        self,
        items: Iterable[
            Union[requests.AckRequest, requests.DropRequest, requests.NackRequest]
        ],
    ) -> None:
        """Remove messages from lease management."""
        with self._add_remove_lock:
            # Remove the ack ID from lease management, and decrement the
            # byte counter.
            for item in items:
                if self._leased_messages.pop(item.ack_id, None) is not None:
                    self._bytes -= item.byte_size
                else:
                    _LOGGER.debug("Item %s was not managed.", item.ack_id)

            # Defensive clamp: the counter should never go negative; if it
            # does, log and reset rather than propagate a bogus value.
            if self._bytes < 0:
                _LOGGER.debug("Bytes was unexpectedly negative: %d", self._bytes)
                self._bytes = 0

    def maintain_leases(self) -> None:
        """Maintain all of the leases being managed.

        This method modifies the ack deadline for all of the managed
        ack IDs, then waits for most of that time (but with jitter), and
        repeats.
        """
        while not self._stop_event.is_set():
            # Determine the appropriate duration for the lease. This is
            # based off of how long previous messages have taken to ack, with
            # a sensible default and within the ranges allowed by Pub/Sub.
            # Also update the deadline currently used if enough new ACK data has been
            # gathered since the last deadline update.
            deadline = self._manager._obtain_ack_deadline(maybe_update=True)
            _LOGGER.debug("The current deadline value is %d seconds.", deadline)

            # Make a copy of the leased messages. This is needed because it's
            # possible for another thread to modify the dictionary while
            # we're iterating over it.
            leased_messages = copy.copy(self._leased_messages)

            # Drop any leases that are beyond the max lease time. This ensures
            # that in the event of a badly behaving actor, we can drop messages
            # and allow the Pub/Sub server to resend them.
            cutoff = time.time() - self._manager.flow_control.max_lease_duration
            to_drop = [
                requests.DropRequest(ack_id, item.size, item.ordering_key)
                for ack_id, item in leased_messages.items()
                if item.sent_time < cutoff
            ]

            if to_drop:
                _LOGGER.warning(
                    "Dropping %s items because they were leased too long.", len(to_drop)
                )
                assert self._manager.dispatcher is not None
                self._manager.dispatcher.drop(to_drop)

            # Remove dropped items from our copy of the leased messages (they
            # have already been removed from the real one by
            # self._manager.drop(), which calls self.remove()).
            for item in to_drop:
                leased_messages.pop(item.ack_id)

            # Create a modack request.
            # We do not actually call `modify_ack_deadline` over and over
            # because it is more efficient to make a single request.
            ack_ids = leased_messages.keys()
            expired_ack_ids = set()
            if ack_ids:
                _LOGGER.debug("Renewing lease for %d ack IDs.", len(ack_ids))

                # NOTE: This may not work as expected if ``consumer.active``
                #       has changed since we checked it. An implementation
                #       without any sort of race condition would require a
                #       way for ``send_request`` to fail when the consumer
                #       is inactive.
                assert self._manager.dispatcher is not None
                ack_id_gen = (ack_id for ack_id in ack_ids)
                expired_ack_ids = self._manager._send_lease_modacks(
                    ack_id_gen, deadline
                )

            start_time = time.time()
            # If exactly once delivery is enabled, we should drop all expired ack_ids from lease management.
            if self._manager._exactly_once_delivery_enabled() and len(expired_ack_ids):
                assert self._manager.dispatcher is not None
                self._manager.dispatcher.drop(
                    [
                        requests.DropRequest(
                            ack_id,
                            leased_messages.get(ack_id).size,  # type: ignore
                            leased_messages.get(ack_id).ordering_key,  # type: ignore
                        )
                        for ack_id in expired_ack_ids
                        if ack_id in leased_messages
                    ]
                )

            # Now wait an appropriate period of time and do this again.
            #
            # We determine the appropriate period of time based on a random
            # period between:
            #   minimum: MAX_BATCH_LATENCY (to prevent duplicate modacks being created in one batch)
            #   maximum: 90% of the deadline
            # This maximum time attempts to prevent ack expiration before new lease modacks arrive at the server.
            # This use of jitter (http://bit.ly/2s2ekL7) helps decrease contention in cases
            # where there are many clients.
            # If we spent any time iterating over expired acks, we should subtract this from the deadline.
            snooze = random.uniform(
                _MAX_BATCH_LATENCY, (deadline * 0.9 - (time.time() - start_time))
            )
            _LOGGER.debug("Snoozing lease management for %f seconds.", snooze)
            self._stop_event.wait(timeout=snooze)

        _LOGGER.debug("%s exiting.", _LEASE_WORKER_NAME)

    def start(self) -> None:
        """Start the lease-maintenance helper thread.

        Raises:
            ValueError: If the leaser is already running.
        """
        with self._operational_lock:
            if self._thread is not None:
                raise ValueError("Leaser is already running.")

            # Create and start the helper thread.
            self._stop_event.clear()
            thread = threading.Thread(
                name=_LEASE_WORKER_NAME, target=self.maintain_leases
            )
            thread.daemon = True
            thread.start()
            _LOGGER.debug("Started helper thread %s", thread.name)
            self._thread = thread

    def stop(self) -> None:
        """Signal the maintenance thread to exit and wait for it to finish."""
        with self._operational_lock:
            self._stop_event.set()

            if self._thread is not None:
                # The thread should automatically exit when the consumer is
                # inactive.
                self._thread.join()

            self._thread = None

View File

@@ -0,0 +1,180 @@
# Copyright 2020, Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import typing
from typing import Any, Callable, Iterable, Optional
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1 import subscriber
_LOGGER = logging.getLogger(__name__)
class MessagesOnHold(object):
    """Tracks messages on hold by ordering key. Not thread-safe."""

    def __init__(self):
        self._size = 0

        # FIFO queue of messages received from the server but not yet handed
        # to the user callback. Holds both ordered and unordered messages;
        # state for ordered messages moves to _pending_ordered_messages once
        # they are taken off this queue. The head of the queue is the left
        # end of the deque; the tail is the right end.
        self._messages_on_hold = collections.deque()

        # Maps ordering_key -> deque of ordered messages not yet delivered
        # to the user. Every key present here has exactly one message in
        # flight; once that message is acked or nacked, the next queued
        # message for the key is sent. An empty deque means a message for
        # the key is in flight but nothing further is pending.
        self._pending_ordered_messages = {}

    @property
    def size(self) -> int:
        """Number of messages on hold across ordered and unordered messages.

        Note that this object may still store information about in-flight
        ordered messages even if size is zero.

        Returns:
            The size value.
        """
        return self._size

    def get(self) -> Optional["subscriber.message.Message"]:
        """Pop the next deliverable message from the on-hold queue.

        A message with an ordering key is withheld while another message with
        the same key is in flight.

        Returns:
            A message that hasn't been sent to the user yet, or ``None`` if
            nothing is currently deliverable.
        """
        while self._messages_on_hold:
            message = self._messages_on_hold.popleft()

            if not message.ordering_key:
                # Unordered messages can always be delivered immediately.
                self._size = self._size - 1
                return message

            pending = self._pending_ordered_messages.get(message.ordering_key)
            if pending is not None:
                # Another message with this key is in flight, so park this
                # one at the tail of the key's queue and keep looking.
                pending.append(message)
                continue

            # Record that a message with this key is now in flight by
            # creating an empty queue for it.
            self._pending_ordered_messages[message.ordering_key] = collections.deque()
            self._size = self._size - 1
            return message

        return None

    def put(self, message: "subscriber.message.Message") -> None:
        """Put a message on hold.

        Args:
            message: The message to put on hold.
        """
        self._messages_on_hold.append(message)
        self._size = self._size + 1

    def activate_ordering_keys(
        self,
        ordering_keys: Iterable[str],
        schedule_message_callback: Callable[["subscriber.message.Message"], Any],
    ) -> None:
        """Dispatch the next queued message for each of the given ordering
        keys, if one exists. Keys that no longer have queued messages get
        their tracking state removed.

        See comment at streaming_pull_manager.activate_ordering_keys() for
        more detail about the impact of this method on load.

        Args:
            ordering_keys:
                The ordering keys to activate. May be empty, or contain duplicates.
            schedule_message_callback:
                The callback to call to schedule a message to be sent to the user.
        """
        for key in ordering_keys:
            if self._pending_ordered_messages.get(key) is None:
                _LOGGER.warning(
                    "No message queue exists for message ordering key: %s.", key
                )
                continue

            next_msg = self._get_next_for_ordering_key(key)
            if next_msg:
                # Schedule the next message because the previous was dropped.
                # Note that this may overload the user's `max_bytes` limit,
                # but not their `max_messages` limit.
                schedule_message_callback(next_msg)
            else:
                # Nothing left in this key's queue, so drop its state.
                self._clean_up_ordering_key(key)

    def _get_next_for_ordering_key(
        self, ordering_key: str
    ) -> Optional["subscriber.message.Message"]:
        """Pop the next queued message for *ordering_key*, if any.

        The client should call _clean_up_ordering_key() if this method
        returns ``None``.

        Args:
            ordering_key: Ordering key for which to get the next message.

        Returns:
            The next message for this ordering key or ``None`` if there
            aren't any.
        """
        key_queue = self._pending_ordered_messages.get(ordering_key)
        if not key_queue:
            return None
        self._size = self._size - 1
        return key_queue.popleft()

    def _clean_up_ordering_key(self, ordering_key: str) -> None:
        """Remove tracking state for an ordering key with no pending messages.

        Args:
            ordering_key: The ordering key to clean up.
        """
        message_queue = self._pending_ordered_messages.get(ordering_key)
        if message_queue is None:
            _LOGGER.warning(
                "Tried to clean up ordering key that does not exist: %s", ordering_key
            )
            return
        if len(message_queue) > 0:
            _LOGGER.warning(
                "Tried to clean up ordering key: %s with %d messages remaining.",
                ordering_key,
                len(message_queue),
            )
            return
        del self._pending_ordered_messages[ordering_key]

View File

@@ -0,0 +1,54 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import typing
from typing import NamedTuple, Optional
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1.subscriber import futures
# Namedtuples for management requests. Used by the Message class to communicate
# items of work back to the policy.
class AckRequest(NamedTuple):
    """A request to acknowledge a message and remove it from lease management."""

    ack_id: str  # Server-assigned ID used to acknowledge the message.
    byte_size: int  # Message size in bytes, for flow-control accounting.
    time_to_ack: float  # Seconds elapsed between receipt and the ack.
    ordering_key: Optional[str]  # Ordering key, if the message carries one.
    future: Optional["futures.Future"]  # Resolved with the request's outcome, when provided.


class DropRequest(NamedTuple):
    """A request to stop leasing a message without acking or nacking it."""

    ack_id: str
    byte_size: int
    ordering_key: Optional[str]


class LeaseRequest(NamedTuple):
    """A request to start managing a message's ack deadline lease."""

    ack_id: str
    byte_size: int
    ordering_key: Optional[str]


class ModAckRequest(NamedTuple):
    """A request to modify a message's ack deadline."""

    ack_id: str
    seconds: float  # New ack deadline; 0 is used to nack a message.
    future: Optional["futures.Future"]  # Resolved with the request's outcome, when provided.


class NackRequest(NamedTuple):
    """A request to deny receipt of a message so the server redelivers it."""

    ack_id: str
    byte_size: int
    ordering_key: Optional[str]
    future: Optional["futures.Future"]  # Resolved with the request's outcome, when provided.

View File

@@ -0,0 +1,283 @@
# Copyright 2019, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import os
import typing
from typing import cast, Any, Callable, Optional, Sequence, Union
import warnings
from google.auth.credentials import AnonymousCredentials # type: ignore
from google.oauth2 import service_account # type: ignore
from google.cloud.pubsub_v1 import types
from google.cloud.pubsub_v1.subscriber import futures
from google.cloud.pubsub_v1.subscriber._protocol import streaming_pull_manager
from google.pubsub_v1.services.subscriber import client as subscriber_client
from google.pubsub_v1 import gapic_version as package_version
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1 import subscriber
from google.pubsub_v1.services.subscriber.transports.grpc import (
SubscriberGrpcTransport,
)
__version__ = package_version.__version__
class Client(subscriber_client.SubscriberClient):
"""A subscriber client for Google Cloud Pub/Sub.
This creates an object that is capable of subscribing to messages.
Generally, you can instantiate this client with no arguments, and you
get sensible defaults.
Args:
kwargs: Any additional arguments provided are sent as keyword
keyword arguments to the underlying
:class:`~google.cloud.pubsub_v1.gapic.subscriber_client.SubscriberClient`.
Generally you should not need to set additional keyword
arguments. Optionally, regional endpoints can be set via
``client_options`` that takes a single key-value pair that
defines the endpoint.
Example:
.. code-block:: python
from google.cloud import pubsub_v1
subscriber_client = pubsub_v1.SubscriberClient(
# Optional
client_options = {
"api_endpoint": REGIONAL_ENDPOINT
}
)
"""
def __init__(self, **kwargs: Any):
    # When PUBSUB_EMULATOR_HOST is set, route all traffic to the emulator
    # endpoint using anonymous credentials instead of the production API.
    emulator_host = os.environ.get("PUBSUB_EMULATOR_HOST")
    if emulator_host:
        kwargs["client_options"] = {"api_endpoint": emulator_host}
        kwargs["credentials"] = AnonymousCredentials()

    # Instantiate the underlying GAPIC client.
    super().__init__(**kwargs)
    self._target = self._transport._host
    self._closed = False
@classmethod
def from_service_account_file(  # type: ignore[override]
    cls, filename: str, **kwargs: Any
) -> "Client":
    """Creates an instance of this client using the provided credentials
    file.

    Args:
        filename: The path to the service account private key json file.
        kwargs: Additional arguments to pass to the constructor.

    Returns:
        A Subscriber :class:`~google.cloud.pubsub_v1.subscriber.client.Client`
        instance that is the constructed client.
    """
    kwargs["credentials"] = service_account.Credentials.from_service_account_file(
        filename
    )
    return cls(**kwargs)

from_service_account_json = from_service_account_file  # type: ignore[assignment]
@property
def target(self) -> str:
    """Return the target (where the API is).

    Returns:
        The location of the API, i.e. the host the underlying transport
        connects to (captured from ``self._transport._host`` at construction).
    """
    return self._target
@property
def closed(self) -> bool:
    """Return whether the client has been closed and cannot be used anymore.

    .. versionadded:: 2.8.0
    """
    # Initialized to False in __init__; presumably flipped to True by a
    # close() method outside this view -- confirm against the full class.
    return self._closed
@property
def api(self):
    """The underlying gapic API client.

    .. versionchanged:: 2.10.0
        Instead of a GAPIC ``SubscriberClient`` client instance, this property is a
        proxy object to it with the same interface.

    .. deprecated:: 2.10.0
        Use the GAPIC methods and properties on the client instance directly
        instead of through the :attr:`api` attribute.
    """
    # Fixed typo in the user-facing deprecation message ("thorugh").
    msg = (
        'The "api" property only exists for backward compatibility, access its '
        'attributes directly through the client instance (e.g. "client.foo" '
        'instead of "client.api.foo").'
    )
    warnings.warn(msg, category=DeprecationWarning)
    # super() acts as a proxy exposing the parent GAPIC client's interface.
    return super()
def subscribe(
self,
subscription: str,
callback: Callable[["subscriber.message.Message"], Any],
flow_control: Union[types.FlowControl, Sequence] = (),
scheduler: Optional["subscriber.scheduler.ThreadScheduler"] = None,
use_legacy_flow_control: bool = False,
await_callbacks_on_shutdown: bool = False,
) -> futures.StreamingPullFuture:
"""Asynchronously start receiving messages on a given subscription.
This method starts a background thread to begin pulling messages from
a Pub/Sub subscription and scheduling them to be processed using the
provided ``callback``.
The ``callback`` will be called with an individual
:class:`google.cloud.pubsub_v1.subscriber.message.Message`. It is the
responsibility of the callback to either call ``ack()`` or ``nack()``
on the message when it finished processing. If an exception occurs in
the callback during processing, the exception is logged and the message
is ``nack()`` ed.
The ``flow_control`` argument can be used to control the rate of at
which messages are pulled. The settings are relatively conservative by
default to prevent "message hoarding" - a situation where the client
pulls a large number of messages but can not process them fast enough
leading it to "starve" other clients of messages. Increasing these
settings may lead to faster throughput for messages that do not take
a long time to process.
The ``use_legacy_flow_control`` argument disables enforcing flow control
settings at the Cloud Pub/Sub server, and only the client side flow control
will be enforced.
This method starts the receiver in the background and returns a
*Future* representing its execution. Waiting on the future (calling
``result()``) will block forever or until a non-recoverable error
is encountered (such as loss of network connectivity). Cancelling the
future will signal the process to shutdown gracefully and exit.
.. note:: This uses Pub/Sub's *streaming pull* feature. This feature
properties that may be surprising. Please take a look at
https://cloud.google.com/pubsub/docs/pull#streamingpull for
more details on how streaming pull behaves compared to the
synchronous pull method.
Example:
.. code-block:: python
from google.cloud import pubsub_v1
subscriber_client = pubsub_v1.SubscriberClient()
# existing subscription
subscription = subscriber_client.subscription_path(
'my-project-id', 'my-subscription')
def callback(message):
print(message)
message.ack()
future = subscriber_client.subscribe(
subscription, callback)
try:
future.result()
except KeyboardInterrupt:
future.cancel() # Trigger the shutdown.
future.result() # Block until the shutdown is complete.
Args:
subscription:
The name of the subscription. The subscription should have already been
created (for example, by using :meth:`create_subscription`).
callback:
The callback function. This function receives the message as
its only argument and will be called from a different thread/
process depending on the scheduling strategy.
flow_control:
The flow control settings. Use this to prevent situations where you are
inundated with too many messages at once.
scheduler:
An optional *scheduler* to use when executing the callback. This
controls how callbacks are executed concurrently. This object must not
be shared across multiple ``SubscriberClient`` instances.
use_legacy_flow_control (bool):
If set to ``True``, flow control at the Cloud Pub/Sub server is disabled,
though client-side flow control is still enabled. If set to ``False``
(default), both server-side and client-side flow control are enabled.
await_callbacks_on_shutdown:
If ``True``, after canceling the returned future, the latter's
``result()`` method will block until the background stream and its
helper threads have been terminated, and all currently executing message
callbacks are done processing.
If ``False`` (default), the returned future's ``result()`` method will
not block after canceling the future. The method will instead return
immediately after the background stream and its helper threads have been
terminated, but some of the message callback threads might still be
running at that point.
Returns:
A future instance that can be used to manage the background stream.
"""
flow_control = types.FlowControl(*flow_control)
manager = streaming_pull_manager.StreamingPullManager(
self,
subscription,
flow_control=flow_control,
scheduler=scheduler,
use_legacy_flow_control=use_legacy_flow_control,
await_callbacks_on_shutdown=await_callbacks_on_shutdown,
)
future = futures.StreamingPullFuture(manager)
manager.open(callback=callback, on_callback_error=future.set_exception)
return future
def close(self) -> None:
    """Release the client's underlying channel and its socket resources.

    Once closed, this client instance cannot be used anymore. Calling the
    method repeatedly is safe (it is idempotent).
    """
    # Narrow the transport type so static checkers know ``grpc_channel`` exists.
    grpc_transport = cast("SubscriberGrpcTransport", self._transport)
    grpc_transport.grpc_channel.close()
    # Mark the client closed so __enter__ can reject further use.
    self._closed = True
def __enter__(self) -> "Client":
    """Enter a ``with`` block, returning this client.

    Raises:
        RuntimeError: If :meth:`close` has already been called on this client.
    """
    if not self._closed:
        return self
    raise RuntimeError("Closed subscriber cannot be used as context manager.")
def __exit__(self, exc_type, exc_val, exc_tb):
    # Always close on context exit. Nothing truthy is returned, so any
    # in-flight exception propagates to the caller unchanged.
    self.close()

View File

@@ -0,0 +1,44 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from enum import Enum
from google.api_core.exceptions import GoogleAPICallError
from typing import Optional
class AcknowledgeStatus(Enum):
    """Outcome codes reported for ack/modack/nack operations.

    ``SUCCESS`` marks a completed operation; the remaining members are the
    error categories carried by :class:`AcknowledgeError`.
    """

    SUCCESS = 1
    PERMISSION_DENIED = 2
    FAILED_PRECONDITION = 3
    INVALID_ACK_ID = 4
    OTHER = 5
class AcknowledgeError(GoogleAPICallError):
    """Error during ack/modack/nack operation on exactly-once-enabled subscription."""

    def __init__(self, error_code: AcknowledgeStatus, info: Optional[str]):
        self.error_code = error_code
        self.info = info
        # Build the human-readable message from the code, appending the extra
        # detail only when one was supplied.
        message = (
            str(self.error_code) + " : " + str(self.info)
            if info
            else str(self.error_code)
        )
        super().__init__(message)


__all__ = ("AcknowledgeError",)

View File

@@ -0,0 +1,125 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import typing
from typing import Any
from typing import Union
from google.cloud.pubsub_v1 import futures
from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.pubsub_v1.subscriber._protocol.streaming_pull_manager import (
StreamingPullManager,
)
class StreamingPullFuture(futures.Future):
    """Represents a process that asynchronously performs streaming pull and
    schedules messages to be processed.

    This future is resolved when the process is stopped (via :meth:`cancel`) or
    if it encounters an unrecoverable error. Calling `.result()` will cause
    the calling thread to block indefinitely.
    """

    def __init__(self, manager: "StreamingPullManager"):
        super(StreamingPullFuture, self).__init__()
        # Name-mangled attributes keep the manager reference and cancellation
        # flag private to this class.
        self.__manager = manager
        # Registering this callback lets the manager resolve the future when
        # the background stream shuts down (cleanly or with an error).
        self.__manager.add_close_callback(self._on_close_callback)
        self.__cancelled = False

    def _on_close_callback(self, manager: "StreamingPullManager", result: Any):
        # NOTE(review): presumably invoked from a manager-owned thread, hence
        # the done() guard below — confirm against the manager's close logic.
        if self.done():
            # The future has already been resolved in a different thread,
            # nothing to do on the streaming pull manager shutdown.
            return

        if result is None:
            # A ``None`` result signals a clean shutdown.
            self.set_result(True)
        else:
            # Anything else is the error that terminated the stream.
            self.set_exception(result)

    def cancel(self) -> bool:
        """Stops pulling messages and shutdowns the background thread consuming
        messages.

        The method always returns ``True``, as the shutdown is always initiated.
        However, if the background stream is already being shut down or the shutdown
        has completed, this method is a no-op.

        .. versionchanged:: 2.4.1
            The method does not block anymore, it just triggers the shutdown and returns
            immediately. To block until the background stream is terminated, call
            :meth:`result()` after cancelling the future.

        .. versionchanged:: 2.10.0
            The method always returns ``True`` instead of ``None``.
        """
        # NOTE: We circumvent the base future's self._state to track the cancellation
        # state, as this state has different meaning with streaming pull futures.
        self.__cancelled = True
        self.__manager.close()
        return True

    def cancelled(self) -> bool:
        """
        Returns:
            ``True`` if the subscription has been cancelled.
        """
        return self.__cancelled
class Future(futures.Future):
    """This future object is for subscribe-side calls.

    Calling :meth:`result` will resolve the future by returning the operation
    status, unless an error occurs.
    """

    def cancel(self) -> bool:
        """Actions in Pub/Sub generally may not be canceled.

        This method always returns ``False``.
        """
        return False

    def cancelled(self) -> bool:
        """Actions in Pub/Sub generally may not be canceled.

        This method always returns ``False``.
        """
        return False

    # NOTE: the timeout annotation was ``Union[int, float] = None``, an
    # implicit Optional that PEP 484 disallows; ``None`` is now explicit.
    def result(self, timeout: Union[int, float, None] = None) -> AcknowledgeStatus:
        """Return a success code or raise an exception.

        This blocks until the operation completes successfully and
        returns ``AcknowledgeStatus.SUCCESS`` unless an exception is raised.

        Args:
            timeout: The number of seconds before this call
                times out and raises TimeoutError. ``None`` (default) means
                wait indefinitely.

        Returns:
            AcknowledgeStatus.SUCCESS if the operation succeeded.

        Raises:
            concurrent.futures.TimeoutError: If the request times out.
            AcknowledgeError: If the operation did not succeed for another
                reason.
        """
        return super().result(timeout=timeout)

View File

@@ -0,0 +1,492 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import datetime as dt
import json
import math
import time
import typing
from typing import Optional, Callable
from google.cloud.pubsub_v1.subscriber._protocol import requests
from google.cloud.pubsub_v1.subscriber import futures
from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus
if typing.TYPE_CHECKING: # pragma: NO COVER
import datetime
import queue
from google.cloud.pubsub_v1 import types
from cloudsdk.google.protobuf.internal import containers
_MESSAGE_REPR = """\
Message {{
data: {!r}
ordering_key: {!r}
attributes: {}
}}"""
_SUCCESS_FUTURE = futures.Future()
_SUCCESS_FUTURE.set_result(AcknowledgeStatus.SUCCESS)
def _indent(lines: str, prefix: str = "  ") -> str:
    """Prefix every line of ``lines`` with ``prefix``.

    Note that, unlike :func:`textwrap.indent`, this prefixes *all* lines,
    including empty ones.

    Args:
        lines:
            The newline delimited string to be indented.
        prefix:
            The prefix to indent each line with. Defaults to two spaces.

    Returns:
        The newly indented content.
    """
    return "\n".join(prefix + line for line in lines.split("\n"))
class Message(object):
    """A representation of a single Pub/Sub message.

    The common way to interact with
    :class:`~.pubsub_v1.subscriber.message.Message` objects is to receive
    them in callbacks on subscriptions; most users should never have a need
    to instantiate them by hand. (The exception to this is if you are
    implementing a custom subclass to
    :class:`~.pubsub_v1.subscriber._consumer.Consumer`.)

    Attributes:
        message_id (str):
            The message ID. In general, you should not need to use this directly.
        data (bytes):
            The data in the message. Note that this will be a :class:`bytes`,
            not a text string.
        attributes (MutableMapping[str, str]):
            The attributes sent along with the message. See :attr:`attributes` for more
            information on this type.
        publish_time (google.protobuf.timestamp_pb2.Timestamp):
            The time that this message was originally published.
    """

    def __init__(
        self,
        message: "types.PubsubMessage._meta._pb",  # type: ignore
        ack_id: str,
        delivery_attempt: int,
        request_queue: "queue.Queue",
        exactly_once_delivery_enabled_func: Callable[[], bool] = lambda: False,
    ):
        """Construct the Message.

        .. note::
            This class should not be constructed directly; it is the
            responsibility of :class:`BasePolicy` subclasses to do so.

        Args:
            message (types.PubsubMessage._meta._pb):
                The message received from Pub/Sub. For performance reasons it should be
                the raw protobuf message normally wrapped by
                :class:`~pubsub_v1.types.PubsubMessage`. A raw message can be obtained
                from a :class:`~pubsub_v1.types.PubsubMessage` instance through the
                latter's ``._pb`` attribute.
            ack_id (str):
                The ack_id received from Pub/Sub.
            delivery_attempt (int):
                The delivery attempt counter received from Pub/Sub if a DeadLetterPolicy
                is set on the subscription, and zero otherwise.
            request_queue (queue.Queue):
                A queue provided by the policy that can accept requests; the policy is
                responsible for handling those requests.
            exactly_once_delivery_enabled_func (Callable[[], bool]):
                A Callable that returns whether exactly-once delivery is
                currently-enabled. Defaults to a lambda that always returns False.
        """
        self._message = message
        self._ack_id = ack_id
        # Pub/Sub sends zero when no DeadLetterPolicy is set; normalize to None.
        self._delivery_attempt = delivery_attempt if delivery_attempt > 0 else None
        self._request_queue = request_queue
        self._exactly_once_delivery_enabled_func = exactly_once_delivery_enabled_func
        self.message_id = message.message_id

        # The instantiation time is the time that this message
        # was received. Tracking this provides us a way to be smart about
        # the default lease deadline.
        self._received_timestamp = time.time()

        # Store the message attributes directly to speed up attribute access, i.e.
        # to avoid two lookups if self._message.<attribute> pattern was used in
        # properties.
        self._attributes = message.attributes
        self._data = message.data
        # Convert the protobuf Timestamp (seconds + nanos) into an aware UTC datetime.
        self._publish_time = dt.datetime.fromtimestamp(
            message.publish_time.seconds + message.publish_time.nanos / 1e9,
            tz=dt.timezone.utc,
        )
        self._ordering_key = message.ordering_key
        self._size = message.ByteSize()

    def __repr__(self) -> str:
        # Get an abbreviated version of the data.
        abbv_data = self._message.data
        if len(abbv_data) > 50:
            abbv_data = abbv_data[:50] + b"..."

        pretty_attrs = json.dumps(
            dict(self.attributes), indent=2, separators=(",", ": "), sort_keys=True
        )
        pretty_attrs = _indent(pretty_attrs)
        # We don't actually want the first line indented.
        pretty_attrs = pretty_attrs.lstrip()
        return _MESSAGE_REPR.format(abbv_data, str(self.ordering_key), pretty_attrs)

    @property
    def attributes(self) -> "containers.ScalarMap":
        """Return the attributes of the underlying Pub/Sub Message.

        .. warning::
            A ``ScalarMap`` behaves slightly differently than a
            ``dict``. For a Pub / Sub message this is a ``string->string`` map.
            When trying to access a value via ``map['key']``, if the key is
            not in the map, then the default value for the string type will
            be returned, which is an empty string. It may be more intuitive
            to just cast the map to a ``dict`` or to use ``map.get``.

        Returns:
            containers.ScalarMap: The message's attributes. This is a
            ``dict``-like object provided by ``google.protobuf``.
        """
        return self._attributes

    @property
    def data(self) -> bytes:
        """Return the data for the underlying Pub/Sub Message.

        Returns:
            bytes: The message data. This is always a bytestring; if you want
            a text string, call :meth:`bytes.decode`.
        """
        return self._data

    @property
    def publish_time(self) -> "datetime.datetime":
        """Return the time that the message was originally published.

        Returns:
            datetime.datetime: The date and time that the message was
            published.
        """
        return self._publish_time

    @property
    def ordering_key(self) -> str:
        """The ordering key used to publish the message."""
        return self._ordering_key

    @property
    def size(self) -> int:
        """Return the size of the underlying message, in bytes."""
        return self._size

    @property
    def ack_id(self) -> str:
        """The ID used to ack the message."""
        return self._ack_id

    @property
    def delivery_attempt(self) -> Optional[int]:
        """The delivery attempt counter is 1 + (the sum of number of NACKs
        and number of ack_deadline exceeds) for this message. It is set to None
        if a DeadLetterPolicy is not set on the subscription.

        A NACK is any call to ModifyAckDeadline with a 0 deadline. An ack_deadline
        exceeds event is whenever a message is not acknowledged within
        ack_deadline. Note that ack_deadline is initially
        Subscription.ackDeadlineSeconds, but may get extended automatically by
        the client library.

        The first delivery of a given message will have this value as 1. The value
        is calculated at best effort and is approximate.

        Returns:
            Optional[int]: The delivery attempt counter or ``None``.
        """
        return self._delivery_attempt

    def ack(self) -> None:
        """Acknowledge the given message.

        Acknowledging a message in Pub/Sub means that you are done
        with it, and it will not be delivered to this subscription again.
        You should avoid acknowledging messages until you have
        *finished* processing them, so that in the event of a failure,
        you receive the message again.

        .. warning::
            Acks in Pub/Sub are best effort. You should always
            ensure that your processing code is idempotent, as you may
            receive any given message more than once. If you need strong
            guarantees about acks and re-deliveries, enable exactly-once
            delivery on your subscription and use the `ack_with_response`
            method instead. Exactly once delivery is a preview feature.
            For more details, see:
            https://cloud.google.com/pubsub/docs/exactly-once-delivery.
        """
        # Seconds the message was held before acking, rounded up.
        time_to_ack = math.ceil(time.time() - self._received_timestamp)
        self._request_queue.put(
            requests.AckRequest(
                ack_id=self._ack_id,
                byte_size=self.size,
                time_to_ack=time_to_ack,
                ordering_key=self.ordering_key,
                future=None,
            )
        )

    def ack_with_response(self) -> "futures.Future":
        """Acknowledge the given message.

        Acknowledging a message in Pub/Sub means that you are done
        with it, and it will not be delivered to this subscription again.
        You should avoid acknowledging messages until you have
        *finished* processing them, so that in the event of a failure,
        you receive the message again.

        If exactly-once delivery is NOT enabled on the subscription, the
        future returns immediately with an AcknowledgeStatus.SUCCESS.
        Since acks in Cloud Pub/Sub are best effort when exactly-once
        delivery is disabled, the message may be re-delivered. Because
        re-deliveries are possible, you should ensure that your processing
        code is idempotent, as you may receive any given message more than
        once.

        If exactly-once delivery is enabled on the subscription, the
        future returned by this method tracks the state of acknowledgement
        operation. If the future completes successfully, the message is
        guaranteed NOT to be re-delivered. Otherwise, the future will
        contain an exception with more details about the failure and the
        message may be re-delivered.

        Exactly once delivery is a preview feature. For more details,
        see https://cloud.google.com/pubsub/docs/exactly-once-delivery.

        Returns:
            futures.Future: A
            :class:`~google.cloud.pubsub_v1.subscriber.futures.Future`
            instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface (but not an
            instance of that class). Call `result()` to get the result
            of the operation; upon success, a
            pubsub_v1.subscriber.exceptions.AcknowledgeStatus.SUCCESS
            will be returned and upon an error, an
            pubsub_v1.subscriber.exceptions.AcknowledgeError exception
            will be thrown.
        """
        req_future: Optional[futures.Future]
        if self._exactly_once_delivery_enabled_func():
            future = futures.Future()
            req_future = future
        else:
            # Best-effort mode: hand back the shared, pre-resolved success
            # future and do not attach one to the request.
            future = _SUCCESS_FUTURE
            req_future = None

        time_to_ack = math.ceil(time.time() - self._received_timestamp)
        self._request_queue.put(
            requests.AckRequest(
                ack_id=self._ack_id,
                byte_size=self.size,
                time_to_ack=time_to_ack,
                ordering_key=self.ordering_key,
                future=req_future,
            )
        )
        return future

    def drop(self) -> None:
        """Release the message from lease management.

        This informs the policy to no longer hold on to the lease for this
        message. Pub/Sub will re-deliver the message if it is not acknowledged
        before the existing lease expires.

        .. warning::
            For most use cases, the only reason to drop a message from
            lease management is on `ack` or `nack`; this library
            automatically drop()s the message on `ack` or `nack`. You probably
            do not want to call this method directly.
        """
        self._request_queue.put(
            requests.DropRequest(
                ack_id=self._ack_id, byte_size=self.size, ordering_key=self.ordering_key
            )
        )

    def modify_ack_deadline(self, seconds: int) -> None:
        """Resets the deadline for acknowledgement.

        New deadline will be the given value of seconds from now.

        The default implementation handles automatically modacking received messages for you;
        you should not need to manually deal with setting ack deadlines. The exception case is
        if you are implementing your own custom subclass of
        :class:`~.pubsub_v1.subscriber._consumer.Consumer`.

        Args:
            seconds (int):
                The number of seconds to set the lease deadline to. This should be
                between 0 and 600. Due to network latency, values below 10 are advised
                against.
        """
        self._request_queue.put(
            requests.ModAckRequest(ack_id=self._ack_id, seconds=seconds, future=None)
        )

    def modify_ack_deadline_with_response(self, seconds: int) -> "futures.Future":
        """Resets the deadline for acknowledgement and returns the response
        status via a future.

        New deadline will be the given value of seconds from now.

        The default implementation handles automatically modacking received messages for you;
        you should not need to manually deal with setting ack deadlines. The exception case is
        if you are implementing your own custom subclass of
        :class:`~.pubsub_v1.subscriber._consumer.Consumer`.

        If exactly-once delivery is NOT enabled on the subscription, the
        future returns immediately with an AcknowledgeStatus.SUCCESS.
        Since modify-ack-deadline operations in Cloud Pub/Sub are best effort
        when exactly-once delivery is disabled, the message may be re-delivered
        within the set deadline.

        If exactly-once delivery is enabled on the subscription, the
        future returned by this method tracks the state of the
        modify-ack-deadline operation. If the future completes successfully,
        the message is guaranteed NOT to be re-delivered within the new deadline.
        Otherwise, the future will contain an exception with more details about
        the failure and the message will be redelivered according to its
        currently-set ack deadline.

        Exactly once delivery is a preview feature. For more details,
        see https://cloud.google.com/pubsub/docs/exactly-once-delivery.

        Args:
            seconds (int):
                The number of seconds to set the lease deadline to. This should be
                between 0 and 600. Due to network latency, values below 10 are advised
                against.

        Returns:
            futures.Future: A
            :class:`~google.cloud.pubsub_v1.subscriber.futures.Future`
            instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface (but not an
            instance of that class). Call `result()` to get the result
            of the operation; upon success, a
            pubsub_v1.subscriber.exceptions.AcknowledgeStatus.SUCCESS
            will be returned and upon an error, an
            pubsub_v1.subscriber.exceptions.AcknowledgeError exception
            will be thrown.
        """
        req_future: Optional[futures.Future]
        if self._exactly_once_delivery_enabled_func():
            future = futures.Future()
            req_future = future
        else:
            # Best-effort mode: shared pre-resolved success future, no
            # per-request future.
            future = _SUCCESS_FUTURE
            req_future = None

        self._request_queue.put(
            requests.ModAckRequest(
                ack_id=self._ack_id, seconds=seconds, future=req_future
            )
        )
        return future

    def nack(self) -> None:
        """Decline to acknowledge the given message.

        This will cause the message to be re-delivered to subscribers. Re-deliveries
        may take place immediately or after a delay, and may arrive at this subscriber
        or another.
        """
        self._request_queue.put(
            requests.NackRequest(
                ack_id=self._ack_id,
                byte_size=self.size,
                ordering_key=self.ordering_key,
                future=None,
            )
        )

    def nack_with_response(self) -> "futures.Future":
        """Decline to acknowledge the given message, returning the response status via
        a future.

        This will cause the message to be re-delivered to subscribers. Re-deliveries
        may take place immediately or after a delay, and may arrive at this subscriber
        or another.

        If exactly-once delivery is NOT enabled on the subscription, the
        future returns immediately with an AcknowledgeStatus.SUCCESS.

        If exactly-once delivery is enabled on the subscription, the
        future returned by this method tracks the state of the
        nack operation. If the future completes successfully,
        the future's result will be an AcknowledgeStatus.SUCCESS.
        Otherwise, the future will contain an exception with more details about
        the failure.

        Exactly once delivery is a preview feature. For more details,
        see https://cloud.google.com/pubsub/docs/exactly-once-delivery.

        Returns:
            futures.Future: A
            :class:`~google.cloud.pubsub_v1.subscriber.futures.Future`
            instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface (but not an
            instance of that class). Call `result()` to get the result
            of the operation; upon success, a
            pubsub_v1.subscriber.exceptions.AcknowledgeStatus.SUCCESS
            will be returned and upon an error, an
            pubsub_v1.subscriber.exceptions.AcknowledgeError exception
            will be thrown.
        """
        req_future: Optional[futures.Future]
        if self._exactly_once_delivery_enabled_func():
            future = futures.Future()
            req_future = future
        else:
            # Best-effort mode: shared pre-resolved success future, no
            # per-request future.
            future = _SUCCESS_FUTURE
            req_future = None

        self._request_queue.put(
            requests.NackRequest(
                ack_id=self._ack_id,
                byte_size=self.size,
                ordering_key=self.ordering_key,
                future=req_future,
            )
        )
        return future

View File

@@ -0,0 +1,170 @@
# Copyright 2018, Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Schedulers provide means to *schedule* callbacks asynchronously.
These are used by the subscriber to call the user-provided callback to process
each message.
"""
import abc
import concurrent.futures
import queue
import typing
from typing import Callable, List, Optional
import warnings
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud import pubsub_v1
class Scheduler(metaclass=abc.ABCMeta):
    """Abstract base class for schedulers.

    A scheduler is responsible for invoking the user-provided message
    callbacks asynchronously on behalf of a subscriber.
    """

    @property
    @abc.abstractmethod
    def queue(self) -> queue.Queue:  # pragma: NO COVER
        """Queue: A concurrency-safe queue specific to the underlying
        concurrency implementation.

        This queue is used to send messages *back* to the scheduling actor.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def schedule(self, callback: Callable, *args, **kwargs) -> None:  # pragma: NO COVER
        """Arrange for ``callback`` to be invoked asynchronously.

        Args:
            callback: The function to call.
            args: Positional arguments passed to the callback.
            kwargs: Key-word arguments passed to the callback.

        Returns:
            None
        """
        raise NotImplementedError

    @abc.abstractmethod
    def shutdown(
        self, await_msg_callbacks: bool = False
    ) -> List["pubsub_v1.subscriber.message.Message"]:  # pragma: NO COVER
        """Stop the scheduler and immediately end all pending callbacks.

        Args:
            await_msg_callbacks:
                If ``True``, the method will block until all currently executing
                callbacks are done processing. If ``False`` (default), the
                method will not wait for the currently running callbacks to complete.

        Returns:
            The messages submitted to the scheduler that were not yet dispatched
            to their callbacks.
            It is assumed that each message was submitted to the scheduler as the
            first positional argument to the provided callback.
        """
        raise NotImplementedError
def _make_default_thread_pool_executor() -> concurrent.futures.ThreadPoolExecutor:
    """Create the executor used by :class:`ThreadScheduler` when the caller
    does not supply one (10 workers, recognizable thread names)."""
    pool_options = {
        "max_workers": 10,
        "thread_name_prefix": "ThreadPoolExecutor-ThreadScheduler",
    }
    return concurrent.futures.ThreadPoolExecutor(**pool_options)
class ThreadScheduler(Scheduler):
"""A thread pool-based scheduler. It must not be shared across
SubscriberClients.
This scheduler is useful in typical I/O-bound message processing.
Args:
executor:
An optional executor to use. If not specified, a default one
will be created.
"""
def __init__(
self, executor: Optional[concurrent.futures.ThreadPoolExecutor] = None
):
self._queue: queue.Queue = queue.Queue()
if executor is None:
self._executor = _make_default_thread_pool_executor()
else:
self._executor = executor
@property
def queue(self):
"""Queue: A thread-safe queue used for communication between callbacks
and the scheduling thread."""
return self._queue
def schedule(self, callback: Callable, *args, **kwargs) -> None:
"""Schedule the callback to be called asynchronously in a thread pool.
Args:
callback: The function to call.
args: Positional arguments passed to the callback.
kwargs: Key-word arguments passed to the callback.
Returns:
None
"""
try:
self._executor.submit(callback, *args, **kwargs)
except RuntimeError:
warnings.warn(
"Scheduling a callback after executor shutdown.",
category=RuntimeWarning,
stacklevel=2,
)
def shutdown(
self, await_msg_callbacks: bool = False
) -> List["pubsub_v1.subscriber.message.Message"]:
"""Shut down the scheduler and immediately end all pending callbacks.
Args:
await_msg_callbacks:
If ``True``, the method will block until all currently executing
executor threads are done processing. If ``False`` (default), the
method will not wait for the currently running threads to complete.
Returns:
The messages submitted to the scheduler that were not yet dispatched
to their callbacks.
It is assumed that each message was submitted to the scheduler as the
first positional argument to the provided callback.
"""
dropped_messages = []
# Drop all pending item from the executor. Without this, the executor will also
# try to process any pending work items before termination, which is undesirable.
#
# TODO: Replace the logic below by passing `cancel_futures=True` to shutdown()
# once we only need to support Python 3.9+.
try:
while True:
work_item = self._executor._work_queue.get(block=False)
if work_item is None: # Exceutor in shutdown mode.
continue
dropped_messages.append(work_item.args[0])
except queue.Empty:
pass
self._executor.shutdown(wait=await_msg_callbacks)
return dropped_messages

View File

@@ -0,0 +1,299 @@
# Copyright 2017, Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import collections
import enum
import inspect
import sys
import typing
from typing import Dict, NamedTuple, Union
import proto # type: ignore
from google.api import http_pb2 # type: ignore
from google.api_core import gapic_v1
from google.iam.v1 import iam_policy_pb2 # type: ignore
from google.iam.v1 import policy_pb2
from google.iam.v1.logging import audit_data_pb2 # type: ignore
from cloudsdk.google.protobuf import descriptor_pb2
from cloudsdk.google.protobuf import duration_pb2
from cloudsdk.google.protobuf import empty_pb2
from cloudsdk.google.protobuf import field_mask_pb2
from cloudsdk.google.protobuf import timestamp_pb2
from google.api_core.protobuf_helpers import get_messages
from google.pubsub_v1.types import pubsub as pubsub_gapic_types
if typing.TYPE_CHECKING:  # pragma: NO COVER
    from types import ModuleType
    from google.pubsub_v1 import types as gapic_types
    from google.pubsub_v1.services.publisher.client import OptionalRetry

# TODO: Eventually implement OptionalTimeout in the GAPIC code generator and import
# it from the generated code. It's the same solution that is used for OptionalRetry.
# https://github.com/googleapis/gapic-generator-python/pull/1032/files
# https://github.com/googleapis/gapic-generator-python/pull/1065/files
#
# NOTE(review): ``gapic_types`` is only imported under TYPE_CHECKING above, yet
# it is referenced at runtime below — confirm that a runtime import of
# ``google.pubsub_v1.types`` exists elsewhere in this module.
if hasattr(gapic_v1.method, "_MethodDefault"):
    # _MethodDefault was only added in google-api-core==2.2.2
    OptionalTimeout = Union[gapic_types.TimeoutType, gapic_v1.method._MethodDefault]
else:
    # Older google-api-core: fall back to a plain ``object`` sentinel type.
    OptionalTimeout = Union[gapic_types.TimeoutType, object]  # type: ignore
# Define the default values for batching.
#
# This class is used when creating a publisher or subscriber client, and
# these settings can be altered to tweak Pub/Sub behavior.
# The defaults should be fine for most use cases.
class BatchSettings(NamedTuple):
    """The settings for batch publishing the messages.

    Attributes:
        max_bytes (int):
            The maximum total size of the messages to collect before automatically
            publishing the batch, including any byte size overhead of the publish
            request itself. The maximum value is bound by the server-side limit of
            10_000_000 bytes. Defaults to 1 MB.
        max_latency (float):
            The maximum number of seconds to wait for additional messages before
            automatically publishing the batch. Defaults to 10ms.
        max_messages (int):
            The maximum number of messages to collect before automatically
            publishing the batch. Defaults to 100.
    """

    # NOTE: the parenthesized string expressions after each field are no-ops at
    # runtime; they appear to serve as per-field documentation for doc tools.
    max_bytes: int = 1 * 1000 * 1000  # 1 MB
    (
        "The maximum total size of the messages to collect before automatically "
        "publishing the batch, including any byte size overhead of the publish "
        "request itself. The maximum value is bound by the server-side limit of "
        "10_000_000 bytes."
    )

    max_latency: float = 0.01  # 10 ms
    (
        "The maximum number of seconds to wait for additional messages before "
        "automatically publishing the batch."
    )

    max_messages: int = 100
    (
        "The maximum number of messages to collect before automatically "
        "publishing the batch."
    )
class LimitExceededBehavior(str, enum.Enum):
    """Possible actions to take when the publish flow control limits are exceeded.

    The enum also subclasses ``str``, so each member compares equal to (and can
    be used anywhere in place of) its string value.
    """

    IGNORE = "ignore"
    BLOCK = "block"
    ERROR = "error"
class PublishFlowControl(NamedTuple):
    """The client flow control settings for message publishing.

    Attributes:
        message_limit (int):
            The maximum number of messages awaiting to be published.
            Defaults to 1000.
        byte_limit (int):
            The maximum total size of messages awaiting to be published.
            Defaults to 10MB.
        limit_exceeded_behavior (LimitExceededBehavior):
            The action to take when publish flow control limits are exceeded.
            Defaults to LimitExceededBehavior.IGNORE.
    """

    # Derive the defaults from BatchSettings by field *name* rather than by
    # positional index into __new__.__defaults__, which would silently break
    # if the BatchSettings fields were ever reordered.
    message_limit: int = 10 * BatchSettings._field_defaults["max_messages"]  # type: ignore
    """The maximum number of messages awaiting to be published."""

    byte_limit: int = 10 * BatchSettings._field_defaults["max_bytes"]  # type: ignore
    """The maximum total size of messages awaiting to be published."""

    limit_exceeded_behavior: LimitExceededBehavior = LimitExceededBehavior.IGNORE
    """The action to take when publish flow control limits are exceeded."""
# Define the default publisher options.
#
# This class is used when creating a publisher client to pass in options
# to enable/disable features.
class PublisherOptions(NamedTuple):
    """The options for the publisher client.

    Attributes:
        enable_message_ordering (bool):
            Whether to order messages in a batch by a supplied ordering key.
            Defaults to false.
        flow_control (PublishFlowControl):
            Flow control settings for message publishing by the client. By default
            the publisher client does not do any throttling.
        retry (OptionalRetry):
            Retry settings for message publishing by the client. This should be
            an instance of :class:`google.api_core.retry.Retry`.
        timeout (OptionalTimeout):
            Timeout settings for message publishing by the client. It should be
            compatible with :class:`~.pubsub_v1.types.TimeoutType`.
    """

    enable_message_ordering: bool = False
    """Whether to order messages in a batch by a supplied ordering key."""

    flow_control: PublishFlowControl = PublishFlowControl()
    """Flow control settings for message publishing by the client. By default
    the publisher client does not do any throttling."""

    retry: "OptionalRetry" = gapic_v1.method.DEFAULT  # use api_core default
    """Retry settings for message publishing by the client. This should be
    an instance of :class:`google.api_core.retry.Retry`."""

    timeout: "OptionalTimeout" = gapic_v1.method.DEFAULT  # use api_core default
    """Timeout settings for message publishing by the client. It should be
    compatible with :class:`~.pubsub_v1.types.TimeoutType`."""
# Define the type class and default values for flow control settings.
#
# This class is used when creating a publisher or subscriber client, and
# these settings can be altered to tweak Pub/Sub behavior.
# The defaults should be fine for most use cases.
class FlowControl(NamedTuple):
    """The settings for controlling the rate at which messages are pulled
    with an asynchronous subscription.

    Attributes:
        max_bytes (int):
            The maximum total size of received - but not yet processed - messages
            before pausing the message stream. Defaults to 100 MiB.
        max_messages (int):
            The maximum number of received - but not yet processed - messages before
            pausing the message stream. Defaults to 1000.
        max_lease_duration (float):
            The maximum amount of time in seconds to hold a lease on a message
            before dropping it from the lease management. Defaults to 1 hour.
        min_duration_per_lease_extension (float):
            The min amount of time in seconds for a single lease extension attempt.
            Must be between 10 and 600 (inclusive). Ignored by default, but set to
            60 seconds if the subscription has exactly-once delivery enabled.
        max_duration_per_lease_extension (float):
            The max amount of time in seconds for a single lease extension attempt.
            Bounds the delay before a message redelivery if the subscriber
            fails to extend the deadline. Must be between 10 and 600 (inclusive).
            Ignored if set to 0.
    """

    max_bytes: int = 100 * 1024 * 1024  # 100 MiB
    """The maximum total size of received - but not yet processed - messages
    before pausing the message stream."""

    max_messages: int = 1000
    """The maximum number of received - but not yet processed - messages before
    pausing the message stream."""

    max_lease_duration: float = 1 * 60 * 60  # 1 hour
    """The maximum amount of time in seconds to hold a lease on a message
    before dropping it from the lease management."""

    min_duration_per_lease_extension: float = 0
    """The min amount of time in seconds for a single lease extension attempt.
    Must be between 10 and 600 (inclusive). Ignored by default, but set to
    60 seconds if the subscription has exactly-once delivery enabled."""

    max_duration_per_lease_extension: float = 0  # disabled by default
    """The max amount of time in seconds for a single lease extension attempt.
    Bounds the delay before a message redelivery if the subscriber fails to
    extend the deadline. Must be between 10 and 600 (inclusive). Ignored if
    set to 0."""
# The current api core helper does not find new proto messages of type proto.Message,
# thus we need our own helper. Adjusted from
# https://github.com/googleapis/python-api-core/blob/8595f620e7d8295b6a379d6fd7979af3bef717e2/google/api_core/protobuf_helpers.py#L101-L118
def _get_protobuf_messages(module: "ModuleType") -> Dict[str, proto.Message]:
    """Discover all protobuf Message classes in a given import module.

    Args:
        module (module): A Python module; :func:`dir` will be run against this
            module to find Message subclasses.

    Returns:
        dict[str, proto.Message]: A dictionary with the
            Message class names as keys, and the Message subclasses themselves
            as values.
    """
    messages = collections.OrderedDict()
    for attribute_name in dir(module):
        attribute = getattr(module, attribute_name)
        if not inspect.isclass(attribute):
            continue
        if issubclass(attribute, proto.Message):
            messages[attribute_name] = attribute
    return messages
# Protobuf modules shared across Google Cloud clients; their message classes
# are re-exported from this module by the loop below.
_shared_modules = [
    http_pb2,
    iam_policy_pb2,
    policy_pb2,
    audit_data_pb2,
    descriptor_pb2,
    duration_pb2,
    empty_pb2,
    field_mask_pb2,
    timestamp_pb2,
]
# Modules local to this library whose messages are also re-exported (and
# re-branded — see the second loop below).
_local_modules = [pubsub_gapic_types]
# Seed list of exported names; extended below with every discovered message.
names = [
    "BatchSettings",
    "LimitExceededBehavior",
    "PublishFlowControl",
    "PublisherOptions",
    "FlowControl",
]
# Attach each shared-module message to this module's namespace and record its
# name for __all__.
for module in _shared_modules:
    for name, message in get_messages(module).items():
        setattr(sys.modules[__name__], name, message)
        names.append(name)
# Same for the local messages, additionally rewriting __module__ —
# presumably so that repr() and generated docs show the public import path;
# verify before relying on this elsewhere.
for module in _local_modules:
    for name, message in _get_protobuf_messages(module).items():
        message.__module__ = "google.cloud.pubsub_v1.types"
        setattr(sys.modules[__name__], name, message)
        names.append(name)
__all__ = tuple(sorted(names))