如何在使用 Boto3 删除 SQS 消息时防止连接超时

How to prevent connection timeouts while deleting SQS messages with Boto3

我有一系列由 SQS 队列事件触发器驱动的 AWS Lambda 函数。但是,有时当我尝试从队列中删除消息时,请求会一次又一次地超时,直到 Lambda 函数本身执行超时为止。

我启用了调试日志记录,确认这是一个套接字超时,但除此之外没有得到任何更详细的信息。这个问题的出现似乎也没有规律。起初,我以为这是 Lambda 预热(冷启动)问题,但无论是在 Lambda 已经多次成功运行之后,还是在首次部署之后,我都遇到过这个问题。

到目前为止我尝试过的:

一些其他细节:

我正在使用的代码片段:

# Delete the SQS message that triggered this Lambda invocation.
# NOTE(review): assumes `boto3`, `Config` (botocore.config), the Lambda
# `event`, and `eventSourceARN` are already in scope; the snippet does not
# show where they come from.
config = Config(connect_timeout=30, read_timeout=30, retries={'total_max_attempts': 1}, region_name='us-east-1')
sqs_client = boto3.client(service_name='sqs', config=config)
# Receipt handle of the first record in the triggering event.
receiptHandle = event['Records'][0]['receiptHandle']
# The queue name is the last colon-separated segment of the ARN.
fromQueueName = eventSourceARN.split(':')[-1]
# Resolve the queue URL once and reuse the response instead of issuing a
# second, identical get_queue_url request.
fromQueue = sqs_client.get_queue_url(QueueName=fromQueueName)
fromQueueUrl = fromQueue['QueueUrl']
messageDelete = sqs_client.delete_message(QueueUrl=fromQueueUrl, ReceiptHandle=receiptHandle)

以及我看到的 DEBUG 异常示例:

[DEBUG] 2020-10-29T21:27:28.32Z 3c60cac9-6d99-58c6-84c9-92dc581919fd retry needed, retryable exception caught: Connect timeout on endpoint URL: "https://queue.amazonaws.com/" Traceback (most recent call last): File "/var/task/urllib3/connection.py", line 159, in _new_conn conn = connection.create_connection( File "/var/task/urllib3/util/connection.py", line 84, in create_connection raise err File "/var/task/urllib3/util/connection.py", line 74, in create_connection sock.connect(sa) socket.timeout: timed out During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/botocore/httpsession.py", line 254, in send urllib_response = conn.urlopen( File "/var/task/urllib3/connectionpool.py", line 726, in urlopen retries = retries.increment( File "/var/task/urllib3/util/retry.py", line 386, in increment raise six.reraise(type(error), error, _stacktrace) File "/var/task/urllib3/packages/six.py", line 735, in reraise raise value File "/var/task/urllib3/connectionpool.py", line 670, in urlopen httplib_response = self._make_request( File "/var/task/urllib3/connectionpool.py", line 381, in _make_request self._validate_conn(conn) File "/var/task/urllib3/connectionpool.py", line 978, in _validate_conn conn.connect() File "/var/task/urllib3/connection.py", line 309, in connect conn = self._new_conn() File "/var/task/urllib3/connection.py", line 164, in _new_conn raise ConnectTimeoutError( urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPSConnection object at 0x7f27b56b7460>, 'Connection to queue.amazonaws.com timed out. 
(connect timeout=15)') During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/utils.py", line 79, in preflight_check fromQueue = sqs_client.get_queue_url(QueueName=fromQueueName) File "/opt/python/botocore/client.py", line 357, in _api_call return self._make_api_call(operation_name, kwargs) File "/opt/python/botocore/client.py", line 662, in _make_api_call http, parsed_response = self._make_request( File "/opt/python/botocore/client.py", line 682, in _make_request return self._endpoint.make_request(operation_model, request_dict) File "/opt/python/botocore/endpoint.py", line 102, in make_request return self._send_request(request_dict, operation_model) File "/opt/python/botocore/endpoint.py", line 136, in _send_request while self._needs_retry(attempts, operation_model, request_dict, File "/opt/python/botocore/endpoint.py", line 253, in _needs_retry responses = self._event_emitter.emit( File "/opt/python/botocore/hooks.py", line 356, in emit return self._emitter.emit(aliased_event_name, **kwargs) File "/opt/python/botocore/hooks.py", line 228, in emit return self._emit(event_name, kwargs) File "/opt/python/botocore/hooks.py", line 211, in _emit response = handler(**kwargs) File "/opt/python/botocore/retryhandler.py", line 183, in __call__ if self._checker(attempts, response, caught_exception): File "/opt/python/botocore/retryhandler.py", line 250, in __call__ should_retry = self._should_retry(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 277, in _should_retry return self._checker(attempt_number, response, caught_exception) File "/opt/python/botocore/retryhandler.py", line 316, in __call__ checker_response = checker(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 222, in __call__ return self._check_caught_exception( File "/opt/python/botocore/retryhandler.py", line 359, in _check_caught_exception raise caught_exception File 
"/opt/python/botocore/endpoint.py", line 200, in _do_get_response http_response = self._send(request) File "/opt/python/botocore/endpoint.py", line 269, in _send return self.http_session.send(request) File "/opt/python/botocore/httpsession.py", line 287, in send raise ConnectTimeoutError(endpoint_url=request.url, error=e) botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "https://queue.amazonaws.com/" During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/botocore/retryhandler.py", line 269, in _should_retry return self._checker(attempt_number, response, caught_exception) File "/opt/python/botocore/retryhandler.py", line 316, in __call__ checker_response = checker(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 222, in __call__ return self._check_caught_exception( File "/opt/python/botocore/retryhandler.py", line 359, in _check_caught_exception raise caught_exception File "/opt/python/botocore/endpoint.py", line 200, in _do_get_response http_response = self._send(request) File "/opt/python/botocore/endpoint.py", line 269, in _send return self.http_session.send(request) File "/opt/python/botocore/httpsession.py", line 287, in send raise ConnectTimeoutError(endpoint_url=request.url, error=e) botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "https://queue.amazonaws.com/"

根据评论。

SQS 超时的原因是 Lambda 函数关联到了某个 VPC,而该 VPC 没有 SQS 的 VPC 接口端点(interface endpoint)。如果既没有接口端点,也没有 NAT 网关,Lambda 函数就无法连接到 SQS。

解决方案是为 SQS 服务添加 VPC 接口端点。