将文件从 /tmp 文件夹移动到 Google Cloud Storage 存储桶
Move file from /tmp folder to Google Cloud Storage bucket
我最初发布 this question 时遇到了 python 云函数创建和写入新文件的问题。从那时起,我设法在 /tmp
目录中创建了一个 csv,但我正在努力寻找一种方法将该文件移动到我上传原始 csv 的存储桶文件夹中。
这可以吗?我查看了 Google Cloud Storage 文档并尝试使用 blob.download_to_filename()
和 bucket.copy_blob()
方法,但目前收到错误:FileNotFoundError: [Errno 2] No such file or directory: 'my-project.appspot.com/my-folder/my-converted-file.csv'
感谢任何帮助或建议!
to move that file into my bucket
这是一个例子。请记住:
- 不要随便复制粘贴。
- 该代码片段仅用于展示想法 - 它不会按原样运行。需要进行修改以适应您的上下文和要求。
_crc32sum
功能不是我开发的
- 我没有测试代码。这只是我从不同 public 来源复制一些元素的想法。
代码如下:
import base64
import crc32c
import os
from google.cloud import exceptions
from google.cloud import storage
# =====> ==============================
# a function to calculate crc32c hash
def _crc32sum(filename: str, blocksize: int = 65536) -> int:
"""Calculate the crc32c hash for a file with the provided name
:param filename: the name of the file
:param blocksize: the size of the block for the file reading
:return: the calculated crc32c hash for the given file
"""
checksum = 0
with open(filename, "rb") as f_ref:
for block in iter(lambda: f_ref.read(blocksize), b""):
checksum = crc32c.crc32(block, checksum)
return checksum & 0xffffffff
# =====> ==============================
# use the default project in the client initialisation
CS = storage.Client()
lcl_file_name = "/tmp/my-local-file.csv"
tgt_bucket_name = "my-bucket-name"
tgt_object_name = "prefix/another-prefix/my-target-file.csv"
# =====> ==============================
# =====> ==============================
# =====> the process strats here
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/client.html#Client.lookup_bucket
gcs_tgt_bucket_ref = CS.lookup_bucket(tgt_bucket_name)
# check if the target bucket does exist
if gcs_tgt_bucket_ref is None:
# handle incorrect bucket name or its absence
# most likely we are to finish the execution here rather than 'pass'
pass
# calculate the hash for the local file
lcl_crc32c = _crc32sum(lcl_file_name)
base64_crc32c = base64.b64encode(lcl_crc32c.to_bytes(
length=4, byteorder='big')).decode('utf-8')
# check if the file/object in the bucket already exists
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/bucket.html#Bucket.blob
gcs_file_ref = gcs_tgt_bucket_ref.blob(tgt_object_name)
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.exists
if gcs_file_ref.exists():
gcs_file_ref.reload()
# compare crc32c hashes - between the local file and the gcs file/object
if base64_crc32c != gcs_file_ref.crc32c:
# the blob file/object in the GCS has a different hash
# the blob file/object should be deleted and a new one to be uploaded
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.delete
gcs_file_ref.delete()
else:
# the file/object is already in the bucket
# most likely we are to finish the execution here rather than 'pass'
pass
# upload file to the target bucket
# reinit the reference in case the target file/object was deleted
gcs_file_ref = gcs_tgt_bucket_ref.blob(tgt_file_name)
gcs_file_ref.crc32c = base64_crc32c
with open(lcl_file_name, 'rb') as file_obj:
try:
gcs_file_ref.metadata = {
"custom-metadata-key": "custom-metadata-value"
}
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.upload_from_file
gcs_file_ref.upload_from_file(
file_obj=file_obj, content_type="text/csv", checksum="crc32c")
except exceptions.GoogleCloudError as gc_err:
# handle the exception here
# don't forget to delete the local file if it is not required anymore
# most likely we are to finish the execution here rather than 'pass'
pass
# clean behind
if lcl_file_name and os.path.exists(lcl_file_name):
os.remove(lcl_file_name)
# =====> the process ends here
# =====> ==============================
如果有重大错误请告诉我,我会修改示例。
我最初发布 this question 时遇到了 python 云函数创建和写入新文件的问题。从那时起,我设法在 /tmp
目录中创建了一个 csv,但我正在努力寻找一种方法将该文件移动到我上传原始 csv 的存储桶文件夹中。
这可以吗?我查看了 Google Cloud Storage 文档并尝试使用 blob.download_to_filename()
和 bucket.copy_blob()
方法,但目前收到错误:FileNotFoundError: [Errno 2] No such file or directory: 'my-project.appspot.com/my-folder/my-converted-file.csv'
感谢任何帮助或建议!
to move that file into my bucket
这是一个例子。请记住:
- 不要随便复制粘贴。
- 该代码片段仅用于展示想法 - 它不会按原样运行。需要进行修改以适应您的上下文和要求。
_crc32sum
功能不是我开发的- 我没有测试代码。这只是我从不同 public 来源复制一些元素的想法。
代码如下:
import base64
import crc32c
import os
from google.cloud import exceptions
from google.cloud import storage
# =====> ==============================
# a function to calculate crc32c hash
def _crc32sum(filename: str, blocksize: int = 65536) -> int:
"""Calculate the crc32c hash for a file with the provided name
:param filename: the name of the file
:param blocksize: the size of the block for the file reading
:return: the calculated crc32c hash for the given file
"""
checksum = 0
with open(filename, "rb") as f_ref:
for block in iter(lambda: f_ref.read(blocksize), b""):
checksum = crc32c.crc32(block, checksum)
return checksum & 0xffffffff
# =====> ==============================
# use the default project in the client initialisation
CS = storage.Client()
lcl_file_name = "/tmp/my-local-file.csv"
tgt_bucket_name = "my-bucket-name"
tgt_object_name = "prefix/another-prefix/my-target-file.csv"
# =====> ==============================
# =====> ==============================
# =====> the process strats here
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/client.html#Client.lookup_bucket
gcs_tgt_bucket_ref = CS.lookup_bucket(tgt_bucket_name)
# check if the target bucket does exist
if gcs_tgt_bucket_ref is None:
# handle incorrect bucket name or its absence
# most likely we are to finish the execution here rather than 'pass'
pass
# calculate the hash for the local file
lcl_crc32c = _crc32sum(lcl_file_name)
base64_crc32c = base64.b64encode(lcl_crc32c.to_bytes(
length=4, byteorder='big')).decode('utf-8')
# check if the file/object in the bucket already exists
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/bucket.html#Bucket.blob
gcs_file_ref = gcs_tgt_bucket_ref.blob(tgt_object_name)
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.exists
if gcs_file_ref.exists():
gcs_file_ref.reload()
# compare crc32c hashes - between the local file and the gcs file/object
if base64_crc32c != gcs_file_ref.crc32c:
# the blob file/object in the GCS has a different hash
# the blob file/object should be deleted and a new one to be uploaded
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.delete
gcs_file_ref.delete()
else:
# the file/object is already in the bucket
# most likely we are to finish the execution here rather than 'pass'
pass
# upload file to the target bucket
# reinit the reference in case the target file/object was deleted
gcs_file_ref = gcs_tgt_bucket_ref.blob(tgt_file_name)
gcs_file_ref.crc32c = base64_crc32c
with open(lcl_file_name, 'rb') as file_obj:
try:
gcs_file_ref.metadata = {
"custom-metadata-key": "custom-metadata-value"
}
# https://googleapis.dev/python/storage/latest/_modules/google/cloud/storage/blob.html#Blob.upload_from_file
gcs_file_ref.upload_from_file(
file_obj=file_obj, content_type="text/csv", checksum="crc32c")
except exceptions.GoogleCloudError as gc_err:
# handle the exception here
# don't forget to delete the local file if it is not required anymore
# most likely we are to finish the execution here rather than 'pass'
pass
# clean behind
if lcl_file_name and os.path.exists(lcl_file_name):
os.remove(lcl_file_name)
# =====> the process ends here
# =====> ==============================
如果有重大错误请告诉我,我会修改示例。