如何使用 python boto3 更新 AWS S3 中现有对象的元数据?
How to update metadata of an existing object in AWS S3 using python boto3?
boto3 文档没有明确指定如何更新已存在的 S3 对象的用户元数据。
可以使用copy_from()方法来完成-
import boto3
s3 = boto3.resource('s3')
s3_object = s3.Object('bucket-name', 'key')
s3_object.metadata.update({'id':'value'})
s3_object.copy_from(CopySource={'Bucket':'bucket-name', 'Key':'key'}, Metadata=s3_object.metadata, MetadataDirective='REPLACE')
您可以通过添加某些内容来更新元数据,也可以使用新元数据更新当前元数据值,这是我正在使用的代码片段:
import sys
import os
import boto3
import pprint
from boto3 import client
from botocore.utils import fix_s3_host
param_1= YOUR_ACCESS_KEY
param_2= YOUR_SECRETE_KEY
param_3= YOUR_END_POINT
param_4= YOUR_BUCKET
#Create the S3 client
s3ressource = client(
service_name='s3',
endpoint_url= param_3,
aws_access_key_id= param_1,
aws_secret_access_key=param_2,
use_ssl=True,
)
# Building a list of of object per bucket
def BuildObjectListPerBucket (variablebucket):
global listofObjectstobeanalyzed
listofObjectstobeanalyzed = []
extensions = ['.jpg','.png']
for key in s3ressource.list_objects(Bucket=variablebucket)["Contents"]:
#print (key ['Key'])
onemoreObject=key['Key']
if onemoreObject.endswith(tuple(extensions)):
listofObjectstobeanalyzed.append(onemoreObject)
#print listofObjectstobeanalyzed
else :
s3ressource.delete_object(Bucket=variablebucket,Key=onemoreObject)
return listofObjectstobeanalyzed
# for a given existing object, create metadata
def createmetdata(bucketname,objectname):
s3ressource.upload_file(objectname, bucketname, objectname, ExtraArgs={"Metadata": {"metadata1":"ImageName","metadata2":"ImagePROPERTIES" ,"metadata3":"ImageCREATIONDATE"}})
# for a given existing object, add new metadata
def ADDmetadata(bucketname,objectname):
s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
k = s3ressource.head_object(Bucket = bucketname, Key = objectname)
m = k["Metadata"]
m["new_metadata"] = "ImageNEWMETADATA"
s3ressource.copy_object(Bucket = bucketname, Key = objectname, CopySource = bucketname + '/' + objectname, Metadata = m, MetadataDirective='REPLACE')
# for a given existing object, update a metadata with new value
def CHANGEmetadata(bucketname,objectname):
s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
k = s3ressource.head_object(Bucket = bucketname, Key = objectname)
m = k["Metadata"]
m.update({'watson_visual_rec_dic':'ImageCREATIONDATEEEEEEEEEEEEEEEEEEEEEEEEEE'})
s3ressource.copy_object(Bucket = bucketname, Key = objectname, CopySource = bucketname + '/' + objectname, Metadata = m, MetadataDirective='REPLACE')
def readmetadata (bucketname,objectname):
ALLDATAOFOBJECT = s3ressource.get_object(Bucket=bucketname, Key=objectname)
ALLDATAOFOBJECTMETADATA=ALLDATAOFOBJECT['Metadata']
print ALLDATAOFOBJECTMETADATA
# create the list of object on a per bucket basis
BuildObjectListPerBucket (param_4)
# Call functions to see the results
for objectitem in listofObjectstobeanalyzed:
# CALL The function you want
readmetadata(param_4,objectitem)
ADDmetadata(param_4,objectitem)
readmetadata(param_4,objectitem)
CHANGEmetadata(param_4,objectitem)
readmetadata(param_4,objectitem)
您可以使用 copy_from()
on the resource (like ) mentions, but you can also use the client's copy_object()
并指定相同的源和目标来执行此操作。这些方法是等效的,并在下面调用相同的代码。
import boto3
s3 = boto3.client("s3")
src_key = "my-key"
src_bucket = "my-bucket"
s3.copy_object(Key=src_key, Bucket=src_bucket,
CopySource={"Bucket": src_bucket, "Key": src_key},
Metadata={"my_new_key": "my_new_val"},
MetadataDirective="REPLACE")
'REPLACE' 值指定请求中传递的元数据应完全 覆盖源元数据。如果你的意思是只添加个新键值,或者只删除一些个键,你必须先阅读原始数据,编辑它并调用更新。
要仅正确替换元数据的 子集:
- 使用
head_object(Key=src_key, Bucket=src_bucket)
检索原始元数据。还要注意响应中的 Etag
- 在本地对元数据进行所需的更改。
- 如上所述调用
copy_object
以上传新的元数据,但在请求中传递 CopySourceIfMatch=original_etag
以确保远程对象在覆盖之前具有您期望的元数据。 original_etag
是您在第 1 步中获得的。如果元数据(或数据本身)在 head_object
被调用后发生了变化(例如,同时被另一个程序 运行 调用),copy_object
将因 HTTP 412 错误而失败。
与此类似 但保留现有元数据,同时只修改需要的内容。根据系统定义的元数据,在此示例中我只保留了 ContentType 和 ContentDisposition。其他系统定义的元数据也可以类似保存。
import boto3
s3 = boto3.client('s3')
response = s3.head_object(Bucket=bucket_name, Key=object_name)
response['Metadata']['new_meta_key'] = "new_value"
response['Metadata']['existing_meta_key'] = "new_value"
result = s3.copy_object(Bucket=bucket_name, Key=object_name,
CopySource={'Bucket': bucket_name,
'Key': object_name},
Metadata=response['Metadata'],
MetadataDirective='REPLACE', TaggingDirective='COPY',
ContentDisposition=response['ContentDisposition'],
ContentType=response['ContentType'])
boto3 文档没有明确指定如何更新已存在的 S3 对象的用户元数据。
可以使用copy_from()方法来完成-
import boto3
s3 = boto3.resource('s3')
s3_object = s3.Object('bucket-name', 'key')
s3_object.metadata.update({'id':'value'})
s3_object.copy_from(CopySource={'Bucket':'bucket-name', 'Key':'key'}, Metadata=s3_object.metadata, MetadataDirective='REPLACE')
您可以通过添加某些内容来更新元数据,也可以使用新元数据更新当前元数据值,这是我正在使用的代码片段:
import sys
import os
import boto3
import pprint
from boto3 import client
from botocore.utils import fix_s3_host
param_1= YOUR_ACCESS_KEY
param_2= YOUR_SECRETE_KEY
param_3= YOUR_END_POINT
param_4= YOUR_BUCKET
#Create the S3 client
s3ressource = client(
service_name='s3',
endpoint_url= param_3,
aws_access_key_id= param_1,
aws_secret_access_key=param_2,
use_ssl=True,
)
# Building a list of of object per bucket
def BuildObjectListPerBucket (variablebucket):
global listofObjectstobeanalyzed
listofObjectstobeanalyzed = []
extensions = ['.jpg','.png']
for key in s3ressource.list_objects(Bucket=variablebucket)["Contents"]:
#print (key ['Key'])
onemoreObject=key['Key']
if onemoreObject.endswith(tuple(extensions)):
listofObjectstobeanalyzed.append(onemoreObject)
#print listofObjectstobeanalyzed
else :
s3ressource.delete_object(Bucket=variablebucket,Key=onemoreObject)
return listofObjectstobeanalyzed
# for a given existing object, create metadata
def createmetdata(bucketname,objectname):
s3ressource.upload_file(objectname, bucketname, objectname, ExtraArgs={"Metadata": {"metadata1":"ImageName","metadata2":"ImagePROPERTIES" ,"metadata3":"ImageCREATIONDATE"}})
# for a given existing object, add new metadata
def ADDmetadata(bucketname,objectname):
s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
k = s3ressource.head_object(Bucket = bucketname, Key = objectname)
m = k["Metadata"]
m["new_metadata"] = "ImageNEWMETADATA"
s3ressource.copy_object(Bucket = bucketname, Key = objectname, CopySource = bucketname + '/' + objectname, Metadata = m, MetadataDirective='REPLACE')
# for a given existing object, update a metadata with new value
def CHANGEmetadata(bucketname,objectname):
s3_object = s3ressource.get_object(Bucket=bucketname, Key=objectname)
k = s3ressource.head_object(Bucket = bucketname, Key = objectname)
m = k["Metadata"]
m.update({'watson_visual_rec_dic':'ImageCREATIONDATEEEEEEEEEEEEEEEEEEEEEEEEEE'})
s3ressource.copy_object(Bucket = bucketname, Key = objectname, CopySource = bucketname + '/' + objectname, Metadata = m, MetadataDirective='REPLACE')
def readmetadata (bucketname,objectname):
ALLDATAOFOBJECT = s3ressource.get_object(Bucket=bucketname, Key=objectname)
ALLDATAOFOBJECTMETADATA=ALLDATAOFOBJECT['Metadata']
print ALLDATAOFOBJECTMETADATA
# create the list of object on a per bucket basis
BuildObjectListPerBucket (param_4)
# Call functions to see the results
for objectitem in listofObjectstobeanalyzed:
# CALL The function you want
readmetadata(param_4,objectitem)
ADDmetadata(param_4,objectitem)
readmetadata(param_4,objectitem)
CHANGEmetadata(param_4,objectitem)
readmetadata(param_4,objectitem)
您可以使用 copy_from()
on the resource (like copy_object()
并指定相同的源和目标来执行此操作。这些方法是等效的,并在下面调用相同的代码。
import boto3
s3 = boto3.client("s3")
src_key = "my-key"
src_bucket = "my-bucket"
s3.copy_object(Key=src_key, Bucket=src_bucket,
CopySource={"Bucket": src_bucket, "Key": src_key},
Metadata={"my_new_key": "my_new_val"},
MetadataDirective="REPLACE")
'REPLACE' 值指定请求中传递的元数据应完全 覆盖源元数据。如果你的意思是只添加个新键值,或者只删除一些个键,你必须先阅读原始数据,编辑它并调用更新。
要仅正确替换元数据的 子集:
- 使用
head_object(Key=src_key, Bucket=src_bucket)
检索原始元数据。还要注意响应中的 Etag - 在本地对元数据进行所需的更改。
- 如上所述调用
copy_object
以上传新的元数据,但在请求中传递CopySourceIfMatch=original_etag
以确保远程对象在覆盖之前具有您期望的元数据。original_etag
是您在第 1 步中获得的。如果元数据(或数据本身)在head_object
被调用后发生了变化(例如,同时被另一个程序 运行 调用),copy_object
将因 HTTP 412 错误而失败。
与此类似
import boto3
s3 = boto3.client('s3')
response = s3.head_object(Bucket=bucket_name, Key=object_name)
response['Metadata']['new_meta_key'] = "new_value"
response['Metadata']['existing_meta_key'] = "new_value"
result = s3.copy_object(Bucket=bucket_name, Key=object_name,
CopySource={'Bucket': bucket_name,
'Key': object_name},
Metadata=response['Metadata'],
MetadataDirective='REPLACE', TaggingDirective='COPY',
ContentDisposition=response['ContentDisposition'],
ContentType=response['ContentType'])