Issue with uploading files from a local directory to AWS S3 using Python 2.7 and boto 2
I am doing a simple operation: downloading gzip files from an S3 bucket to a local directory, extracting them into another local directory, and then uploading them back to the S3 bucket under an archive folder path. While doing this I want to make sure I am processing the same set of files that I originally downloaded from the S3 bucket, which in the code below is (f_name). Right now the code below does not upload them back to S3, and that is the problem I am running into. It is able to download from S3 and unzip into the local directory, though. Can you help me understand what is wrong with the _uploadFile function?
from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path

aws_bucket = "event-logs-dev"  ## S3 Bucket name
local_download_directory = "/Users/TargetData/Download/test_queue1/"  ## local directory to download the gzip files from S3.
Target_directory_to_extract = "/Users/TargetData/unzip"  ## local directory to gunzip the downloaded files.
Target_s3_path_to_upload = "event-logs-dev/data/clean/xact/logs/archive/"  ## S3 bucket path to upload the files.

def decompressAllFilesFromNetfiler(self, aws_bucket, local_download_directory, Target_directory_to_extract, Target_s3_path_to_upload):
    zipFiles = [f for f in os.listdir(local_download_directory) if re.match(r'.*\.tar\.gz', f)]
    for f_name in zipFiles:
        if os.path.exists(Target_directory_to_extract + "/" + f_name[:-len('.tar.gz')]) and os.access(Target_directory_to_extract + "/" + f_name[:-len('.tar.gz')], os.R_OK):
            print ('File {} already exists!'.format(f_name))
        else:
            f_name_with_path = os.path.join(local_download_directory, f_name)
            os.system('mkdir -p {} && tar vxzf {} -C {}'.format(Target_directory_to_extract, f_name_with_path, Target_directory_to_extract))
            print ('Extracted file {}'.format(f_name))
            self._uploadFile(aws_bucket, f_name, Target_s3_path_to_upload, Target_directory_to_extract)

def _uploadFile(self, aws_bucket, f_name, Target_s3_path_to_upload, Target_directory_to_extract):
    full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload, f_name))
    path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name))
    try:
        print "Uploaded extracted file to: %s" % (full_key_name)
        key = aws_bucket.new_key(full_key_name)
        key.set_contents_from_filename(path)
    except:
        if full_key_name is None:
            print "Error uploading"
Right now the output prints Uploaded extracted file to: event-logs-dev/data/clean/xact/logs/archive/1442235602129200000.tar.gz, but nothing is actually uploaded to the S3 bucket. Your help is greatly appreciated!! Thanks in advance!
It looks like you cut and pasted part of your code, and the formatting was probably lost, because the code above will not work as pasted. I took the liberty of making it (mostly) PEP8, but it is still missing some of the code that creates the S3 objects. Since you import the modules, I assume you have that code and just did not paste it.
Here is a cleaned-up version of the code with correct formatting. I also added an exception handler to your try: block that prints out the error you get. You should update it to catch the specific exceptions thrown by new_key or set_contents_from_filename... but the general exception will get you started. If nothing else this is more readable, but you should also include your S3 connection code - and strip out anything specific to your domain (e.g. keys, trade secrets, etc.).
#!/usr/bin/env python
"""
do some download
some extract
and some upload
"""
from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path
import re

aws_bucket = 'event-logs-dev'
local_download_directory = '/Users/TargetData/Download/test_queue1/'
Target_directory_to_extract = '/Users/TargetData/unzip'
Target_s3_path_to_upload = 'event-logs-dev/data/clean/xact/logs/archive/'

'''
MUST BE SOME MAGIC HERE TO GET AN S3 CONNECTION ???
aws_bucket IS NOT A BUCKET OBJECT ...
'''
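# A minimal sketch of that missing piece, assuming boto 2's default
# credential chain (~/.boto, environment variables, or an IAM role):
#
#     S3CONN = S3Connection()                       # or S3Connection(key_id, secret_key)
#     BUCKET = S3CONN.get_bucket('event-logs-dev')  # a Bucket object, not a string
#
# _uploadFile below opens a fresh connection on every call instead, which
# works but is wasteful when uploading many files.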
def decompressAllFilesFromNetfiler(self,
                                   aws_bucket,
                                   local_download_directory,
                                   Target_directory_to_extract,
                                   Target_s3_path_to_upload):
    '''
    decompress stuff
    '''
    zipFiles = [f for f in os.listdir(
        local_download_directory) if re.match(r'.*\.tar\.gz', f)]
    for f_name in zipFiles:
        extracted = os.path.join(Target_directory_to_extract,
                                 f_name[:-len('.tar.gz')])
        if os.path.exists(extracted) and os.access(extracted, os.R_OK):
            print ('File {} already exists!'.format(f_name))
        else:
            f_name_with_path = os.path.join(local_download_directory, f_name)
            os.system('mkdir -p {} && tar vxzf {} -C {}'.format(
                Target_directory_to_extract,
                f_name_with_path,
                Target_directory_to_extract))
            print ('Extracted file {}'.format(f_name))
            self._uploadFile(aws_bucket,
                             f_name,
                             Target_s3_path_to_upload,
                             Target_directory_to_extract)
def _uploadFile(self,
                aws_bucket,
                f_name,
                Target_s3_path_to_upload,
                Target_directory_to_extract):
    full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload,
                                                    f_name))
    path = os.path.expanduser(os.path.join(Target_directory_to_extract,
                                           f_name))
    try:
        S3CONN = S3Connection()
        BUCKET = S3CONN.get_bucket(aws_bucket)
        key = BUCKET.new_key(full_key_name)
        key.set_contents_from_filename(path)
        print "Uploaded extracted file to: {}".format(full_key_name)
    except Exception as UploadERR:
        if full_key_name is None:
            print 'Error uploading'
        else:
            print "Error : {}".format(UploadERR)