Best Approach to Zipping up a Folder in Google Cloud Storage using Go?
My Google App Engine Go project creates a zip of multiple files in a "folder" that sits in Google Cloud Storage. It used to be quite fast when it was implemented in BlobStore using the now-deprecated-and-removed Files API. I recently converted the code to use Google Cloud Storage, and now performance is really bad and sometimes times out. The files being zipped are between 1K and 2M in size.
I am looking for any advice to improve zipping up the file contents. The code below is what I wrote to compress multiple files in the cloud into a new zip file in the cloud. It can take a long time to execute, and it requires loading the entire contents of each file into memory before writing it to the zip (see the PERFORMANCE ISSUE comment below). There has to be a better way.
// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {

	log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName)

	srcFolder = fmt.Sprintf("%v/", srcFolder)
	query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}

	objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
		return
	}

	totalFiles := len(objs.Results)
	if totalFiles == 0 {
		log.Errorf(cloud.c, "Packing failed to find objects in bucket %q folder %q", cloud.bucket, srcFolder)
		return
	}

	// create storage file for writing
	log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
	storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)

	// add optional content type and meta data
	if len(contentType) > 0 {
		storageWriter.ContentType = contentType
	}
	if metaData != nil {
		storageWriter.Metadata = *metaData
	}

	// Create a buffer to write our archive to.
	buf := new(bytes.Buffer)

	// Create a new zip archive to memory buffer
	zipWriter := zip.NewWriter(buf)

	// go through each file in the folder
	for _, obj := range objs.Results {

		log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
		//d.dumpStats(obj)

		// read file in our source folder from storage - io.ReadCloser returned from storage
		storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
			return
		}
		defer storageReader.Close() // note: these defers accumulate until Pack returns

		// PERFORMANCE ISSUE: have to load the entire file into memory to get random access from the cloud
		slurp, err := ioutil.ReadAll(storageReader)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
			return
		}

		// grab just the filename from directory listing (don't want to store paths in zip)
		_, zipFileName := filepath.Split(obj.Name)
		newFileName := strings.ToLower(zipFileName)

		// add filename to zip
		zipFile, err := zipWriter.Create(newFileName)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
			return
		}

		// write entire file into zip archive
		_, err = zipFile.Write(slurp)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
			return
		}

		// flush that to buffer so we can write it off now
		//err = zipFile.Flush()
		//if err != nil {
		//	d.errorf("pack: unable to flush write of zip file from bucket %q, file %q: %v", cloud.bucket, zipFileName, err)
		//	//return
		//}

		// now drain all that buffered zip data to the cloud storage file
		log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)
		_, err = buf.WriteTo(storageWriter)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
			return
		}
	}

	// Make sure to check the error on Close.
	log.Infof(cloud.c, "Closing zip writer")
	err = zipWriter.Close()
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q: %v", cloud.bucket, fileName, err)
	}

	// write any leftover data
	if buf.Len() > 0 {
		// now drain all that buffered zip data to the cloud storage file
		//log.Infof(cloud.c, "Packing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)
		_, err := buf.WriteTo(storageWriter)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)
		}
	}

	// close up final write file
	//log.Infof(cloud.c, "Closing cloud storage file %v", fileName)
	if err := storageWriter.Close(); err != nil {
		log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
		return
	}

	// success!
	log.Infof(cloud.c, "Packed files to new cloud storage file %v successfully!", fileName)
}
Thanks to Stephen, who suggested not loading each file into a memory buffer when writing to the zip. Here is the fixed code for reference:
// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) bool {

	log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName)

	srcFolder = fmt.Sprintf("%v/", srcFolder)
	query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}

	objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
		return false
	}

	totalFiles := len(objs.Results)
	if totalFiles == 0 {
		log.Errorf(cloud.c, "Packing failed to find objects in bucket %q folder %q", cloud.bucket, srcFolder)
		return false
	}

	// create storage file for writing
	log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
	storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
	defer storageWriter.Close()

	// add optional content type and meta data
	if len(contentType) > 0 {
		storageWriter.ContentType = contentType
	}
	if metaData != nil {
		storageWriter.Metadata = *metaData
	}

	// Create a new zip archive that streams directly into the cloud storage writer
	zipWriter := zip.NewWriter(storageWriter)

	// go through each file in the folder
	for _, obj := range objs.Results {

		log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
		//d.dumpStats(obj)

		// read file in our source folder from storage - io.ReadCloser returned from storage
		storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
		if err != nil {
			log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
			return false
		}

		// grab just the filename from directory listing (don't want to store paths in zip)
		_, zipFileName := filepath.Split(obj.Name)
		newFileName := strings.ToLower(zipFileName)

		// add filename to zip
		zipFile, err := zipWriter.Create(newFileName)
		if err != nil {
			storageReader.Close()
			log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
			return false
		}

		// copy from storage reader to zip writer, closing each reader as soon
		// as its file is copied rather than deferring all closes to function exit
		_, err = io.Copy(zipFile, storageReader)
		storageReader.Close()
		if err != nil {
			log.Errorf(cloud.c, "Failed to copy from storage reader to zip file: %v", err)
			return false
		}
	}

	// Make sure to check the error on Close.
	log.Infof(cloud.c, "Closing zip writer")
	err = zipWriter.Close()
	if err != nil {
		log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q: %v", cloud.bucket, fileName, err)
		return false
	}

	// success!
	log.Infof(cloud.c, "Packed files to new cloud storage file %v successfully!", fileName)
	return true
}
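For readers on the current client library rather than the old App Engine google.golang.org/cloud/storage package used above, here is a minimal sketch of the same streaming approach against cloud.google.com/go/storage. The pack function, its parameters, and the plain error returns are my own framing for illustration, not part of the original code:

import (
	"archive/zip"
	"context"
	"fmt"
	"io"
	"path"
	"strings"

	"cloud.google.com/go/storage"
	"google.golang.org/api/iterator"
)

// pack streams every object under srcFolder/ in bucket into a single zip
// object named zipName, never holding more than io.Copy's buffer in memory.
func pack(ctx context.Context, client *storage.Client, bucket, srcFolder, zipName string) error {
	storageWriter := client.Bucket(bucket).Object(zipName).NewWriter(ctx)
	storageWriter.ContentType = "application/zip"
	zipWriter := zip.NewWriter(storageWriter)

	it := client.Bucket(bucket).Objects(ctx, &storage.Query{Prefix: srcFolder + "/", Delimiter: "/"})
	for {
		attrs, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return fmt.Errorf("listing %s: %v", srcFolder, err)
		}
		storageReader, err := client.Bucket(bucket).Object(attrs.Name).NewReader(ctx)
		if err != nil {
			return fmt.Errorf("opening %s: %v", attrs.Name, err)
		}
		zipFile, err := zipWriter.Create(strings.ToLower(path.Base(attrs.Name)))
		if err != nil {
			storageReader.Close()
			return fmt.Errorf("adding %s to zip: %v", attrs.Name, err)
		}
		if _, err := io.Copy(zipFile, storageReader); err != nil {
			storageReader.Close()
			return fmt.Errorf("copying %s: %v", attrs.Name, err)
		}
		storageReader.Close()
	}

	// Close the zip writer first so the central directory is flushed,
	// then close the storage writer to finalize the upload.
	if err := zipWriter.Close(); err != nil {
		return err
	}
	return storageWriter.Close()
}

The key point is the same as in the fixed code above: zip.NewWriter writes straight into the object writer, so each file makes a single streaming pass from the GCS read to the GCS write.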