Best Approach to Zipping up a Folder in Google Cloud Storage using Go?

My Google App Engine Go project creates a zip of multiple files in a "folder" in Google Cloud Storage. When it was implemented in the Blobstore using the now deprecated and removed Files API, it used to be very fast. I recently converted the code to use Google Cloud Storage and now the performance is really bad, sometimes timing out. The files being zipped are between 1K and 2M in size.

I am looking for any advice on improving how the file contents get zipped. The code below is what I wrote to zip multiple files in the cloud into a new zip file, also in the cloud. It can take a very long time to execute, and it has to load the entire contents of each file into memory (see the PERFORMANCE ISSUE comment below) before writing it to the zip. There must be a better way; a sketch of a streaming alternative appears after the listing.

// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) {

    log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 

    srcFolder = fmt.Sprintf("%v/", srcFolder)
    query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}
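    // the Prefix/Delimiter pair above limits the listing to objects directly
    // under srcFolder; objects in nested sub-folders are not returned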

    objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
        return
    }

    totalFiles := len(objs.Results)
    if totalFiles == 0 {
        log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
        return
    }

    // create storage file for writing
    log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
    storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)

    // add optional content type and metadata
    if len(contentType) > 0 {
        storageWriter.ContentType = contentType
    }
    if metaData != nil {
        storageWriter.Metadata = *metaData
    }

    // Create a buffer to write our archive to.
    buf := new(bytes.Buffer)

    // Create a new zip archive to memory buffer
    zipWriter := zip.NewWriter(buf)
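    // NOTE: every zip entry written below is staged in this in-process buffer
    // and then copied a second time into cloud storage; this double handling
    // is the heart of the performance problem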

    // go through each file in the folder
    for _, obj := range objs.Results {

        log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
        //d.dumpStats(obj)

        // read file in our source folder from storage - io.ReadCloser returned from storage
        storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return  
        }
        // PERFORMANCE ISSUE: have to load the entire file into memory to get random access from the cloud
        slurp, err := ioutil.ReadAll(storageReader)
        // close each reader promptly; a defer inside the loop would keep
        // every reader open until this function returns
        storageReader.Close()
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read data from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return
        }

        // grab just the filename from directory listing (don't want to store paths in zip)
        _, zipFileName := filepath.Split(obj.Name)

        newFileName := strings.ToLower(zipFileName)

        // add filename to zip
        zipFile, err := zipWriter.Create(newFileName)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // write entire file into zip archive
        _, err = zipFile.Write(slurp)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return
        }

        // flush that to buffer so we can write it off now
        //err = zipFile.Flush()
        //if err != nil {
        //  d.errorf("pack: unable to flush write of zip file from bucket %q, file %q: %v", cloud.bucket, zipFileName, err)
        //  //return
        //}

        // now drain all that buffered zip data to the cloud storage file   
        log.Infof(cloud.c, "Writing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)   
        _, err = buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)   
            return          
        }
    }

    // Make sure to check the error on Close.
    log.Infof(cloud.c, "Closing zip writer")    
    err = zipWriter.Close()
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
    }

    // write any leftover data
    if buf.Len() > 0 {
        // now drain all that buffered zip data to the cloud storage file   
        // log.Infof(cloud.c, "Packing zip buffer of size %v to cloud storage file %v", buf.Len(), fileName)    
        _, err := buf.WriteTo(storageWriter)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to write data to bucket %q file %q: %v", cloud.bucket, fileName, err)               
        }
    }

    // close up final write file
    //log.Infof(cloud.c, "Closing cloud storage file %v", fileName) 
    if err := storageWriter.Close(); err != nil {
        log.Errorf(cloud.c, "Packing failed to close bucket %q file %q: %v", cloud.bucket, fileName, err)
        return
    }

    // success!
    log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
}
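The slowness above comes from the double handling: every byte is first compressed into the in-memory buffer and then copied again into cloud storage, and each source file is slurped whole with ioutil.ReadAll. Since zip.NewWriter accepts any io.Writer, the storage writer can be wrapped directly and each file streamed through with io.Copy. A minimal sketch of that pattern, reusing the names from the code above (error handling elided):

storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
zipWriter := zip.NewWriter(storageWriter) // compressed bytes go straight to the object

for _, obj := range objs.Results {
    storageReader, _ := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
    zipFile, _ := zipWriter.Create(strings.ToLower(filepath.Base(obj.Name)))
    io.Copy(zipFile, storageReader) // stream each file; no whole-file buffer
    storageReader.Close()
}

// order matters: close the zip writer first so the central directory is
// written, then close the storage writer to finalize the object
zipWriter.Close()
storageWriter.Close()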

Thanks to Stephen, who suggested not loading the files into a memory buffer while writing to the zip. Here is the fixed code for reference:

// Pack a folder into zip file
func (cloud *Cloud) Pack(srcFolder string, fileName string, contentType string, metaData *map[string]string) bool {

    log.Infof(cloud.c, "Packing bucket %v folder %v to file %v", cloud.bucket, srcFolder, fileName) 

    srcFolder = fmt.Sprintf("%v/", srcFolder)
    query := &storage.Query{Prefix: srcFolder, Delimiter: "/"}

    objs, err := storage.ListObjects(cloud.ctx, cloud.bucket, query)
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to list bucket %q: %v", cloud.bucket, err)
        return false
    }

    totalFiles := len(objs.Results)
    if totalFiles == 0 {
        log.Errorf(cloud.c, "Packing failed to find objects found in folder %q: %v", cloud.bucket, srcFolder)
        return false
    }

    // create storage file for writing
    log.Infof(cloud.c, "Writing new zip file to %v/%v for %v files", cloud.bucket, fileName, totalFiles)
    storageWriter := storage.NewWriter(cloud.ctx, cloud.bucket, fileName)
    // the deferred Close finalizes the object even on an early return;
    // note that any error it reports goes unchecked here
    defer storageWriter.Close()

    // add optional content type and metadata
    if len(contentType) > 0 {
        storageWriter.ContentType = contentType
    }
    if metaData != nil {
        storageWriter.Metadata = *metaData
    }

    // Create a new zip archive that writes directly into the cloud storage object
    zipWriter := zip.NewWriter(storageWriter)

    // go through each file in the folder
    for _, obj := range objs.Results {

        log.Infof(cloud.c, "Packing file %v of size %v to zip file", obj.Name, obj.Size)
        //d.dumpStats(obj)

        // read file in our source folder from storage - io.ReadCloser returned from storage
        storageReader, err := storage.NewReader(cloud.ctx, cloud.bucket, obj.Name)
        if err != nil {
            log.Errorf(cloud.c, "Packing failed to read from bucket %q file %q: %v", cloud.bucket, obj.Name, err)
            return false
        }

        // grab just the filename from directory listing (don't want to store paths in zip)
        _, zipFileName := filepath.Split(obj.Name)
        newFileName := strings.ToLower(zipFileName)

        // add filename to zip
        zipFile, err := zipWriter.Create(newFileName)
        if err != nil {
            storageReader.Close()
            log.Errorf(cloud.c, "Packing failed to create zip file from bucket %q file %q: %v", cloud.bucket, zipFileName, err)
            return false
        }

        // stream from the storage reader straight into the zip writer
        _, err = io.Copy(zipFile, storageReader)
        // close each reader promptly; a defer inside the loop would keep
        // every reader open until this function returns
        storageReader.Close()
        if err != nil {
            log.Errorf(cloud.c, "Failed to copy from storage reader to zip file: %v", err)
            return false
        }
    }

    // Make sure to check the error on Close.
    log.Infof(cloud.c, "Closing zip writer")    
    err = zipWriter.Close()
    if err != nil {
        log.Errorf(cloud.c, "Packing failed to close zip file writer from bucket %q file %q : %v", cloud.bucket, fileName, err)
        return false
    }

    // success!
    log.Infof(cloud.c, "Packed files to new cloud storage file %v successful!", fileName)   
    return true
}
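
For completeness, a hypothetical call site; the Cloud value and the bucket, folder, and file names below are illustrative assumptions, not part of the original code:

// hypothetical usage: assumes a Cloud value wired up with an App Engine
// context and bucket name, as the method above expects
cloud := &Cloud{c: c, ctx: ctx, bucket: "my-app-bucket"}

meta := map[string]string{"x-packed-by": "cloud.Pack"}
if !cloud.Pack("reports/2015-08", "reports/2015-08.zip", "application/zip", &meta) {
    log.Errorf(c, "packing reports folder failed")
}

One caveat applies to both versions: storage.ListObjects returns a single page of results, so a folder holding more objects than fit in one page would need a pagination loop (in this vintage of the library, presumably re-issuing the call with objs.Next until it is nil).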