如何在 Azure 中将多个 CloudBlockBlob 对象串联成一个 CloudBlockBlob 对象?

How do I concatenate multiple CloudBlockBlob objects into one CloudBlockBlob object in Azure?

我正在尝试在 Azure 函数中将多个 CloudBlockBlob 对象串联成单个 CloudBlockBlob 对象。我尝试过先将多个对象下载到同一个内存流中,再将该内存流上传为新对象,但函数在操作完成前就超时了。我还尝试过每读取一个输入 blob 就写入一次新的 blob,但每次写入 CloudBlockBlob 都会覆盖之前的输出。我知道有 CloudAppendBlob,但我希望输出文件的类型是 CloudBlockBlob。

有更好的方法吗?

这是我的代码,它将多个 CloudBlockBlob 读取到一个内存流中,然后将该流写入一个新的 CloudBlockBlob。

        /// <summary>
        /// Concatenates the given source blobs into a single block blob. Every source
        /// blob is downloaded, in array order, into one shared in-memory buffer, and
        /// that buffer is then uploaded as the destination blob.
        /// </summary>
        /// <param name="srcBlobs">Names of the source blobs, in concatenation order.</param>
        /// <param name="destinationBlob">Name of the block blob to upload the result to.</param>
        /// <exception cref="Exception">
        /// Thrown when the configured connection string cannot be parsed; any failure
        /// during download or upload is logged and rethrown unchanged.
        /// </exception>
        public async Task CatBlob(string[] srcBlobs, string destinationBlob)
        {
            var connectionString = Config.AzConnStr;
            var container = Config.AzContainer;

            // Guard clause: without a valid storage account there is nothing to do.
            CloudStorageAccount storageAccount;
            if (!CloudStorageAccount.TryParse(connectionString, out storageAccount))
            {
                Logger.Error("Exception connecting to cloud storage while concatenating files", Context);
                throw new Exception("Could not connect to the Azure using Connection String.");
            }

            try
            {
                CloudBlobClient cloudBlobClient = storageAccount.CreateCloudBlobClient();
                CloudBlobContainer cloudBlobContainer = cloudBlobClient.GetContainerReference(container);

                // Use the caller-supplied destination name instead of a hard-coded path.
                CloudBlockBlob blockBlobDest = cloudBlobContainer.GetBlockBlobReference(destinationBlob);
                using (MemoryStream memStream = new MemoryStream())
                {
                    // Each download appends to the same stream, so the complete
                    // concatenated payload is held in memory before the upload starts.
                    for (int i = 0; i < srcBlobs.Length; i++)
                    {
                        CloudBlockBlob cloudBlockBlobSrc = cloudBlobContainer.GetBlockBlobReference(srcBlobs[i]);
                        Console.WriteLine("loop {0}", i);
                        await cloudBlockBlobSrc.DownloadToStreamAsync(memStream);
                    }

                    // Rewind so the upload reads the buffer from its first byte.
                    memStream.Seek(0, SeekOrigin.Begin);
                    await blockBlobDest.UploadFromStreamAsync(memStream);
                }
            }
            catch (Exception ex)
            {
                Logger.Error("Exception while concatenating files: " + ex.Message, Context);
                throw;
            }
        }

请参阅下面的示例代码。它使用 Azure.Storage.Blobs(版本 12.9.1)SDK。我没有实际运行过这段代码,所以它可能会抛出一些错误。

基本思路是:逐个下载每个源 blob,并立即将其内容作为一个块(block)暂存到目标 blob 中(而您目前的做法是在客户端创建一个非常大的内存流)。所有块上传完毕后,再提交块列表以创建目标 blob。

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using Azure.Storage.Blobs.Specialized;

namespace SO68566758
{
    /// <summary>
    /// Console sample that concatenates several blobs into one block blob by staging
    /// each source blob as a block of the destination and then committing the block list.
    /// </summary>
    class Program
    {
        // Placeholders: replace with the real connection string and container name.
        private const string connectionString = "your-connection-string";
        private const string container = "your-container-name";

        /// <summary>
        /// Entry point: concatenates the sample source blobs into the sample destination blob.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            string[] srcBlobs = new[] { "blob1.txt", "blob2.txt" }; // Specify source blob names.
            string destinationBlob = "subfolder/test.zip"; // Specify destination blob name.
            await CatBlob(srcBlobs, destinationBlob);
        }

        /// <summary>
        /// Downloads the blobs named in <paramref name="srcBlobs"/> one at a time and
        /// stages the contents of each as a block of the destination blob. Once all
        /// blocks are staged, the block list is committed to create the destination blob.
        /// </summary>
        /// <remarks>
        /// Each source blob is buffered in memory in full and staged as a single block,
        /// so every source blob must fit within the service's maximum block size.
        /// </remarks>
        /// <param name="srcBlobs">Names of the source blobs, in concatenation order.</param>
        /// <param name="destinationBlob">Name of the destination block blob.</param>
        /// <exception cref="ArgumentNullException"><paramref name="srcBlobs"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="destinationBlob"/> is null or empty.</exception>
        public static async Task CatBlob(string[] srcBlobs, string destinationBlob)
        {
            // Fail fast on bad arguments before any client is created.
            if (srcBlobs == null)
            {
                throw new ArgumentNullException(nameof(srcBlobs));
            }

            if (string.IsNullOrEmpty(destinationBlob))
            {
                throw new ArgumentException("Destination blob name must not be null or empty.", nameof(destinationBlob));
            }

            BlobServiceClient blobServiceClient = new BlobServiceClient(connectionString);
            BlobContainerClient containerClient = blobServiceClient.GetBlobContainerClient(container);
            BlockBlobClient destinationBlobClient = containerClient.GetBlockBlobClient(destinationBlob);

            // Block IDs are collected in source order; the commit order defines the
            // byte order of the destination blob.
            List<string> blockIds = new List<string>(srcBlobs.Length);
            for (var i = 0; i < srcBlobs.Length; i++)
            {
                BlockBlobClient sourceBlobClient = containerClient.GetBlockBlobClient(srcBlobs[i]);

                // Download the source blob and expose its contents as a stream.
                BlobDownloadResult downloadResult = await sourceBlobClient.DownloadContentAsync();
                using (Stream stream = downloadResult.Content.ToStream())
                {
                    // Fixed-width, culture-invariant index so that every block ID has
                    // the same length, then Base64-encoded as Put Block requires.
                    string blockIndex = i.ToString("d6", System.Globalization.CultureInfo.InvariantCulture);
                    string blockId = Convert.ToBase64String(Encoding.UTF8.GetBytes(blockIndex));

                    // Stage the contents as one block of the destination blob.
                    await destinationBlobClient.StageBlockAsync(blockId, stream);
                    blockIds.Add(blockId);
                }
            }

            // All blocks have been staged. Now it's time to commit the destination blob.
            await destinationBlobClient.CommitBlockListAsync(blockIds);
        }
    }
}

请参阅 Put Block 和 Put Block List 这两个 REST API 操作,以了解有关如何分块上传块 blob 的更多信息。