Python 3:可迭代对象的惰性分块获取
Python 3 iterable lazy chunk fetch
是否有标准方法能够延迟获取下一块数据并按元素生成它
目前我正在获取所有块并使用 itertools 链接它们
def list_blobs(container_name: str, prefix: str):
    """Fetch every page of blobs up front and return them chained together.

    Pages are requested from the module-level ``blob_service`` 100 results
    at a time. All pages are collected into a list before anything is
    returned, so this is the eager variant: every request is issued before
    the caller receives the first element.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Returns:
        An ``itertools.chain`` iterator over the items of all fetched pages.
    """
    pages = []
    marker = None  # the first request carries no continuation marker
    has_more = True
    while has_more:
        page = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=marker
        )
        marker = page.next_marker
        pages.append(page)
        # A falsy marker means there is nothing further to request.
        has_more = bool(marker)
    return itertools.chain.from_iterable(pages)
list_blobs fetcher 的 "lazy" 版本是什么?
你可以直接使用 `yield from`:
def list_blobs(container_name: str, prefix: str):
    """Lazily yield blobs one by one, requesting the next page only on demand.

    Each page is requested from the module-level ``blob_service`` with up to
    100 results. The following page is requested only after the caller has
    consumed the current one.

    The first request is sent with ``marker=None``. Seeding the marker with
    ``True`` just to enter the loop would send ``marker=True`` on the first
    request, which is not a continuation marker returned by the service.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Yields:
        The individual items of every page, in the order they are returned.
    """
    next_marker = None  # no continuation marker for the very first request
    while True:
        blobs = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=next_marker
        )
        next_marker = blobs.next_marker
        yield from blobs
        # A falsy marker means the page just yielded was the last one.
        if not next_marker:
            return
将 `chunks.append(blobs)` 替换为 `yield from blobs`,并完全删除 `return` 语句和 `chunks` 列表:
def generate_blobs(container_name: str, prefix: str):
    """Generate blobs one at a time, requesting pages lazily.

    A page of up to 100 results is requested from the module-level
    ``blob_service``; its items are yielded before the next page is
    requested. Iteration stops once a page reports a falsy ``next_marker``.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Yields:
        The individual items of each page, in order.
    """
    marker = None  # the first request carries no continuation marker
    has_more = True
    while has_more:
        page = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=marker
        )
        # Read the marker before handing the page's items to the caller.
        marker = page.next_marker
        yield from page
        has_more = bool(marker)
将函数转换为一次生成单个项目的生成器函数。
@ShadowRanger,@Kasrâmvd 非常感谢
@timgeb,通过 Azure Blob 存储实现惰性迭代的完整代码
from azure.storage.blob import BlockBlobService
from azure.storage.blob import Blob
from typing import Iterable, Tuple
def blob_iterator(account: str, account_key: str, bucket: str, prefix: str) -> Iterable[Tuple[str, str]]:
    """Return a lazy iterable of ``(blob name, blob text)`` pairs.

    A ``BlockBlobService`` client is created immediately. Listing and
    download requests are only made as the returned generator is consumed.

    Args:
        account: Passed to ``BlockBlobService`` as ``account_name``.
        account_key: Passed to ``BlockBlobService`` as ``account_key``.
        bucket: Container whose blobs are listed and downloaded.
        prefix: Name prefix passed to the listing call.

    Returns:
        A generator expression producing one ``(name, text)`` tuple per
        listed blob.
    """
    blob_service = BlockBlobService(account_name=account, account_key=account_key)

    # Annotated as an iterable of blobs: this is a generator function, so it
    # never returns a single ``Blob``.
    def list_blobs(bucket: str, prefix: str) -> Iterable[Blob]:
        """Yield listed blobs page by page, up to 100 results per request."""
        next_marker = None  # no continuation marker for the first request
        while True:
            blobs = blob_service.list_blobs(bucket, prefix=prefix, num_results=100, marker=next_marker)
            yield from blobs
            # The marker is read only after the page has been consumed.
            next_marker = blobs.next_marker
            if not next_marker:
                break

    def get_text(bucket: str, name: str) -> str:
        """Return the ``content`` of the named blob, fetched as text."""
        return blob_service.get_blob_to_text(bucket, name).content

    return ((blob.name, get_text(bucket, blob.name)) for blob in list_blobs(bucket, prefix))
# Example call: builds the iterable of (blob name, blob text) pairs.
# NOTE(review): the literal argument values look like placeholders — replace with real settings.
it = blob_iterator('account', 'account_key', 'container_name', prefix='AA')
是否有标准方法能够延迟获取下一块数据并按元素生成它
目前我正在获取所有块并使用 itertools 链接它们
def list_blobs(container_name: str, prefix: str):
    """Fetch every page of blobs up front and return them chained together.

    Pages are requested from the module-level ``blob_service`` 100 results
    at a time. All pages are collected into a list before anything is
    returned, so this is the eager variant: every request is issued before
    the caller receives the first element.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Returns:
        An ``itertools.chain`` iterator over the items of all fetched pages.
    """
    pages = []
    marker = None  # the first request carries no continuation marker
    has_more = True
    while has_more:
        page = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=marker
        )
        marker = page.next_marker
        pages.append(page)
        # A falsy marker means there is nothing further to request.
        has_more = bool(marker)
    return itertools.chain.from_iterable(pages)
list_blobs fetcher 的 "lazy" 版本是什么?
你可以直接使用 `yield from`:
def list_blobs(container_name: str, prefix: str):
    """Lazily yield blobs one by one, requesting the next page only on demand.

    Each page is requested from the module-level ``blob_service`` with up to
    100 results. The following page is requested only after the caller has
    consumed the current one.

    The first request is sent with ``marker=None``. Seeding the marker with
    ``True`` just to enter the loop would send ``marker=True`` on the first
    request, which is not a continuation marker returned by the service.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Yields:
        The individual items of every page, in the order they are returned.
    """
    next_marker = None  # no continuation marker for the very first request
    while True:
        blobs = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=next_marker
        )
        next_marker = blobs.next_marker
        yield from blobs
        # A falsy marker means the page just yielded was the last one.
        if not next_marker:
            return
将 `chunks.append(blobs)` 替换为 `yield from blobs`,并完全删除 `return` 语句和 `chunks` 列表:
def generate_blobs(container_name: str, prefix: str):
    """Generate blobs one at a time, requesting pages lazily.

    A page of up to 100 results is requested from the module-level
    ``blob_service``; its items are yielded before the next page is
    requested. Iteration stops once a page reports a falsy ``next_marker``.

    Args:
        container_name: Container passed through to ``blob_service.list_blobs``.
        prefix: Name prefix passed through as the ``prefix`` keyword.

    Yields:
        The individual items of each page, in order.
    """
    marker = None  # the first request carries no continuation marker
    has_more = True
    while has_more:
        page = blob_service.list_blobs(
            container_name, prefix=prefix, num_results=100, marker=marker
        )
        # Read the marker before handing the page's items to the caller.
        marker = page.next_marker
        yield from page
        has_more = bool(marker)
将函数转换为一次生成单个项目的生成器函数。
@ShadowRanger,@Kasrâmvd 非常感谢
@timgeb,通过 Azure Blob 存储实现惰性迭代的完整代码
from azure.storage.blob import BlockBlobService
from azure.storage.blob import Blob
from typing import Iterable, Tuple
def blob_iterator(account: str, account_key: str, bucket: str, prefix: str) -> Iterable[Tuple[str, str]]:
    """Return a lazy iterable of ``(blob name, blob text)`` pairs.

    A ``BlockBlobService`` client is created immediately. Listing and
    download requests are only made as the returned generator is consumed.

    Args:
        account: Passed to ``BlockBlobService`` as ``account_name``.
        account_key: Passed to ``BlockBlobService`` as ``account_key``.
        bucket: Container whose blobs are listed and downloaded.
        prefix: Name prefix passed to the listing call.

    Returns:
        A generator expression producing one ``(name, text)`` tuple per
        listed blob.
    """
    blob_service = BlockBlobService(account_name=account, account_key=account_key)

    # Annotated as an iterable of blobs: this is a generator function, so it
    # never returns a single ``Blob``.
    def list_blobs(bucket: str, prefix: str) -> Iterable[Blob]:
        """Yield listed blobs page by page, up to 100 results per request."""
        next_marker = None  # no continuation marker for the first request
        while True:
            blobs = blob_service.list_blobs(bucket, prefix=prefix, num_results=100, marker=next_marker)
            yield from blobs
            # The marker is read only after the page has been consumed.
            next_marker = blobs.next_marker
            if not next_marker:
                break

    def get_text(bucket: str, name: str) -> str:
        """Return the ``content`` of the named blob, fetched as text."""
        return blob_service.get_blob_to_text(bucket, name).content

    return ((blob.name, get_text(bucket, blob.name)) for blob in list_blobs(bucket, prefix))
# Example call: builds the iterable of (blob name, blob text) pairs.
# NOTE(review): the literal argument values look like placeholders — replace with real settings.
it = blob_iterator('account', 'account_key', 'container_name', prefix='AA')