如何在只有文件 URL 的情况下,通过 multipart/form-data POST 分块上传该文件
How to upload a file via a multipart/form-data POST when you only have the file's URL (in chunks)
在只有文件 URL 的情况下,是否可以通过一个以 multipart/form-data 作为内容类型的 API 端点上传该文件?
规则:
将整个文件下载到内存中,然后再通过此端点上传是不可行的(无法保证这台机器的内存始终足够大,能够容纳临时文件)。
问题:
我想将文件从一台服务器 (GET) 分块流式传输到另一台 (multipart/form-data POST)。这可能吗?如何实现?
流程:
file_server <-GET- my_script.py -POST-> 上传服务器
这里有一个简单的下载到内存(RAM)选项的例子(但这是违反规则的):
from io import BytesIO
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
# Remote file that should end up on the upload server.
file_url = 'https://www.sysaid.com/wp-content/uploads/features/itam/image-banner-asset.png'

# stream=True: the response body is not fetched until it is accessed.
requested_file_response = requests.get(file_url, stream=True)

# Parameters passed to the OAuth token request in get_token();
# the real credential values are masked in this example.
TOKEN_PAYLOAD = dict(
    grant_type='password',
    client_id='#########',
    client_secret='#########',
    username='#########',
    password='#########',
)
def get_token():
    """Request an access token from the upload server.

    Posts TOKEN_PAYLOAD to the OAuth token endpoint and returns the
    ``access_token`` field of the JSON reply. When that field is missing
    or empty, "token error!" is printed and the falsy value is returned.
    """
    # NOTE(review): params= places the credentials in the URL query string;
    # confirm whether the server expects them in the form body (data=).
    reply = requests.post(
        'https://upload_server/oauth/token',
        params=TOKEN_PAYLOAD,
    )
    access_token = reply.json().get('access_token')
    if access_token:
        return access_token
    print("token error!")
    return access_token
token = get_token()

# Copy the complete download into an in-memory buffer. This is the step
# that breaks the "do not hold the whole file in RAM" rule.
file_object = BytesIO()
file_object.write(requested_file_response.content)

# Form content
multipart_data = MultipartEncoder(
    fields={
        '--': (
            'test.png',
            # Passing a generator here instead fails with:
            # AttributeError: 'generator' object has no attribute 'encode'
            file_object,
        ),
        'id': '2217',
        'fileFieldDefId': '4258',
    }
)

session = requests.Session()

# Create headers: bearer token plus the encoder's multipart content type
# (which carries the boundary).
headers = {
    "Authorization": "Bearer {}".format(token),
    'Content-Type': multipart_data.content_type,
}

response = session.post(
    'https://upload_server/multipartUpdate',
    headers=headers,
    data=multipart_data,
)
答案也许在于为流式传输创建一个类文件(file-like)对象。
非常感谢您的帮助。干杯!
如果我正确阅读 requests_toolbelt 源代码,它不仅需要能够 .read()
文件(我们可以通过传递 requests.get(..., stream=True).raw
获得),而且还需要以某种方式确定流中剩余多少数据。
假设您有信心始终拥有有效的 content-length
header,这就是我建议的解决方案:
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
# URL of the file that is streamed from the source server.
file_url = 'https://www.sysaid.com/wp-content/uploads/features/itam/image-banner-asset.png'
# Endpoint that receives the multipart/form-data POST.
target = 'http://localhost:5000/test'
class PinocchioFile:
    """Minimal file-like wrapper around a streamed HTTP download.

    The object offers ``read()`` plus a ``len`` attribute holding the
    number of bytes still expected, so it can be handed to an encoder
    that needs both (requests_toolbelt's MultipartEncoder appears to
    read ``len`` -- confirm against the installed version).

    Attributes:
        req: the streamed ``requests`` response.
        len: bytes remaining according to the ``content-length`` header,
            or ``None`` when the server sent no such header.
    """

    def __init__(self, url):
        """Open *url* with ``stream=True`` and record its declared length."""
        self.req = requests.get(url, stream=True)
        length = self.req.headers.get('content-length')
        self.len = None if length is None else int(length)
        self._raw = self.req.raw

    def read(self, chunk_size=None):
        """Return up to *chunk_size* bytes and update ``len``.

        Returns ``b''`` at end of stream, at which point ``len`` is set
        to 0. With ``chunk_size=None`` the argument is passed through
        unchanged to the underlying raw stream's ``read``.
        """
        chunk = self._raw.read(chunk_size) or b''
        if not chunk:
            # End of stream: nothing is left regardless of the header.
            self.len = 0
        elif self.len is not None:
            # Unknown length (no content-length header) stays None instead
            # of raising TypeError; a known length never drops below zero.
            self.len = max(self.len - len(chunk), 0)
        return chunk

    def close(self):
        """Release the underlying HTTP response."""
        self.req.close()
# The streaming wrapper is given to the encoder in place of a real file.
multipart_data = MultipartEncoder(
    fields={
        '--': ('test.png', PinocchioFile(file_url)),
        'id': '2217',
        'fileFieldDefId': '4258',
    }
)

# Create headers: the encoder supplies the multipart content type.
headers = {'Content-Type': multipart_data.content_type}

response = requests.post(target, data=multipart_data, headers=headers)
在只有文件 URL 的情况下,是否可以通过一个以 multipart/form-data 作为内容类型的 API 端点上传该文件?
规则: 将整个文件下载到内存中,然后再通过此端点上传是不可行的(无法保证这台机器的内存始终足够大,能够容纳临时文件)。
问题: 我想将文件从一台服务器 (GET) 分块流式传输到另一台 (multipart/form-data POST)。这可能吗?如何实现?
流程: file_server <-GET- my_script.py -POST-> 上传服务器
这里有一个简单的下载到内存(RAM)选项的例子(但这是违反规则的):
from io import BytesIO
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
# Remote file that should end up on the upload server.
file_url = 'https://www.sysaid.com/wp-content/uploads/features/itam/image-banner-asset.png'

# stream=True: the response body is not fetched until it is accessed.
requested_file_response = requests.get(file_url, stream=True)

# Parameters passed to the OAuth token request in get_token();
# the real credential values are masked in this example.
TOKEN_PAYLOAD = dict(
    grant_type='password',
    client_id='#########',
    client_secret='#########',
    username='#########',
    password='#########',
)
def get_token():
    """Request an access token from the upload server.

    Posts TOKEN_PAYLOAD to the OAuth token endpoint and returns the
    ``access_token`` field of the JSON reply. When that field is missing
    or empty, "token error!" is printed and the falsy value is returned.
    """
    # NOTE(review): params= places the credentials in the URL query string;
    # confirm whether the server expects them in the form body (data=).
    reply = requests.post(
        'https://upload_server/oauth/token',
        params=TOKEN_PAYLOAD,
    )
    access_token = reply.json().get('access_token')
    if access_token:
        return access_token
    print("token error!")
    return access_token
token = get_token()

# Copy the complete download into an in-memory buffer. This is the step
# that breaks the "do not hold the whole file in RAM" rule.
file_object = BytesIO()
file_object.write(requested_file_response.content)

# Form content
multipart_data = MultipartEncoder(
    fields={
        '--': (
            'test.png',
            # Passing a generator here instead fails with:
            # AttributeError: 'generator' object has no attribute 'encode'
            file_object,
        ),
        'id': '2217',
        'fileFieldDefId': '4258',
    }
)

session = requests.Session()

# Create headers: bearer token plus the encoder's multipart content type
# (which carries the boundary).
headers = {
    "Authorization": "Bearer {}".format(token),
    'Content-Type': multipart_data.content_type,
}

response = session.post(
    'https://upload_server/multipartUpdate',
    headers=headers,
    data=multipart_data,
)
答案也许在于为流式传输创建一个类文件(file-like)对象。
非常感谢您的帮助。干杯!
如果我正确阅读 requests_toolbelt 源代码,它不仅需要能够 .read()
文件(我们可以通过传递 requests.get(..., stream=True).raw
获得),而且还需要以某种方式确定流中剩余多少数据。
假设您有信心始终拥有有效的 content-length
header,这就是我建议的解决方案:
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder
# URL of the file that is streamed from the source server.
file_url = 'https://www.sysaid.com/wp-content/uploads/features/itam/image-banner-asset.png'
# Endpoint that receives the multipart/form-data POST.
target = 'http://localhost:5000/test'
class PinocchioFile:
    """Minimal file-like wrapper around a streamed HTTP download.

    The object offers ``read()`` plus a ``len`` attribute holding the
    number of bytes still expected, so it can be handed to an encoder
    that needs both (requests_toolbelt's MultipartEncoder appears to
    read ``len`` -- confirm against the installed version).

    Attributes:
        req: the streamed ``requests`` response.
        len: bytes remaining according to the ``content-length`` header,
            or ``None`` when the server sent no such header.
    """

    def __init__(self, url):
        """Open *url* with ``stream=True`` and record its declared length."""
        self.req = requests.get(url, stream=True)
        length = self.req.headers.get('content-length')
        self.len = None if length is None else int(length)
        self._raw = self.req.raw

    def read(self, chunk_size=None):
        """Return up to *chunk_size* bytes and update ``len``.

        Returns ``b''`` at end of stream, at which point ``len`` is set
        to 0. With ``chunk_size=None`` the argument is passed through
        unchanged to the underlying raw stream's ``read``.
        """
        chunk = self._raw.read(chunk_size) or b''
        if not chunk:
            # End of stream: nothing is left regardless of the header.
            self.len = 0
        elif self.len is not None:
            # Unknown length (no content-length header) stays None instead
            # of raising TypeError; a known length never drops below zero.
            self.len = max(self.len - len(chunk), 0)
        return chunk

    def close(self):
        """Release the underlying HTTP response."""
        self.req.close()
# The streaming wrapper is given to the encoder in place of a real file.
multipart_data = MultipartEncoder(
    fields={
        '--': ('test.png', PinocchioFile(file_url)),
        'id': '2217',
        'fileFieldDefId': '4258',
    }
)

# Create headers: the encoder supplies the multipart content type.
headers = {'Content-Type': multipart_data.content_type}

response = requests.post(target, data=multipart_data, headers=headers)