Python 比较本地和远程文件的 MD5 哈希值
Python Compare local and remote file MD5 Hash
我正在尝试比较本地文件和远程文件的 MD5 哈希值(远程文件就是我复制粘贴到 wamp 的 "www" 目录中的同一个文件),但我不明白为什么两个校验和("checksums")不一致……
校验和代码如下:
#-*- coding: utf-8 -*-
import hashlib
import requests
# Return the MD5 hex digest of a local file (when url is None) or of the body
# fetched from `url` (filePath is then ignored).
# NOTE: this is the question's original version; its remote branch is what
# produces the mismatching checksum shown in the output below.
def md5Checksum(filePath,url):
if url==None:
# Local branch: hash the file in 8192-byte binary chunks until read() is empty.
with open(filePath, 'rb') as fh:
m = hashlib.md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
else:
r = requests.get(url, stream=True)
m = hashlib.md5()
# NOTE: per the requests documentation iter_lines() yields the body line by
# line WITHOUT the line terminators, so those bytes never reach the hash and
# the digest differs from the local one whenever the body contains newline
# bytes (typical for binary data).
for line in r.iter_lines():
m.update(line)
return m.hexdigest()
# Python 2 print statements: show the local and the remote digest.
print "checksum_local :",md5Checksum("projectg715gb.pak",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/projectg715gb.pak")
我很惊讶得到这个输出:
checksum_local : 9d33806fdebcb91c3d7bfee7cfbe4ad7
checksum_remote : a13aaeb99eb020a0bc8247685c274e7d
"projectg715gb.pak" 的大小是 14.7Mb
但是如果我尝试使用文本文件(大小 1Kb):
print "checksum_local :",md5Checksum("toto.txt",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/toto.txt")
然后就可以了 oO 我得到了这个输出:
checksum_local : f71dbe52628a3f83a77ab494817525c6
checksum_remote : f71dbe52628a3f83a77ab494817525c6
我是比较 MD5 散列的新手,所以请多多关照 ^^' 我可能犯了一些大错误,我不明白为什么它对大文件不起作用,如果有人可以给我提示,那就太好了!
但是感谢阅读和帮助!
感谢帮助者,这里是最终代码:
#-*- coding: utf-8 -*-
import hashlib
import requests
def md5Checksum(filePath,url):
    """Return the MD5 hex digest of a local file or of a remote resource.

    Exactly one source is used: when ``url`` is None the file at
    ``filePath`` is hashed; otherwise ``filePath`` is ignored and the body
    returned by a GET request to ``url`` is hashed.

    Args:
        filePath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        IOError/OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    m = hashlib.md5()
    if url is None:
        with open(filePath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        return m.hexdigest()
    # stream=True defers the body download, so iter_content() really hashes
    # chunk by chunk instead of buffering the whole file in memory first.
    r = requests.get(url, stream=True)
    try:
        # Fail loudly on 4xx/5xx instead of returning the checksum of an
        # error page, which would look like a genuine mismatch.
        r.raise_for_status()
        for data in r.iter_content(8192):
            m.update(data)
    finally:
        # Release the connection even if hashing is interrupted.
        r.close()
    return m.hexdigest()
# Python 2 print statements: show the local and the remote digest of the same file.
print "checksum_local :",md5Checksum("projectg715gb.pak",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/projectg715gb.pak")
感谢 https://whosebug.com/users/7495742/framb-axa 发帖分享了你的问题和解决方法,
这对解决我的问题很有帮助。
我针对 Python 3 稍微修改了 md5 部分和打印语句,并把哈希算法换成了 sha256。这正好满足我所开发的应用程序下载并校验本地和远程 sqlite 数据库的需要。我把代码留在这里,供其他偶然看到这个帖子的人参考。
import hashlib
import requests
# current release version url
current_release_url = 'https://somedomain.here/current_release.txt'
current_release_notes_url = 'https://somedomain.here/current_release_notes.txt'
# current database release version url
current_db_release_url = 'https://somedomain.here/current_db_release.txt'
current_db_release_notes_url = 'https://somedomain.here/current_db_release_notes.txt'
current_db_release_notes_hash_url = 'https://somedomain.here/current_db_release_hash.txt'
# Release numbers; the update logic further down compares them with int().
# Both start out empty here.
current_db_release = ''
wizard_db_version = ''
# Default commands DB url
wizard_cmd_db_url = 'https://somedomain.here/sqlite.db'
# Local path the commands DB is downloaded to and hashed from
wizard_cmd_db = 'some/path'
# Hashes stored by the most recent wizard_db_hash_check() call
checksum_local = ''
checksum_remote = ''
# NOTE(review): never assigned in the visible code, yet wizard_db_hash_check()
# compares the local hash against it - presumably meant to be fetched from
# current_db_release_notes_hash_url; confirm.
checksum_remote_hash = ''
# Set to True/False by wizard_db_hash_check(); '' until the first check ran
checksum_status = ''
def download_cmd_db():
    """Download the commands database to ``wizard_cmd_db`` (best effort).

    Fetches ``wizard_cmd_db_url``, writes the body to ``wizard_cmd_db`` and
    stores ``current_db_release`` under the ``wizard_db_version`` settings
    key. Failures are reported on stdout and swallowed, so callers must
    verify the result themselves (e.g. with a hash check).

    ``settings`` is not defined in this snippet and has to be provided by
    the surrounding application.
    """
    try:
        print('Downloading database update version: ' + str(current_db_release))
        url = wizard_cmd_db_url
        r = requests.get(url)
        # Retrieve HTTP meta-data
        print(r.status_code)
        # Abort on 4xx/5xx BEFORE touching the local file; otherwise an
        # error page would overwrite the database and the version would be
        # recorded as if the download had succeeded.
        r.raise_for_status()
        with open(wizard_cmd_db, 'wb') as f:
            f.write(r.content)
        settings.setValue('wizard_db_version', current_db_release)
        print('Database downloaded to:' + str(wizard_cmd_db))
    except Exception as exc:
        # Exception instead of a bare except lets Ctrl-C / SystemExit
        # propagate while keeping the download best-effort.
        print('Commands Database download failed.... ;( ')
        print(exc)
def sha256_checksum(filepath, url):
    """Return the SHA-256 hex digest of a local file or of a remote resource.

    Exactly one source is used: when ``url`` is None the file at
    ``filepath`` is hashed; otherwise ``filepath`` is ignored and the body
    returned by a GET request to ``url`` is hashed.

    Args:
        filepath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    m = hashlib.sha256()
    if url is None:
        with open(filepath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        return m.hexdigest()
    # stream=True defers the body download, so iter_content() really hashes
    # chunk by chunk instead of buffering the whole file in memory first.
    r = requests.get(url, stream=True)
    try:
        # Fail loudly on 4xx/5xx instead of returning the checksum of an
        # error page, which would look like a genuine mismatch.
        r.raise_for_status()
        for data in r.iter_content(8192):
            m.update(data)
    finally:
        # Release the connection even if hashing is interrupted.
        r.close()
    return m.hexdigest()
def wizard_db_hash_check():
    """Hash the local and remote commands DB and record the outcome.

    Updates the module globals ``checksum_local``, ``checksum_remote`` and
    ``checksum_status``. ``checksum_status`` becomes True only when the
    local hash equals ``checksum_remote_hash``; it is False on a mismatch
    and also when the check itself could not be performed.
    """
    global checksum_local, checksum_remote, checksum_status
    try:
        checksum_local = sha256_checksum(wizard_cmd_db, None)
        checksum_remote = sha256_checksum(None, wizard_cmd_db_url)
        print("checksum_local : " + checksum_local)
        print("checksum_remote: " + checksum_remote)
        print("checksum_remote_hash: " + checksum_remote_hash)
        # NOTE(review): the comparison uses checksum_remote_hash, not the
        # freshly computed checksum_remote - presumably the published hash
        # is meant to be loaded elsewhere; confirm.
        if checksum_local == checksum_remote_hash:
            print('Hash Check passed')
            checksum_status = True
        else:
            print('Hash Check Failed')
            checksum_status = False
    except Exception:
        # A check that could not run must not leave a stale True from an
        # earlier successful run. Exception (not a bare except) lets
        # Ctrl-C / SystemExit propagate.
        checksum_status = False
        print('Could not perform wizard_db_hash_check')
# Sanity checks: make sure a verified commands DB is available and choose the
# database file (target_db) the application should use.
# These modules are used below but the snippet never imports them.
import os
import pathlib
import sys

# Sanity check for missing database file
file = pathlib.Path(wizard_cmd_db)
if file.exists():
    print("DB File exists: " + wizard_cmd_db)
    wizard_db_hash_check()
else:
    print("DB File does NOT exist: " + wizard_cmd_db)
    download_cmd_db()
    wizard_db_hash_check()

# Logic to decide when to download DB here
try:
    # Only the int() conversions are guarded: the release strings may be
    # empty or non-numeric, in which case no version comparison is possible.
    update_available = int(current_db_release) > int(wizard_db_version)
except (TypeError, ValueError):
    print('Unable to check wizard_db_release')
else:
    if update_available:
        print('Database update available: ' + str(current_db_release))
        download_cmd_db()
        wizard_db_hash_check()

# Local copy differs from the server copy: fetch it again and re-verify.
if checksum_local != checksum_remote:
    download_cmd_db()
    wizard_db_hash_check()

# Logic to fallback to default packaged DB if no internet to download and compare hash
if checksum_status is True:
    target_db = str(wizard_cmd_db)
else:
    print('All hash checks and attempts to update commands DB have failed. Switching to bundled DB')
    # The bundled DB sits next to the script that was launched.
    target_db = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), "sqlite.db")
print('Sanity Checks completed')
好的,看来我找到了解决方案,所以我会 post 在这里:)
首先,您需要在服务器上存放该文件的目录中创建(或编辑)一个 .htaccess 文件。
.htaccess 文件的内容:
ContentDigest On
现在您已经设置好了,服务器应该会在 HTTP 响应头中发送 Content-MD5 数据。
它会产生类似这样的结果:
'Content-MD5': '7dVTxeHRktvI0Wh/7/4ZOQ=='
好的,现在让我们看看 Python 部分,所以我修改了代码以便能够比较此 HTTP header 数据和本地 md5 校验和。
#-*- coding: utf-8 -*-
import hashlib
import requests
import base64
def md5Checksum(filePath,url):
    """Return a base64-encoded MD5 for a local file or a remote resource.

    When ``url`` is None the file at ``filePath`` is hashed locally and the
    raw digest is base64-encoded. Otherwise ``filePath`` is ignored and the
    value of the ``Content-MD5`` response header of a HEAD request to
    ``url`` is returned, so the remote file is never downloaded.

    Args:
        filePath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The base64 text of the MD5 digest.

    Raises:
        IOError/OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
        KeyError: If the response carries no ``Content-MD5`` header.
    """
    if url is None:
        m = hashlib.md5()
        with open(filePath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        # Get BASE 64 Local File md5: encode the raw digest like the header
        return base64.b64encode(m.digest()).decode('ascii')
    # Get BASE 64 Remote File md5 from the HTTP headers only.
    # requests.head() does not follow redirects unless asked to; without
    # this a redirecting URL would answer without the header of the file.
    r = requests.head(url, allow_redirects=True)
    # Report 4xx/5xx as an HTTP error rather than as a missing header.
    r.raise_for_status()
    return r.headers['Content-MD5']
def compare():
    """Print whether the local archive has to be downloaded again.

    Obtains the checksum of the local file and of its server copy through
    md5Checksum() and reports a match ("don't download") or a mismatch
    ("download").
    """
    checksums = (
        md5Checksum("projectg502th.pak.zip", None),
        md5Checksum(None, "http://127.0.0.1/md5/projectg502th.pak.zip"),
    )
    if checksums[0] != checksums[1]:
        print("The soft download the file")
        return
    print("The soft don't download the file")
# Demo: print both checksums for one archive, then run the comparison.
print ("checksum_local :",md5Checksum("projectg_ziinf.pak.zip",None))
print ("checksum_remote : ",md5Checksum(None,"http://127.0.0.1/md5/projectg_ziinf.pak.zip"))
compare()
输出:
checksum_local : 7dVTxeHRktvI0Wh/7/4ZOQ==
checksum_remote : 7dVTxeHRktvI0Wh/7/4ZOQ==
The soft don't download the file
希望对您有所帮助 ;)
我正在尝试比较本地文件和远程文件的 MD5 哈希值(远程文件就是我复制粘贴到 wamp 的 "www" 目录中的同一个文件),但我不明白为什么两个校验和("checksums")不一致……
校验和代码如下:
#-*- coding: utf-8 -*-
import hashlib
import requests
# Return the MD5 hex digest of a local file (when url is None) or of the body
# fetched from `url` (filePath is then ignored).
# NOTE: this is the question's original version; its remote branch is what
# produces the mismatching checksum shown in the output below.
def md5Checksum(filePath,url):
if url==None:
# Local branch: hash the file in 8192-byte binary chunks until read() is empty.
with open(filePath, 'rb') as fh:
m = hashlib.md5()
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m.hexdigest()
else:
r = requests.get(url, stream=True)
m = hashlib.md5()
# NOTE: per the requests documentation iter_lines() yields the body line by
# line WITHOUT the line terminators, so those bytes never reach the hash and
# the digest differs from the local one whenever the body contains newline
# bytes (typical for binary data).
for line in r.iter_lines():
m.update(line)
return m.hexdigest()
# Python 2 print statements: show the local and the remote digest.
print "checksum_local :",md5Checksum("projectg715gb.pak",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/projectg715gb.pak")
我很惊讶得到这个输出:
checksum_local : 9d33806fdebcb91c3d7bfee7cfbe4ad7
checksum_remote : a13aaeb99eb020a0bc8247685c274e7d
"projectg715gb.pak" 的大小是 14.7Mb
但是如果我尝试使用文本文件(大小 1Kb):
print "checksum_local :",md5Checksum("toto.txt",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/toto.txt")
然后就可以了 oO 我得到了这个输出:
checksum_local : f71dbe52628a3f83a77ab494817525c6
checksum_remote : f71dbe52628a3f83a77ab494817525c6
我是比较 MD5 散列的新手,所以请多多关照 ^^' 我可能犯了一些大错误,我不明白为什么它对大文件不起作用,如果有人可以给我提示,那就太好了!
但是感谢阅读和帮助!
感谢帮助者,这里是最终代码:
#-*- coding: utf-8 -*-
import hashlib
import requests
def md5Checksum(filePath,url):
    """Return the MD5 hex digest of a local file or of a remote resource.

    Exactly one source is used: when ``url`` is None the file at
    ``filePath`` is hashed; otherwise ``filePath`` is ignored and the body
    returned by a GET request to ``url`` is hashed.

    Args:
        filePath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        IOError/OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    m = hashlib.md5()
    if url is None:
        with open(filePath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        return m.hexdigest()
    # stream=True defers the body download, so iter_content() really hashes
    # chunk by chunk instead of buffering the whole file in memory first.
    r = requests.get(url, stream=True)
    try:
        # Fail loudly on 4xx/5xx instead of returning the checksum of an
        # error page, which would look like a genuine mismatch.
        r.raise_for_status()
        for data in r.iter_content(8192):
            m.update(data)
    finally:
        # Release the connection even if hashing is interrupted.
        r.close()
    return m.hexdigest()
# Python 2 print statements: show the local and the remote digest of the same file.
print "checksum_local :",md5Checksum("projectg715gb.pak",None)
print "checksum_remote :",md5Checksum(None,"http://testpangya.ddns.net/projectg715gb.pak")
感谢 https://whosebug.com/users/7495742/framb-axa 发帖分享了你的问题和解决方法,
这对解决我的问题很有帮助。
我针对 Python 3 稍微修改了 md5 部分和打印语句,并把哈希算法换成了 sha256。这正好满足我所开发的应用程序下载并校验本地和远程 sqlite 数据库的需要。我把代码留在这里,供其他偶然看到这个帖子的人参考。
import hashlib
import requests
# current release version url
current_release_url = 'https://somedomain.here/current_release.txt'
current_release_notes_url = 'https://somedomain.here/current_release_notes.txt'
# current database release version url
current_db_release_url = 'https://somedomain.here/current_db_release.txt'
current_db_release_notes_url = 'https://somedomain.here/current_db_release_notes.txt'
current_db_release_notes_hash_url = 'https://somedomain.here/current_db_release_hash.txt'
# Release numbers; the update logic further down compares them with int().
# Both start out empty here.
current_db_release = ''
wizard_db_version = ''
# Default commands DB url
wizard_cmd_db_url = 'https://somedomain.here/sqlite.db'
# Local path the commands DB is downloaded to and hashed from
wizard_cmd_db = 'some/path'
# Hashes stored by the most recent wizard_db_hash_check() call
checksum_local = ''
checksum_remote = ''
# NOTE(review): never assigned in the visible code, yet wizard_db_hash_check()
# compares the local hash against it - presumably meant to be fetched from
# current_db_release_notes_hash_url; confirm.
checksum_remote_hash = ''
# Set to True/False by wizard_db_hash_check(); '' until the first check ran
checksum_status = ''
def download_cmd_db():
    """Download the commands database to ``wizard_cmd_db`` (best effort).

    Fetches ``wizard_cmd_db_url``, writes the body to ``wizard_cmd_db`` and
    stores ``current_db_release`` under the ``wizard_db_version`` settings
    key. Failures are reported on stdout and swallowed, so callers must
    verify the result themselves (e.g. with a hash check).

    ``settings`` is not defined in this snippet and has to be provided by
    the surrounding application.
    """
    try:
        print('Downloading database update version: ' + str(current_db_release))
        url = wizard_cmd_db_url
        r = requests.get(url)
        # Retrieve HTTP meta-data
        print(r.status_code)
        # Abort on 4xx/5xx BEFORE touching the local file; otherwise an
        # error page would overwrite the database and the version would be
        # recorded as if the download had succeeded.
        r.raise_for_status()
        with open(wizard_cmd_db, 'wb') as f:
            f.write(r.content)
        settings.setValue('wizard_db_version', current_db_release)
        print('Database downloaded to:' + str(wizard_cmd_db))
    except Exception as exc:
        # Exception instead of a bare except lets Ctrl-C / SystemExit
        # propagate while keeping the download best-effort.
        print('Commands Database download failed.... ;( ')
        print(exc)
def sha256_checksum(filepath, url):
    """Return the SHA-256 hex digest of a local file or of a remote resource.

    Exactly one source is used: when ``url`` is None the file at
    ``filepath`` is hashed; otherwise ``filepath`` is ignored and the body
    returned by a GET request to ``url`` is hashed.

    Args:
        filepath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
    """
    m = hashlib.sha256()
    if url is None:
        with open(filepath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        return m.hexdigest()
    # stream=True defers the body download, so iter_content() really hashes
    # chunk by chunk instead of buffering the whole file in memory first.
    r = requests.get(url, stream=True)
    try:
        # Fail loudly on 4xx/5xx instead of returning the checksum of an
        # error page, which would look like a genuine mismatch.
        r.raise_for_status()
        for data in r.iter_content(8192):
            m.update(data)
    finally:
        # Release the connection even if hashing is interrupted.
        r.close()
    return m.hexdigest()
def wizard_db_hash_check():
    """Hash the local and remote commands DB and record the outcome.

    Updates the module globals ``checksum_local``, ``checksum_remote`` and
    ``checksum_status``. ``checksum_status`` becomes True only when the
    local hash equals ``checksum_remote_hash``; it is False on a mismatch
    and also when the check itself could not be performed.
    """
    global checksum_local, checksum_remote, checksum_status
    try:
        checksum_local = sha256_checksum(wizard_cmd_db, None)
        checksum_remote = sha256_checksum(None, wizard_cmd_db_url)
        print("checksum_local : " + checksum_local)
        print("checksum_remote: " + checksum_remote)
        print("checksum_remote_hash: " + checksum_remote_hash)
        # NOTE(review): the comparison uses checksum_remote_hash, not the
        # freshly computed checksum_remote - presumably the published hash
        # is meant to be loaded elsewhere; confirm.
        if checksum_local == checksum_remote_hash:
            print('Hash Check passed')
            checksum_status = True
        else:
            print('Hash Check Failed')
            checksum_status = False
    except Exception:
        # A check that could not run must not leave a stale True from an
        # earlier successful run. Exception (not a bare except) lets
        # Ctrl-C / SystemExit propagate.
        checksum_status = False
        print('Could not perform wizard_db_hash_check')
# Sanity checks: make sure a verified commands DB is available and choose the
# database file (target_db) the application should use.
# These modules are used below but the snippet never imports them.
import os
import pathlib
import sys

# Sanity check for missing database file
file = pathlib.Path(wizard_cmd_db)
if file.exists():
    print("DB File exists: " + wizard_cmd_db)
    wizard_db_hash_check()
else:
    print("DB File does NOT exist: " + wizard_cmd_db)
    download_cmd_db()
    wizard_db_hash_check()

# Logic to decide when to download DB here
try:
    # Only the int() conversions are guarded: the release strings may be
    # empty or non-numeric, in which case no version comparison is possible.
    update_available = int(current_db_release) > int(wizard_db_version)
except (TypeError, ValueError):
    print('Unable to check wizard_db_release')
else:
    if update_available:
        print('Database update available: ' + str(current_db_release))
        download_cmd_db()
        wizard_db_hash_check()

# Local copy differs from the server copy: fetch it again and re-verify.
if checksum_local != checksum_remote:
    download_cmd_db()
    wizard_db_hash_check()

# Logic to fallback to default packaged DB if no internet to download and compare hash
if checksum_status is True:
    target_db = str(wizard_cmd_db)
else:
    print('All hash checks and attempts to update commands DB have failed. Switching to bundled DB')
    # The bundled DB sits next to the script that was launched.
    target_db = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), "sqlite.db")
print('Sanity Checks completed')
好的,看来我找到了解决方案,所以我会 post 在这里:)
首先,您需要在服务器上存放该文件的目录中创建(或编辑)一个 .htaccess 文件。
.htaccess 文件的内容:
ContentDigest On
现在您已经设置好了,服务器应该会在 HTTP 响应头中发送 Content-MD5 数据。
它会产生类似这样的结果:
'Content-MD5': '7dVTxeHRktvI0Wh/7/4ZOQ=='
好的,现在让我们看看 Python 部分,所以我修改了代码以便能够比较此 HTTP header 数据和本地 md5 校验和。
#-*- coding: utf-8 -*-
import hashlib
import requests
import base64
def md5Checksum(filePath,url):
    """Return a base64-encoded MD5 for a local file or a remote resource.

    When ``url`` is None the file at ``filePath`` is hashed locally and the
    raw digest is base64-encoded. Otherwise ``filePath`` is ignored and the
    value of the ``Content-MD5`` response header of a HEAD request to
    ``url`` is returned, so the remote file is never downloaded.

    Args:
        filePath: Path of the local file (only used when ``url`` is None).
        url: URL of the remote file, or None to hash the local file.

    Returns:
        The base64 text of the MD5 digest.

    Raises:
        IOError/OSError: If the local file cannot be opened or read.
        requests.RequestException: If the request fails or the server
            answers with an HTTP error status.
        KeyError: If the response carries no ``Content-MD5`` header.
    """
    if url is None:
        m = hashlib.md5()
        with open(filePath, 'rb') as fh:
            # Fixed-size binary chunks keep memory use flat for large files;
            # read() returns b'' at end of file, which stops the iterator.
            for data in iter(lambda: fh.read(8192), b''):
                m.update(data)
        # Get BASE 64 Local File md5: encode the raw digest like the header
        return base64.b64encode(m.digest()).decode('ascii')
    # Get BASE 64 Remote File md5 from the HTTP headers only.
    # requests.head() does not follow redirects unless asked to; without
    # this a redirecting URL would answer without the header of the file.
    r = requests.head(url, allow_redirects=True)
    # Report 4xx/5xx as an HTTP error rather than as a missing header.
    r.raise_for_status()
    return r.headers['Content-MD5']
def compare():
    """Print whether the local archive has to be downloaded again.

    Obtains the checksum of the local file and of its server copy through
    md5Checksum() and reports a match ("don't download") or a mismatch
    ("download").
    """
    checksums = (
        md5Checksum("projectg502th.pak.zip", None),
        md5Checksum(None, "http://127.0.0.1/md5/projectg502th.pak.zip"),
    )
    if checksums[0] != checksums[1]:
        print("The soft download the file")
        return
    print("The soft don't download the file")
# Demo: print both checksums for one archive, then run the comparison.
print ("checksum_local :",md5Checksum("projectg_ziinf.pak.zip",None))
print ("checksum_remote : ",md5Checksum(None,"http://127.0.0.1/md5/projectg_ziinf.pak.zip"))
compare()
输出:
checksum_local : 7dVTxeHRktvI0Wh/7/4ZOQ==
checksum_remote : 7dVTxeHRktvI0Wh/7/4ZOQ==
The soft don't download the file
希望对您有所帮助 ;)