csv 文件列的 AES 加密:解密不起作用
AES encryption for csv file columns: Decryption is not working
我正在尝试编写一个 AES 脚本,该脚本采用一些列的名称并进行加密,然后生成一个包含加密列的新 csv 文件。
该脚本还可以使用密码和加密的 csv 文件对其进行解密。
代码的解密部分无效。
我试过编码和解码。并检查逻辑错误。
import pandas as pd
import sys
import base64
from Crypto.Cipher import AES
from Crypto import Random
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Util.Padding import pad, unpad
import os
import time
import getpass
'''
# AES 256 encryption/decryption using pycrypto library
Credits: https://www.quickprogrammingtips.com/python/aes-256-encryption-and-decryption-in-python.html
'''
BLOCK_SIZE = 16
def get_private_key_salt(password, salt=None):
if salt == None:
salt = os.urandom(16)
kdf = PBKDF2(password, salt, 64, 100000)
key = kdf[:32]
return [key,salt]
def encrypt(raw, private_key):
iv = os.urandom(AES.block_size)
cipher = AES.new(private_key, AES.MODE_CBC, iv)
return base64.b64encode(iv + cipher.encrypt(pad(raw.encode('utf-8'),BLOCK_SIZE)))
def decrypt(enc, private_key):
enc = base64.b64decode(enc)
iv = enc[:16]
cipher = AES.new(private_key, AES.MODE_CBC, iv)
#line56
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
def encrypt_cols(encryption_dict,password):
encrypted_dict = {}
encrypted_dict['salt'] = []
for dict_key in encryption_dict.keys():
encrypted_dict[dict_key] = []
for index in range(0,len(encryption_dict[dict_key])):
try:
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
except IndexError:
key_salt_pair = get_private_key_salt(password)
encrypted_dict['salt'].append(key_salt_pair[1])
cipher_text = encrypt(str(encryption_dict[dict_key][index]), key_salt_pair[0])
encrypted_dict[dict_key].append(cipher_text.decode("utf-8"))
return encrypted_dict
def decrypt_cols(encrypted_dict,password):
decrypted_dict = {}
decrypted_dict['salt'] = encrypted_dict['salt']
for dict_key in encrypted_dict.keys():
decrypted_dict[dict_key] = []
if dict_key != 'salt':
for index in range(0,len(encrypted_dict[dict_key])):
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
cipher_text = encrypted_dict[dict_key][index].encode('utf-8')
#line88
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
decrypted_dict[dict_key].append(plain_text)
return decrypted_dict
def prep_encryption_cols(encrypt_list):
encryption_dict = {}
for col_name in encrypt_list:
try:
encryption_dict[col_name]=df[col_name]
except KeyError:
print('No column with name \''+col_name+'\' Found. De-identification Failed!')
sys.exit()
return encryption_dict
def encryption(df, encrypt_list,password):
encryption_dict = prep_encryption_cols(encrypt_list)
encrypted_dict = encrypt_cols(encryption_dict,password)
for key in encrypted_dict.keys():
df[key] = encrypted_dict[key]
def decryption(df, decrypt_list, password):
encrypted_dict = {}
for col in decrypt_list:
encrypted_dict[col] = df[col]
encrypted_dict['salt'] = df['salt']
decrypted_dict = decrypt_cols(encrypted_dict,password)
del df['salt']
for key in decrypted_dict.keys():
if key != 'salt':
df[key] = decrypted_dict[key]
password = getpass.getpass(prompt='Type Password Here:')
'''
ENCRYPTION
'''
start = time.time()
#read csv file
data = pd.read_csv('test.csv')
#convert it into dataframe
df = pd.DataFrame(data)
encrypt_list = ['FIELD2', 'FIELD3']
del_list = ['FIELD4','FIELD5']
encryption(df, encrypt_list, password)
df.to_csv('encrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to encrypt')
'''
DECRYPTION
'''
start = time.time()
data = pd.read_csv('encrypted_test.csv')
df = pd.DataFrame(data)
decrypt_list = ['FIELD2', 'FIELD3']
decryption(df, decrypt_list, password)
df.to_csv('decrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to decrypt')
当我将加密数据存储在 csv 文件中然后读取它进行解密时出现此错误
C:\Users\user\Desktop\>python3 clean.py
It took 0.06599974632263184 seconds to encrypt
Traceback (most recent call last):
File "clean.py", line 167, in <module>
decryption(df, decrypt_list, password)
File "clean.py", line 128, in decryption
decrypted_dict = decrypt_cols(encrypted_dict,password)
File "clean.py", line 88, in decrypt_cols
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
File "clean.py", line 56, in decrypt
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\Crypto\Util\Padding.py", line 90, in unpad
raise ValueError("Padding is incorrect.")
ValueError: Padding is incorrect.
如果我直接使用数据帧进行解密而不将其存储在csv文件中,程序运行良好。
感谢您的宝贵时间。
加密时,盐在 csv 文件中存储为 字节 文字。解密时,salt 被解释为 string 文字,这实际上意味着使用了不同的 salt,因此使用了不同的密钥。错误的解密也会导致无效的填充,从而导致错误消息 Padding is incorrect。
通过比较encrypt_cols
中的key_salt_pair[1]
和[=中的(对应的)encrypted_dict['salt'][index]
的值和类型,可以用调试器轻松检查加密和解密过程中的不同盐值14=].
原始盐的确定如果以十六进制字符串形式存储是最简单的,可以在get_private_key_salt
:
中实现
def get_private_key_salt(password, salt=None):
if salt == None:
saltBin = os.urandom(16)
salt = saltBin.hex()
else:
saltBin = bytes.fromhex(salt)
kdf = PBKDF2(password, saltBin, 64, 100000)
key = kdf[:32]
return [key,salt]
或者,也可以在 encrypt_cols
和 decrypt_cols
中进行更改。
另一点是:在当前代码中,csv 文件仍然具有 DataFrame
列。这可以通过将 pandas#DataFrame#to_csv
中的索引参数设置为 False
、here 来删除。
我正在尝试编写一个 AES 脚本,该脚本采用一些列的名称并进行加密,然后生成一个包含加密列的新 csv 文件。
该脚本还可以使用密码和加密的 csv 文件对其进行解密。
代码的解密部分无效。
我试过编码和解码。并检查逻辑错误。
import pandas as pd
import sys
import base64
from Crypto.Cipher import AES
from Crypto import Random
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Util.Padding import pad, unpad
import os
import time
import getpass
'''
# AES 256 encryption/decryption using pycrypto library
Credits: https://www.quickprogrammingtips.com/python/aes-256-encryption-and-decryption-in-python.html
'''
BLOCK_SIZE = 16
def get_private_key_salt(password, salt=None):
if salt == None:
salt = os.urandom(16)
kdf = PBKDF2(password, salt, 64, 100000)
key = kdf[:32]
return [key,salt]
def encrypt(raw, private_key):
iv = os.urandom(AES.block_size)
cipher = AES.new(private_key, AES.MODE_CBC, iv)
return base64.b64encode(iv + cipher.encrypt(pad(raw.encode('utf-8'),BLOCK_SIZE)))
def decrypt(enc, private_key):
enc = base64.b64decode(enc)
iv = enc[:16]
cipher = AES.new(private_key, AES.MODE_CBC, iv)
#line56
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
def encrypt_cols(encryption_dict,password):
encrypted_dict = {}
encrypted_dict['salt'] = []
for dict_key in encryption_dict.keys():
encrypted_dict[dict_key] = []
for index in range(0,len(encryption_dict[dict_key])):
try:
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
except IndexError:
key_salt_pair = get_private_key_salt(password)
encrypted_dict['salt'].append(key_salt_pair[1])
cipher_text = encrypt(str(encryption_dict[dict_key][index]), key_salt_pair[0])
encrypted_dict[dict_key].append(cipher_text.decode("utf-8"))
return encrypted_dict
def decrypt_cols(encrypted_dict,password):
decrypted_dict = {}
decrypted_dict['salt'] = encrypted_dict['salt']
for dict_key in encrypted_dict.keys():
decrypted_dict[dict_key] = []
if dict_key != 'salt':
for index in range(0,len(encrypted_dict[dict_key])):
key_salt_pair = get_private_key_salt(password,encrypted_dict['salt'][index])
cipher_text = encrypted_dict[dict_key][index].encode('utf-8')
#line88
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
decrypted_dict[dict_key].append(plain_text)
return decrypted_dict
def prep_encryption_cols(encrypt_list):
encryption_dict = {}
for col_name in encrypt_list:
try:
encryption_dict[col_name]=df[col_name]
except KeyError:
print('No column with name \''+col_name+'\' Found. De-identification Failed!')
sys.exit()
return encryption_dict
def encryption(df, encrypt_list,password):
encryption_dict = prep_encryption_cols(encrypt_list)
encrypted_dict = encrypt_cols(encryption_dict,password)
for key in encrypted_dict.keys():
df[key] = encrypted_dict[key]
def decryption(df, decrypt_list, password):
encrypted_dict = {}
for col in decrypt_list:
encrypted_dict[col] = df[col]
encrypted_dict['salt'] = df['salt']
decrypted_dict = decrypt_cols(encrypted_dict,password)
del df['salt']
for key in decrypted_dict.keys():
if key != 'salt':
df[key] = decrypted_dict[key]
password = getpass.getpass(prompt='Type Password Here:')
'''
ENCRYPTION
'''
start = time.time()
#read csv file
data = pd.read_csv('test.csv')
#convert it into dataframe
df = pd.DataFrame(data)
encrypt_list = ['FIELD2', 'FIELD3']
del_list = ['FIELD4','FIELD5']
encryption(df, encrypt_list, password)
df.to_csv('encrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to encrypt')
'''
DECRYPTION
'''
start = time.time()
data = pd.read_csv('encrypted_test.csv')
df = pd.DataFrame(data)
decrypt_list = ['FIELD2', 'FIELD3']
decryption(df, decrypt_list, password)
df.to_csv('decrypted_test.csv')
end = time.time()
print('It took '+str(end - start)+' seconds to decrypt')
当我将加密数据存储在 csv 文件中然后读取它进行解密时出现此错误
C:\Users\user\Desktop\>python3 clean.py
It took 0.06599974632263184 seconds to encrypt
Traceback (most recent call last):
File "clean.py", line 167, in <module>
decryption(df, decrypt_list, password)
File "clean.py", line 128, in decryption
decrypted_dict = decrypt_cols(encrypted_dict,password)
File "clean.py", line 88, in decrypt_cols
plain_text = decrypt(cipher_text, key_salt_pair[0]).decode('utf-8')
File "clean.py", line 56, in decrypt
return unpad(cipher.decrypt(enc[16:]),BLOCK_SIZE)
File "C:\Users\user\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\Crypto\Util\Padding.py", line 90, in unpad
raise ValueError("Padding is incorrect.")
ValueError: Padding is incorrect.
如果我直接使用数据帧进行解密而不将其存储在csv文件中,程序运行良好。
感谢您的宝贵时间。
加密时,盐在 csv 文件中存储为 字节 文字。解密时,salt 被解释为 string 文字,这实际上意味着使用了不同的 salt,因此使用了不同的密钥。错误的解密也会导致无效的填充,从而导致错误消息 Padding is incorrect。
通过比较encrypt_cols
中的key_salt_pair[1]
和[=中的(对应的)encrypted_dict['salt'][index]
的值和类型,可以用调试器轻松检查加密和解密过程中的不同盐值14=].
原始盐的确定如果以十六进制字符串形式存储是最简单的,可以在get_private_key_salt
:
def get_private_key_salt(password, salt=None):
if salt == None:
saltBin = os.urandom(16)
salt = saltBin.hex()
else:
saltBin = bytes.fromhex(salt)
kdf = PBKDF2(password, saltBin, 64, 100000)
key = kdf[:32]
return [key,salt]
或者,也可以在 encrypt_cols
和 decrypt_cols
中进行更改。
另一点是:在当前代码中,csv 文件仍然具有 DataFrame
列。这可以通过将 pandas#DataFrame#to_csv
中的索引参数设置为 False
、here 来删除。