在 Python 数据类上使用哈希作为标识
Using hash for identity on Python dataclasses
我有以下 Filer
实体(在领域驱动设计意义上)。
from dataclasses import dataclass, field
@dataclass
class Address:
    """Address record made of four plain string fields."""

    street: str
    city: str
    state: str
    zipcode: str
@dataclass
class Filer:
    """Filer entity whose identity is meant to be determined by ``cik`` alone.

    Every field other than ``cik`` is declared with ``hash=False, compare=True``
    so that it takes part in equality comparison but not in hashing.

    NOTE(review): the decorator is applied without ``unsafe_hash=True`` or
    ``frozen=True``; confirm against the dataclasses documentation whether a
    ``__hash__`` is generated at all in this configuration.
    """

    # The only field left with the default hash setting.
    cik: int
    name: str = field(hash=False, compare=True)
    state: str = field(hash=False, compare=True)
    yearend: str = field(hash=False, compare=True)
    businessaddress: Address = field(hash=False, compare=True)
    mailingaddress: Address = field(hash=False, compare=True)
    sic: int = field(hash=False, compare=True)
    # NOTE(review): annotated as str but defaults to None, and the tests in
    # this file pass an int (1234567) -- confirm the intended type.
    ein: str = field(hash=False, compare=True, default=None)
对于任何Filer
,cik
本身就确定身份。但是,我想使用相等比较来查看有关 Filer
的任何其他细节是否可能发生了变化(例如,与同一 Filer
的先前版本相比)。在此基础上,我在除 cik
之外的所有字段上设置 hash=False, compare=True
(默认情况下 hash=True
)。
以下测试用例简要概述了预期的行为:
- 身份:完全由
cik
确定并通过 assertIs
和 assertIsNot
测试
- 相等性:由所有字段确定并通过
assertEqual
和 assertNotEqual
进行测试
import unittest
class TestFiler(unittest.TestCase):
    """Expected behaviour of ``Filer``.

    Equality is checked with assertEqual/assertNotEqual and identity with
    assertIs/assertIsNot. Each test builds its own pair of filers ``a`` and
    ``b`` from literal arguments.
    """

    def test_equality_same_filer(self,):
        # Same cik, same details: the two filers must compare equal.
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertEqual(a, b)

    def test_identity_same_filer(self,):
        # Same cik, same details. assertIs applies Python's `is`, and a and b
        # are two separately constructed objects, so this assertion fails
        # (see the run output that follows the code).
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIs(a, b)

    def test_equality_same_filer_new_name(self,):
        # Same cik, different name: the two filers must not compare equal.
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertNotEqual(a, b)

    def test_identity_same_filer_new_name(self,):
        # Same cik, different name. As above, a and b are distinct objects,
        # so this `is`-based assertion fails (see the run output).
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIs(a, b)

    def test_equality_different_filer_same_details(self,):
        # Different cik, same details: the two filers must not compare equal.
        a = Filer(4321, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertNotEqual(a, b)

    def test_identity_different_filer_same_details(self,):
        # Different cik, same details: the two objects must not be identical.
        a = Filer(4321, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIsNot(a, b)
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
结果没有按预期进行。
(base) randm@pearljam /home/randm/Projects/secfilings $ /home/randm/Libraries/anaconda3/bin/python /home/randm/Projects/scrap/filer.py
....FF
======================================================================
FAIL: test_identity_same_filer (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/randm/Projects/scrap/filer.py", line 51, in test_identity_same_filer
self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)
======================================================================
FAIL: test_identity_same_filer_new_name (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/randm/Projects/scrap/filer.py", line 77, in test_identity_same_filer_new_name
self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='A new name for the company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)
----------------------------------------------------------------------
Ran 6 tests in 0.001s
FAILED (failures=2)
有没有办法让我使用 is
身份测试(无需求助于数据类方法 is_
或类似的方法,这会改变我在客户端中寻找的简洁语法代码)。还是我只是滥用了标识(我认为它基于 CPython 中的指针值)而应该在我的客户端代码中显式使用哈希相等性?
您对 assertIs 的用法有误。它使用的是 Python 的 is 行为,也就是说,两个名称必须指向同一个对象。由于您构建了 2 个不同的对象,因此它们之间的 is 测试将始终为假。相等性(==)才是检验等价性的正确方式。
无法覆盖 Python 的 is
身份检查,因为它不对应任何可重载的特殊方法。它始终判断两个对象是否为同一个实际对象。(对字符串“勉强可用”,但对整数的行为会“出乎意料”。)
您可以在数据类定义中使用 unsafe_hash=True ,这样您就可以使用 hash(a) == hash(b)
。但是如果你想让它感觉更自然,你也可以创建一个方法is_
并做a.is_(b)
。注意,如果你的类还有其他users/coders,你需要清楚什么时候is_
可以为True而==
可以为false;和所有其他组合。
# unsafe_hash=True forces the decorator to generate __hash__; fields declared
# with hash=False are left out of the computed hash.
@dataclass(unsafe_hash=True)
class Filer:
    ...  # everything else the same
那么你的身份测试将基于 hash()
。
此外,您应该在测试的 setUp
中创建 a
和 b
,而不是在每个测试里复制粘贴它们。阅读您代码的人(比如我们)仍然必须在每个测试中检查两者的完整定义,才能看出哪里不同;一个月后,您自己也会如此。对于测试中只有细微差别的对象,请使用 dataclasses.replace()
。
这是一个更具可读性的单元测试版本,其中添加了基于散列的检查:
import dataclasses
import unittest
class TestFiler(unittest.TestCase):
    """Equality, identity and hash checks for ``Filer``.

    ``setUp`` builds two filers from identical arguments; variants that differ
    in a single field are derived with ``dataclasses.replace`` so each test
    shows exactly what changed. The hash tests call ``hash()`` on ``Filer``
    instances, so they need the ``Filer`` variant declared with
    ``@dataclass(unsafe_hash=True)``.
    """

    def setUp(self):
        # a and b are constructed separately from the same arguments.
        self.a = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)
        self.b = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)

    def test_equality_same_filer(self):
        self.assertEqual(self.a, self.b)

    def test_identity_same_filer(self):  # will still fail
        # `is` compares object identity and a, b are distinct objects.
        self.assertIs(self.a, self.b)

    def test_equality_same_filer_new_name(self):
        # make it clear that `a` and `c` only differ by name:
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertNotEqual(self.a, c)

    def test_identity_same_filer_new_name(self):  # will still fail
        # or put c also in `setUp`
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertIs(self.a, c)

    def test_equality_different_filer_same_details(self):
        # Equality test: assert inequality, not non-identity.
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(new_a, self.a)

    def test_identity_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertIsNot(new_a, self.a)

    def test_hash_same_filer(self):  # NEW
        self.assertEqual(hash(self.a), hash(self.b))

    def test_hash_same_filer_new_name(self):  # NEW
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertEqual(hash(c), hash(self.a))

    # Named test_hash_* so it does not replace the identity test of the
    # same scenario defined above.
    def test_hash_different_filer_same_details(self):  # NEW
        diff_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(hash(diff_a), hash(self.a))
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
我有以下 Filer
实体(在领域驱动设计意义上)。
from dataclasses import dataclass, field
@dataclass
class Address:
    """Address record made of four plain string fields."""

    street: str
    city: str
    state: str
    zipcode: str
@dataclass
class Filer:
    """Filer entity whose identity is meant to be determined by ``cik`` alone.

    Every field other than ``cik`` is declared with ``hash=False, compare=True``
    so that it takes part in equality comparison but not in hashing.

    NOTE(review): the decorator is applied without ``unsafe_hash=True`` or
    ``frozen=True``; confirm against the dataclasses documentation whether a
    ``__hash__`` is generated at all in this configuration.
    """

    # The only field left with the default hash setting.
    cik: int
    name: str = field(hash=False, compare=True)
    state: str = field(hash=False, compare=True)
    yearend: str = field(hash=False, compare=True)
    businessaddress: Address = field(hash=False, compare=True)
    mailingaddress: Address = field(hash=False, compare=True)
    sic: int = field(hash=False, compare=True)
    # NOTE(review): annotated as str but defaults to None, and the tests in
    # this file pass an int (1234567) -- confirm the intended type.
    ein: str = field(hash=False, compare=True, default=None)
对于任何Filer
,cik
本身就确定身份。但是,我想使用相等比较来查看有关 Filer
的任何其他细节是否可能发生了变化(例如,与同一 Filer
的先前版本相比)。在此基础上,我在除 cik
之外的所有字段上设置 hash=False, compare=True
(默认情况下 hash=True
)。
以下测试用例简要概述了预期的行为:
- 身份:完全由
cik
确定并通过 assertIs
和 assertIsNot
测试
- 相等性:由所有字段确定并通过
assertEqual
和 assertNotEqual
进行测试
import unittest
class TestFiler(unittest.TestCase):
    """Expected behaviour of ``Filer``.

    Equality is checked with assertEqual/assertNotEqual and identity with
    assertIs/assertIsNot. Each test builds its own pair of filers ``a`` and
    ``b`` from literal arguments.
    """

    def test_equality_same_filer(self,):
        # Same cik, same details: the two filers must compare equal.
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertEqual(a, b)

    def test_identity_same_filer(self,):
        # Same cik, same details. assertIs applies Python's `is`, and a and b
        # are two separately constructed objects, so this assertion fails
        # (see the run output that follows the code).
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIs(a, b)

    def test_equality_same_filer_new_name(self,):
        # Same cik, different name: the two filers must not compare equal.
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertNotEqual(a, b)

    def test_identity_same_filer_new_name(self,):
        # Same cik, different name. As above, a and b are distinct objects,
        # so this `is`-based assertion fails (see the run output).
        a = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIs(a, b)

    def test_equality_different_filer_same_details(self,):
        # Different cik, same details: the two filers must not compare equal.
        a = Filer(4321, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertNotEqual(a, b)

    def test_identity_different_filer_same_details(self,):
        # Different cik, same details: the two objects must not be identical.
        a = Filer(4321, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                  Address("Some address", "Some city", "AB", "12345"),
                  Address("Some address", "Some city", "AB", "12345"),
                  1000,
                  1234567)
        self.assertIsNot(a, b)
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
结果没有按预期进行。
(base) randm@pearljam /home/randm/Projects/secfilings $ /home/randm/Libraries/anaconda3/bin/python /home/randm/Projects/scrap/filer.py
....FF
======================================================================
FAIL: test_identity_same_filer (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/randm/Projects/scrap/filer.py", line 51, in test_identity_same_filer
self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)
======================================================================
FAIL: test_identity_same_filer_new_name (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/randm/Projects/scrap/filer.py", line 77, in test_identity_same_filer_new_name
self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='A new name for the company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)
----------------------------------------------------------------------
Ran 6 tests in 0.001s
FAILED (failures=2)
有没有办法让我使用 is
身份测试(无需求助于数据类方法 is_
或类似的方法,这会改变我在客户端中寻找的简洁语法代码)。还是我只是滥用了标识(我认为它基于 CPython 中的指针值)而应该在我的客户端代码中显式使用哈希相等性?
您对 assertIs 的用法有误。它使用的是 Python 的 is 行为,也就是说,两个名称必须指向同一个对象。由于您构建了 2 个不同的对象,因此它们之间的 is 测试将始终为假。相等性(==)才是检验等价性的正确方式。
无法覆盖 Python 的 is
身份检查,因为它不对应任何可重载的特殊方法。它始终判断两个对象是否为同一个实际对象。
您可以在数据类定义中使用 unsafe_hash=True ,这样您就可以使用 hash(a) == hash(b)
。但是如果你想让它感觉更自然,你也可以创建一个方法is_
并做a.is_(b)
。注意,如果你的类还有其他users/coders,你需要清楚什么时候is_
可以为True而==
可以为false;和所有其他组合。
# unsafe_hash=True forces the decorator to generate __hash__; fields declared
# with hash=False are left out of the computed hash.
@dataclass(unsafe_hash=True)
class Filer:
    ...  # everything else the same
那么你的身份测试将基于 hash()
。
此外,您应该在测试的 setUp
中创建 a
和 b
,而不是在每个测试里复制粘贴它们。阅读您代码的人(比如我们)仍然必须在每个测试中检查两者的完整定义,才能看出哪里不同;一个月后,您自己也会如此。对于测试中只有细微差别的对象,请使用 dataclasses.replace()
。
这是一个更具可读性的单元测试版本,其中添加了基于散列的检查:
import dataclasses
import unittest
class TestFiler(unittest.TestCase):
    """Equality, identity and hash checks for ``Filer``.

    ``setUp`` builds two filers from identical arguments; variants that differ
    in a single field are derived with ``dataclasses.replace`` so each test
    shows exactly what changed. The hash tests call ``hash()`` on ``Filer``
    instances, so they need the ``Filer`` variant declared with
    ``@dataclass(unsafe_hash=True)``.
    """

    def setUp(self):
        # a and b are constructed separately from the same arguments.
        self.a = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)
        self.b = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)

    def test_equality_same_filer(self):
        self.assertEqual(self.a, self.b)

    def test_identity_same_filer(self):  # will still fail
        # `is` compares object identity and a, b are distinct objects.
        self.assertIs(self.a, self.b)

    def test_equality_same_filer_new_name(self):
        # make it clear that `a` and `c` only differ by name:
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertNotEqual(self.a, c)

    def test_identity_same_filer_new_name(self):  # will still fail
        # or put c also in `setUp`
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertIs(self.a, c)

    def test_equality_different_filer_same_details(self):
        # Equality test: assert inequality, not non-identity.
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(new_a, self.a)

    def test_identity_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertIsNot(new_a, self.a)

    def test_hash_same_filer(self):  # NEW
        self.assertEqual(hash(self.a), hash(self.b))

    def test_hash_same_filer_new_name(self):  # NEW
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertEqual(hash(c), hash(self.a))

    # Named test_hash_* so it does not replace the identity test of the
    # same scenario defined above.
    def test_hash_different_filer_same_details(self):  # NEW
        diff_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(hash(diff_a), hash(self.a))
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()