在 Python 数据类上使用哈希作为标识

Using hash for identity on Python dataclasses

我有以下 Filer 实体(在领域驱动设计意义上)。

from dataclasses import dataclass, field

@dataclass
class Address:
    """Plain record holding the four string components of an address.

    Generated by ``@dataclass`` with default options, so instances compare
    field by field with ``==``.
    """

    # Street line.
    street: str
    # City name.
    city: str
    # State, as a string (the tests in this file use a two-letter code).
    state: str
    # Zip code, kept as a string.
    zipcode: str

@dataclass
class Filer:
    """Filer entity: ``cik`` is meant to carry identity, the rest are details.

    Every field takes part in ``==``.  All fields except ``cik`` are flagged
    ``hash=False`` so that only ``cik`` would feed a generated ``__hash__``.

    NOTE: with a plain ``@dataclass`` (``eq=True``, not frozen, no
    ``unsafe_hash``) no ``__hash__`` is generated at all, so the ``hash=``
    flags only take effect once hashing is enabled on the decorator.
    """

    # Identifier; the only field left with the default hash setting.
    cik: int
    # Descriptive details: compared by ``==`` but excluded from hashing.
    name: str = field(compare=True, hash=False)
    state: str = field(compare=True, hash=False)
    yearend: str = field(compare=True, hash=False)
    businessaddress: Address = field(compare=True, hash=False)
    mailingaddress: Address = field(compare=True, hash=False)
    sic: int = field(compare=True, hash=False)
    # NOTE(review): annotated ``str`` but defaults to None, and the tests in
    # this file pass an int here -- confirm the intended type.
    ein: str = field(default=None, compare=True, hash=False)

对于任何 Filer,cik 本身就足以确定其身份。但是,我想使用相等比较来查看该 Filer 的其他细节是否发生了变化(例如,与同一 Filer 的先前版本相比)。因此,我在除 cik 之外的所有字段上设置了 hash=False, compare=True(默认情况下 hash=True)。

以下测试用例简要概述了预期的行为:

import unittest

class TestFiler(unittest.TestCase):
    """Expected equality (``==``) and identity (``is``) behaviour of Filer.

    Each test builds its own pair of Filer objects ``a`` and ``b`` and checks
    one relation between them.
    """

    def test_equality_same_filer(self):
        # Same cik, same details -> expected to compare equal.
        a = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertEqual(a, b)

    def test_identity_same_filer(self):
        # Same cik, same details -> asserted to be the very same object.
        a = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertIs(a, b)

    def test_equality_same_filer_new_name(self):
        # Same cik, only the name differs -> expected to compare unequal.
        a = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "A new name for the company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertNotEqual(a, b)

    def test_identity_same_filer_new_name(self):
        # Same cik, only the name differs -> asserted to be the same object.
        a = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "A new name for the company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertIs(a, b)

    def test_equality_different_filer_same_details(self):
        # Different cik, identical details -> expected to compare unequal.
        a = Filer(
            4321, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertNotEqual(a, b)

    def test_identity_different_filer_same_details(self):
        # Different cik, identical details -> must not be the same object.
        a = Filer(
            4321, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        b = Filer(
            1234, "Some company", "Some state", "0930",
            Address("Some address", "Some city", "AB", "12345"),
            Address("Some address", "Some city", "AB", "12345"),
            1000, 1234567,
        )
        self.assertIsNot(a, b)

# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

结果没有按预期进行。

(base) randm@pearljam /home/randm/Projects/secfilings $ /home/randm/Libraries/anaconda3/bin/python /home/randm/Projects/scrap/filer.py
....FF
======================================================================
FAIL: test_identity_same_filer (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 51, in test_identity_same_filer
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

======================================================================
FAIL: test_identity_same_filer_new_name (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 77, in test_identity_same_filer_new_name
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='A new name for the company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

----------------------------------------------------------------------
Ran 6 tests in 0.001s

FAILED (failures=2)

有没有办法让我使用 is 进行身份测试(而无需求助于 is_ 之类的数据类方法,那样会破坏我希望在客户端代码中保持的简洁语法)?还是说我只是滥用了标识(我认为在 CPython 中它基于指针值),而应该在客户端代码中显式比较哈希值?

您对 assertIs 的用法不正确。它使用的是 Python 的 is 语义,也就是说,两个名称必须指向同一个对象。由于您构建了 2 个不同的对象,它们之间的 is 测试将始终为假。要检验等价性,应当使用相等比较(==,即 assertEqual)。

无法覆盖 Python 的 is 身份检查,因为 is 运算符的行为不能被自定义(没有对应的特殊方法)。它始终判断两者是否确实是同一个对象。(对字符串有时看似"可行",但对整数的表现会"出乎意料"。)

您可以在数据类定义中使用 unsafe_hash=True,这样就可以用 hash(a) == hash(b) 进行比较。如果想让写法更自然,也可以定义一个方法 is_,然后调用 a.is_(b)。注意,如果这个类还有其他使用者或开发者,您需要向他们讲清楚:什么情况下 is_ 为 True 而 == 为 False,以及其他各种组合。

# unsafe_hash=True makes @dataclass generate __hash__ from the fields that
# are not marked hash=False, even though the class is not frozen.
@dataclass(unsafe_hash=True)
class Filer:
    ...  # everything else the same

那么你的身份测试将基于 hash()

此外,您应该在测试的 setUp 中创建 a 和 b,而不是在每个测试里复制粘贴。否则阅读代码的人(比如我们)在每个测试中都得把两者的完整定义核对一遍,才能看出差别在哪里;一个月之后,您自己也会如此。对于在测试中只有细微差别的对象,请使用 dataclasses.replace()。

这是一个更具可读性的单元测试版本,其中添加了基于散列的检查:

import dataclasses
import unittest

class TestFiler(unittest.TestCase):
    """Equality, identity and hash checks for ``Filer``.

    ``setUp`` builds two separately constructed but field-identical Filers;
    variants are derived with ``dataclasses.replace`` so each test shows
    exactly which field differs.  The ``hash`` tests need ``Filer`` to be
    hashable (e.g. declared with ``@dataclass(unsafe_hash=True)``).
    """

    def setUp(self):
        # Two distinct objects with identical field values.
        self.a = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)
        self.b = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)

    def test_equality_same_filer(self):
        self.assertEqual(self.a, self.b)

    def test_identity_same_filer(self):  # will still fail
        # `is` compares object identity; a and b are two separate objects.
        self.assertIs(self.a, self.b)

    def test_equality_same_filer_new_name(self):
        # make it clear that `a` and `c` only differ by name:
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertNotEqual(self.a, c)

    def test_identity_same_filer_new_name(self):  # will still fail
        # or put c also in `setUp`
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertIs(self.a, c)

    def test_equality_different_filer_same_details(self):
        # Only cik differs; this is the equality check, so compare with ==
        # rather than repeating the identity assertion of the next test.
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(new_a, self.a)

    def test_identity_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertIsNot(new_a, self.a)

    def test_hash_same_filer(self):  # NEW
        self.assertEqual(hash(self.a), hash(self.b))

    def test_hash_same_filer_new_name(self):  # NEW
        # name is excluded from the hash, so a renamed Filer hashes the same.
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertEqual(hash(c), hash(self.a))

    def test_hash_different_filer_same_details(self):  # NEW
        # Distinct name: reusing the identity test's name would replace that
        # method in the class body and silently drop it from the run.
        diff_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(hash(diff_a), hash(self.a))


# Run the test cases above when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()