如何验证 Python 中字典的结构(或模式)?
How to validate structure (or schema) of dictionary in Python?
我有一个包含配置信息的字典:
my_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
我想检查字典是否符合我需要的结构。
我正在寻找这样的东西:
conf_structure = {
'version': int,
'info': {
'conf_one': float,
'conf_two': str,
'conf_three': bool
}
}
is_ok = check_structure(conf_structure, my_conf)
是否有针对此问题的现成解决方案,或者有能让 check_structure 更容易实现的库?
您可以使用 schema
(PyPi Link)
schema is a library for validating Python data structures, such as those obtained from config-files, forms, external services or command-line parsing, converted from JSON/YAML (or something else) to Python data-types.
from schema import Schema, And, Use, Optional, SchemaError
def check(conf_schema, conf):
    """Return True if *conf* validates against *conf_schema*, else False.

    *conf_schema* is any object exposing ``validate(conf)`` that raises
    ``SchemaError`` on invalid data.
    """
    try:
        conf_schema.validate(conf)
    except SchemaError:
        return False
    # Only reached when validate() did not raise.
    return True
# Plain types are checked against the value's type. Use(...) would instead
# call the callable to convert the value, so Use(str) / Use(bool) would
# accept practically any input rather than validate it.
conf_schema = Schema({
    'version': int,
    'info': {
        'conf_one': float,
        'conf_two': str,
        'conf_three': bool,
        Optional('optional_conf'): str,
    },
})

# Sample configuration to validate.
conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar',
    },
}

print(check(conf_schema, conf))
不使用库,你也可以像这样定义一个简单的递归函数:
def check_structure(struct, conf):
    """Recursively check that *conf* matches the template *struct*.

    *struct* may be:
      * a dict mapping keys to nested templates -- every key must be present
        in *conf* (extra keys in *conf* are allowed);
      * a list ``[template]`` -- every element of *conf* must match
        ``template``; an empty list template accepts any list;
      * a type -- *conf* must be an instance of it.

    Any other template, or a container mismatch, yields False.
    """
    if isinstance(struct, dict):
        if not isinstance(conf, dict):
            return False
        return all(
            key in conf and check_structure(struct[key], conf[key])
            for key in struct
        )
    if isinstance(struct, list):
        if not isinstance(conf, list):
            return False
        if not struct:
            # No element template given: avoid IndexError on struct[0]
            # and treat the template as "any list".
            return True
        element_template = struct[0]
        return all(check_structure(element_template, item) for item in conf)
    if isinstance(struct, type):
        return isinstance(conf, struct)
    # struct is neither a dict, a list, nor a type.
    return False
这假定配置可以具有不在您的结构中的键,如您的示例中所示。
更新:新版本也支持列表,例如 'foo': [{'bar': int}]
@tobias_k 抢先了我一步(可能在时间和质量上都是如此),但这里还有一个用于同一任务的递归函数,您(和我)可能更容易理解:
def check_dict(my_dict, check_against):
    """Return True if *my_dict* matches the template *check_against*.

    *check_against* maps each required key either to a type (the value must
    be an instance of it) or to a nested template dict (checked
    recursively). Extra keys in *my_dict* are ignored. A missing key or a
    non-dict where a dict is expected yields False.
    """
    if not isinstance(my_dict, dict):
        return False
    for key, expected in check_against.items():
        if key not in my_dict:
            return False
        value = my_dict[key]
        if isinstance(expected, dict):
            # Only bail out on failure; returning the nested result directly
            # would skip every remaining key of this level.
            if not check_dict(value, expected):
                return False
        elif not isinstance(value, expected):
            return False
    return True
您可以使用递归构建结构:
def get_type(value):
    """Return the "type skeleton" of *value*.

    A dict is mapped to a dict with the same keys whose values are the
    skeletons of the original values; anything else becomes
    ``str(type(value))``.
    """
    if isinstance(value, dict):
        return {key: get_type(item) for key, item in value.items()}
    return str(type(value))
然后将所需的结构与您的字典进行比较:
get_type(current_conf) == get_type(required_conf)
示例:
required_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
get_type(required_conf)
{'info': {'conf_two': "<type 'str'>", 'conf_one': "<type 'float'>", 'optional_conf': "<type 'str'>", 'conf_three': "<type 'bool'>"}, 'version': "<type 'int'>"}
字典的本质是:只要它们是在 Python 中使用,而不是导出为某种 JSON,就不需要规定键的顺序;相反,通过查找键即可返回对应的值(这正是"字典"的含义)。
在任何一种情况下,这些函数都应该为您提供您所寻找的样本中存在的嵌套级别。
# assuming identical order of keys is required
def check_structure(conf_structure, my_conf):
    """Return True if *my_conf* has the same keys, in the same order, as
    *conf_structure*, at the top level and one level of nested dicts.

    Value types are not checked, and only one nesting level is compared.
    """
    # dict.keys() views compare like sets, so compare lists to honour order.
    if list(my_conf) != list(conf_structure):
        return False
    for key, value in my_conf.items():
        if isinstance(value, dict):
            expected = conf_structure[key]
            # A nested dict where the template has no dict is a mismatch
            # (and would otherwise fail on expected.keys()).
            if not isinstance(expected, dict) or list(value) != list(expected):
                return False
    return True
# assuming identical order of keys is not required
def check_structure(conf_structure, my_conf):
    """Return True if *my_conf* has exactly the same keys as
    *conf_structure*, ignoring order, at the top level and one level of
    nested dicts.

    Value types are not checked, and only one nesting level is compared.
    """
    if set(my_conf) != set(conf_structure):
        return False
    for key, value in my_conf.items():
        # Non-dict values have no nested keys to compare; only dict values
        # are inspected further.
        if isinstance(value, dict):
            expected = conf_structure[key]
            if not isinstance(expected, dict) or set(value) != set(expected):
                return False
    return True
如果嵌套级别更高,则显然需要修改此解决方案(即它只能比较这样一类字典的结构:其中某些值本身也是字典,但这些内层字典不再继续嵌套字典)。
看起来 dict-schema-validator 软件包完全符合您的需要:
这是一个表示客户的简单模式:
{
"_id": "ObjectId",
"created": "date",
"is_active": "bool",
"fullname": "string",
"age": ["int", "null"],
"contact": {
"phone": "string",
"email": "string"
},
"cards": [{
"type": "string",
"expires": "date"
}]
}
验证:
from datetime import datetime
import json
from dict_schema_validator import validator
with open('models/customer.json', 'r') as j:
schema = json.loads(j.read())
customer = {
"_id": 123,
"created": datetime.now(),
"is_active": True,
"fullname": "Jorge York",
"age": 32,
"contact": {
"phone": "559-940-1435",
"email": "york@example.com",
"skype": "j.york123"
},
"cards": [
{"type": "visa", "expires": "12/2029"},
{"type": "visa"},
]
}
errors = validator.validate(schema, customer)
for err in errors:
print(err['msg'])
输出:
[*] "_id" has wrong type. Expected: "ObjectId", found: "int"
[+] Extra field: "contact.skype" having type: "str"
[*] "cards[0].expires" has wrong type. Expected: "date", found: "str"
[-] Missing field: "cards[1].expires"
您还可以使用 dataclasses_json 库。这是我通常的做法
from dataclasses import dataclass
from dataclasses_json import dataclass_json, Undefined
from dataclasses_json.undefined import UndefinedParameterError
from typing import Optional
#### define schema #######
@dataclass_json(undefined=Undefined.RAISE)
@dataclass
class Info:
    """Schema for the nested ``info`` section; unknown keys are rejected
    via ``Undefined.RAISE``."""

    conf_one: float
    # NOTE(review): conf_two is left commented out as in the original
    # snippet; with Undefined.RAISE a config containing 'conf_two' is
    # presumably reported as an extra parameter -- confirm this is intended.
    # conf_two: str
    conf_three: bool
    # A default makes the field genuinely optional; without one the
    # dataclass requires it.
    optional_conf: Optional[str] = None
@dataclass_json
@dataclass
class ConfStructure:
    """Top-level configuration schema: a version plus a nested ``Info``."""

    version: int
    info: Info
####### test for compliance####
# Round-trip the config through the schema; the two handlers distinguish a
# missing key from an undeclared (extra) one.
try:
    ConfStructure.from_dict(my_conf).to_dict()
except KeyError:
    print('theres a missing parameter')
except UndefinedParameterError:
    print('extra parameters')
您可以使用 https://pypi.org/project/dictify/ 中的 dictify
。
在此处阅读文档 https://dictify.readthedocs.io/en/latest/index.html
这是可以做到的。
from dictify import Field, Model
class Info(Model):
    """dictify model for the nested ``info`` section of the configuration."""

    conf_one = Field(required=True).instance(float)
    conf_two = Field(required=True).instance(str)
    conf_three = Field(required=True).instance(bool)
    # Not marked required, unlike the three fields above.
    optional_conf = Field().instance(str)
class MyConf(Model):
    """dictify model for the whole configuration: a version plus ``Info``."""

    version = Field(required=True).instance(int)
    info = Field().model(Info)
my_conf = MyConf() # Invalid without required fields
# Valid
my_conf = MyConf({
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
})
my_conf['info']['conf_one'] = 'hi' # Invalid, won't be assigned
对未来的建议:使用 Pydantic!
Pydantic 在运行时强制执行类型提示,并在数据无效时提供用户友好的错误。用纯粹、规范的 Python 定义数据应有的形式,然后用 pydantic 验证它,就这么简单:
from pydantic import BaseModel
class Info(BaseModel):
    """pydantic model for the nested ``info`` section of the configuration."""

    conf_one: float
    conf_two: str
    conf_three: bool

    class Config:
        # Reject keys that are not declared above instead of accepting them.
        extra = 'forbid'
class ConfStructure(BaseModel):
    """pydantic model for the whole configuration: a version plus ``Info``."""

    version: int
    info: Info
如果验证失败,pydantic 将引发错误并详细说明错误:
my_conf_wrong = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
my_conf_right = {
'version': 10,
'info': {
'conf_one': 14.5,
'conf_two': 'something',
'conf_three': False
}
}
model = ConfStructure(**my_conf_right)
print(model.dict())
# {'version': 10, 'info': {'conf_one': 14.5, 'conf_two': 'something', 'conf_three': False}}
res = ConfStructure(**my_conf_wrong)
# pydantic.error_wrappers.ValidationError: 1 validation error for ConfStructure
# info -> optional_conf
# extra fields not permitted (type=value_error.extra)
有一个用于验证 JSON 文件的标准,称为 JSON Schema。
验证器已经在许多语言中实现,包括 Python。更多细节请参阅文档。在下面的示例中,我将使用 Python 包 jsonschema(文档)。
给定配置数据
my_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar',
},
}
和相应的配置架构
conf_structure = {
'type': 'object',
'properties': {
'version': {'type': 'integer'},
'info': {
'type': 'object',
'properties': {
'conf_one': {'type': 'number'},
'conf_two': {'type': 'string'},
'conf_three': {'type': 'boolean'},
'optional_conf': {'type': 'string'},
},
'required': ['conf_one', 'conf_two', 'conf_three'],
},
},
}
验证此数据的实际代码就这么简单:
import jsonschema
jsonschema.validate(my_conf, schema=conf_structure)
这种方法的一大优势是您可以将数据和模式存储为 JSON 格式的文件。
我有一个包含配置信息的字典:
my_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
我想检查字典是否符合我需要的结构。
我正在寻找这样的东西:
conf_structure = {
'version': int,
'info': {
'conf_one': float,
'conf_two': str,
'conf_three': bool
}
}
is_ok = check_structure(conf_structure, my_conf)
是否有针对此问题的现成解决方案,或者有能让 check_structure 更容易实现的库?
您可以使用 schema
(PyPi Link)
schema is a library for validating Python data structures, such as those obtained from config-files, forms, external services or command-line parsing, converted from JSON/YAML (or something else) to Python data-types.
from schema import Schema, And, Use, Optional, SchemaError
def check(conf_schema, conf):
    """Return True if *conf* validates against *conf_schema*, else False.

    *conf_schema* is any object exposing ``validate(conf)`` that raises
    ``SchemaError`` on invalid data.
    """
    try:
        conf_schema.validate(conf)
    except SchemaError:
        return False
    # Only reached when validate() did not raise.
    return True
# Plain types are checked against the value's type. Use(...) would instead
# call the callable to convert the value, so Use(str) / Use(bool) would
# accept practically any input rather than validate it.
conf_schema = Schema({
    'version': int,
    'info': {
        'conf_one': float,
        'conf_two': str,
        'conf_three': bool,
        Optional('optional_conf'): str,
    },
})

# Sample configuration to validate.
conf = {
    'version': 1,
    'info': {
        'conf_one': 2.5,
        'conf_two': 'foo',
        'conf_three': False,
        'optional_conf': 'bar',
    },
}

print(check(conf_schema, conf))
不使用库,你也可以像这样定义一个简单的递归函数:
def check_structure(struct, conf):
    """Recursively check that *conf* matches the template *struct*.

    *struct* may be:
      * a dict mapping keys to nested templates -- every key must be present
        in *conf* (extra keys in *conf* are allowed);
      * a list ``[template]`` -- every element of *conf* must match
        ``template``; an empty list template accepts any list;
      * a type -- *conf* must be an instance of it.

    Any other template, or a container mismatch, yields False.
    """
    if isinstance(struct, dict):
        if not isinstance(conf, dict):
            return False
        return all(
            key in conf and check_structure(struct[key], conf[key])
            for key in struct
        )
    if isinstance(struct, list):
        if not isinstance(conf, list):
            return False
        if not struct:
            # No element template given: avoid IndexError on struct[0]
            # and treat the template as "any list".
            return True
        element_template = struct[0]
        return all(check_structure(element_template, item) for item in conf)
    if isinstance(struct, type):
        return isinstance(conf, struct)
    # struct is neither a dict, a list, nor a type.
    return False
这假定配置可以具有不在您的结构中的键,如您的示例中所示。
更新:新版本也支持列表,例如 'foo': [{'bar': int}]
@tobias_k 抢先了我一步(可能在时间和质量上都是如此),但这里还有一个用于同一任务的递归函数,您(和我)可能更容易理解:
def check_dict(my_dict, check_against):
    """Return True if *my_dict* matches the template *check_against*.

    *check_against* maps each required key either to a type (the value must
    be an instance of it) or to a nested template dict (checked
    recursively). Extra keys in *my_dict* are ignored. A missing key or a
    non-dict where a dict is expected yields False.
    """
    if not isinstance(my_dict, dict):
        return False
    for key, expected in check_against.items():
        if key not in my_dict:
            return False
        value = my_dict[key]
        if isinstance(expected, dict):
            # Only bail out on failure; returning the nested result directly
            # would skip every remaining key of this level.
            if not check_dict(value, expected):
                return False
        elif not isinstance(value, expected):
            return False
    return True
您可以使用递归构建结构:
def get_type(value):
    """Return the "type skeleton" of *value*.

    A dict is mapped to a dict with the same keys whose values are the
    skeletons of the original values; anything else becomes
    ``str(type(value))``.
    """
    if isinstance(value, dict):
        return {key: get_type(item) for key, item in value.items()}
    return str(type(value))
然后将所需的结构与您的字典进行比较:
get_type(current_conf) == get_type(required_conf)
示例:
required_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
get_type(required_conf)
{'info': {'conf_two': "<type 'str'>", 'conf_one': "<type 'float'>", 'optional_conf': "<type 'str'>", 'conf_three': "<type 'bool'>"}, 'version': "<type 'int'>"}
字典的本质是:只要它们是在 Python 中使用,而不是导出为某种 JSON,就不需要规定键的顺序;相反,通过查找键即可返回对应的值(这正是"字典"的含义)。
在任何一种情况下,这些函数都应该为您提供您所寻找的样本中存在的嵌套级别。
# assuming identical order of keys is required
def check_structure(conf_structure, my_conf):
    """Return True if *my_conf* has the same keys, in the same order, as
    *conf_structure*, at the top level and one level of nested dicts.

    Value types are not checked, and only one nesting level is compared.
    """
    # dict.keys() views compare like sets, so compare lists to honour order.
    if list(my_conf) != list(conf_structure):
        return False
    for key, value in my_conf.items():
        if isinstance(value, dict):
            expected = conf_structure[key]
            # A nested dict where the template has no dict is a mismatch
            # (and would otherwise fail on expected.keys()).
            if not isinstance(expected, dict) or list(value) != list(expected):
                return False
    return True
# assuming identical order of keys is not required
def check_structure(conf_structure, my_conf):
    """Return True if *my_conf* has exactly the same keys as
    *conf_structure*, ignoring order, at the top level and one level of
    nested dicts.

    Value types are not checked, and only one nesting level is compared.
    """
    if set(my_conf) != set(conf_structure):
        return False
    for key, value in my_conf.items():
        # Non-dict values have no nested keys to compare; only dict values
        # are inspected further.
        if isinstance(value, dict):
            expected = conf_structure[key]
            if not isinstance(expected, dict) or set(value) != set(expected):
                return False
    return True
如果嵌套级别更高,则显然需要修改此解决方案(即它只能比较这样一类字典的结构:其中某些值本身也是字典,但这些内层字典不再继续嵌套字典)。
看起来 dict-schema-validator 软件包完全符合您的需要:
这是一个表示客户的简单模式:
{
"_id": "ObjectId",
"created": "date",
"is_active": "bool",
"fullname": "string",
"age": ["int", "null"],
"contact": {
"phone": "string",
"email": "string"
},
"cards": [{
"type": "string",
"expires": "date"
}]
}
验证:
from datetime import datetime
import json
from dict_schema_validator import validator
with open('models/customer.json', 'r') as j:
schema = json.loads(j.read())
customer = {
"_id": 123,
"created": datetime.now(),
"is_active": True,
"fullname": "Jorge York",
"age": 32,
"contact": {
"phone": "559-940-1435",
"email": "york@example.com",
"skype": "j.york123"
},
"cards": [
{"type": "visa", "expires": "12/2029"},
{"type": "visa"},
]
}
errors = validator.validate(schema, customer)
for err in errors:
print(err['msg'])
输出:
[*] "_id" has wrong type. Expected: "ObjectId", found: "int"
[+] Extra field: "contact.skype" having type: "str"
[*] "cards[0].expires" has wrong type. Expected: "date", found: "str"
[-] Missing field: "cards[1].expires"
您还可以使用 dataclasses_json 库。这是我通常的做法
from dataclasses import dataclass
from dataclasses_json import dataclass_json, Undefined
from dataclasses_json.undefined import UndefinedParameterError
from typing import Optional
#### define schema #######
@dataclass_json(undefined=Undefined.RAISE)
@dataclass
class Info:
    """Schema for the nested ``info`` section; unknown keys are rejected
    via ``Undefined.RAISE``."""

    conf_one: float
    # NOTE(review): conf_two is left commented out as in the original
    # snippet; with Undefined.RAISE a config containing 'conf_two' is
    # presumably reported as an extra parameter -- confirm this is intended.
    # conf_two: str
    conf_three: bool
    # A default makes the field genuinely optional; without one the
    # dataclass requires it.
    optional_conf: Optional[str] = None
@dataclass_json
@dataclass
class ConfStructure:
    """Top-level configuration schema: a version plus a nested ``Info``."""

    version: int
    info: Info
####### test for compliance####
# Round-trip the config through the schema; the two handlers distinguish a
# missing key from an undeclared (extra) one.
try:
    ConfStructure.from_dict(my_conf).to_dict()
except KeyError:
    print('theres a missing parameter')
except UndefinedParameterError:
    print('extra parameters')
您可以使用 https://pypi.org/project/dictify/ 中的 dictify
。
在此处阅读文档 https://dictify.readthedocs.io/en/latest/index.html
这是可以做到的。
from dictify import Field, Model
class Info(Model):
    """dictify model for the nested ``info`` section of the configuration."""

    conf_one = Field(required=True).instance(float)
    conf_two = Field(required=True).instance(str)
    conf_three = Field(required=True).instance(bool)
    # Not marked required, unlike the three fields above.
    optional_conf = Field().instance(str)
class MyConf(Model):
    """dictify model for the whole configuration: a version plus ``Info``."""

    version = Field(required=True).instance(int)
    info = Field().model(Info)
my_conf = MyConf() # Invalid without required fields
# Valid
my_conf = MyConf({
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
})
my_conf['info']['conf_one'] = 'hi' # Invalid, won't be assigned
对未来的建议:使用 Pydantic!
Pydantic 在运行时强制执行类型提示,并在数据无效时提供用户友好的错误。用纯粹、规范的 Python 定义数据应有的形式,然后用 pydantic 验证它,就这么简单:
from pydantic import BaseModel
class Info(BaseModel):
    """pydantic model for the nested ``info`` section of the configuration."""

    conf_one: float
    conf_two: str
    conf_three: bool

    class Config:
        # Reject keys that are not declared above instead of accepting them.
        extra = 'forbid'
class ConfStructure(BaseModel):
    """pydantic model for the whole configuration: a version plus ``Info``."""

    version: int
    info: Info
如果验证失败,pydantic 将引发错误并详细说明错误:
my_conf_wrong = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar'
}
}
my_conf_right = {
'version': 10,
'info': {
'conf_one': 14.5,
'conf_two': 'something',
'conf_three': False
}
}
model = ConfStructure(**my_conf_right)
print(model.dict())
# {'version': 10, 'info': {'conf_one': 14.5, 'conf_two': 'something', 'conf_three': False}}
res = ConfStructure(**my_conf_wrong)
# pydantic.error_wrappers.ValidationError: 1 validation error for ConfStructure
# info -> optional_conf
# extra fields not permitted (type=value_error.extra)
有一个用于验证 JSON 文件的标准,称为 JSON Schema。
验证器已经在许多语言中实现,包括 Python。更多细节请参阅文档。在下面的示例中,我将使用 Python 包 jsonschema(文档)。
给定配置数据
my_conf = {
'version': 1,
'info': {
'conf_one': 2.5,
'conf_two': 'foo',
'conf_three': False,
'optional_conf': 'bar',
},
}
和相应的配置架构
conf_structure = {
'type': 'object',
'properties': {
'version': {'type': 'integer'},
'info': {
'type': 'object',
'properties': {
'conf_one': {'type': 'number'},
'conf_two': {'type': 'string'},
'conf_three': {'type': 'boolean'},
'optional_conf': {'type': 'string'},
},
'required': ['conf_one', 'conf_two', 'conf_three'],
},
},
}
验证此数据的实际代码就这么简单:
import jsonschema
jsonschema.validate(my_conf, schema=conf_structure)
这种方法的一大优势是您可以将数据和模式存储为 JSON 格式的文件。