南京市葬花

Python中JSON数据验证的三种专业级方案

2026-04-09 06:09:02 浏览次数:0
详细信息

1. Pydantic(推荐首选)

核心优势

基础示例

from pydantic import BaseModel, Field, validator
from typing import List, Optional
from datetime import datetime

# Pydantic model for a user record. Constraints are declared per field via
# Field(...); one extra rule for `name` is added with a validator method.
# NOTE(review): `regex=` and `@validator` are the Pydantic v1 spellings --
# confirm which major version of Pydantic this snippet targets.
class User(BaseModel):
    id: int = Field(..., gt=0)  # no default (`...`); constrained with gt=0
    name: str = Field(..., min_length=1, max_length=50)  # no default; length 1-50
    email: str = Field(..., regex=r'^[\w\.-]+@[\w\.-]+\.\w+$')  # no default; must match the pattern
    age: Optional[int] = Field(None, ge=18, le=120)  # defaults to None; bounded by ge=18 / le=120
    roles: List[str] = []  # defaults to an empty list
    created_at: datetime = Field(default_factory=datetime.now)  # default produced by calling datetime.now

    @validator('name')
    def name_must_contain_space(cls, v):
        # Reject names without a space; otherwise return the title-cased
        # value, which becomes the stored value of `name`.
        if ' ' not in v:
            raise ValueError('必须包含空格')
        return v.title()

# Usage: sample input that satisfies every constraint declared on User.
user_data = {
    "id": 1,
    "name": "john doe",
    "email": "john@example.com",
    "age": 25,
    "roles": ["admin", "user"]
}

user = User(**user_data)  # each dict key is passed as a keyword argument
print(user.dict())  # get the validated data back as a dict

高级特性

# Nested models: Address is used as a field type by Company.
class Address(BaseModel):
    # All three fields are plain strings with no default value.
    city: str
    street: str
    zip_code: str

# Company nests one Address model and a list of User models.
class Company(BaseModel):
    name: str
    address: Address  # nested model
    employees: List[User]  # list of nested models

# Creating a model dynamically
from pydantic import create_model

# Each keyword is a field given as a (type, default) tuple; `...` as the
# default marks `name` as having no default, while `value` defaults to 0.
DynamicModel = create_model(
    'DynamicModel',
    name=(str, ...),
    value=(int, 0)
)

# Model-level configuration through an inner Config class.
# NOTE(review): these option names are the Pydantic v1 spellings -- confirm
# the targeted Pydantic version.
class ConfigModel(BaseModel):
    class Config:
        allow_population_by_field_name = True
        anystr_strip_whitespace = True
        extra = 'forbid'  # forbid extra fields

2. JSON Schema + jsonschema库

核心优势

基础示例

import jsonschema
from jsonschema import validate, ValidationError

# JSON Schema for a user object: `id`, `name` and `email` are required and
# properties other than the five listed are rejected.
user_schema = {
    "type": "object",
    "required": ["id", "name", "email"],
    "properties": {
        "id": {
            "type": "integer",
            "minimum": 1
        },
        "name": {
            "type": "string",
            "minLength": 1,
            "maxLength": 50,
            "pattern": "^[A-Za-z ]+$"  # ASCII letters and spaces only
        },
        # NOTE(review): jsonschema enforces `format` only when a format
        # checker is supplied to the validator -- confirm at the call site.
        "email": {
            "type": "string",
            "format": "email"
        },
        "age": {
            "type": "integer",
            "minimum": 18,
            "maximum": 120
        },
        "roles": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["admin", "user", "guest"]  # the only accepted role names
            }
        }
    },
    "additionalProperties": False
}

# Validation helper
def validate_user(data):
    """Validate ``data`` against ``user_schema``.

    A ``FormatChecker`` is passed explicitly because jsonschema ignores the
    ``format`` keyword (used by the ``email`` property) unless one is
    supplied; without it any string would be accepted as an email.

    Args:
        data: The decoded JSON value to check.

    Returns:
        ``(True, None)`` when ``data`` conforms to the schema, otherwise
        ``(False, message)`` where ``message`` is the ``message`` attribute
        of the ``ValidationError`` that was raised.
    """
    try:
        validate(
            instance=data,
            schema=user_schema,
            format_checker=jsonschema.FormatChecker(),
        )
    except ValidationError as e:
        return False, e.message
    return True, None

# Usage: `is_valid` is the boolean result, `error` the message (or None).
data = {"id": 1, "name": "John", "email": "john@example.com"}
is_valid, error = validate_user(data)

高级特性

# A more complex schema: reusable definitions, `oneOf`, and `if`/`then`.
complex_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    # Reusable sub-schemas, referenced below through "$ref".
    "definitions": {
        "address": {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
                "city": {"type": "string"}
            }
        }
    },
    # The instance must match exactly one of: the address definition, a string.
    "oneOf": [
        {"$ref": "#/definitions/address"},
        {"type": "string"}
    ],
    # Conditional rule: when the `if` sub-schema matches, `then` applies too.
    "if": {
        "properties": {"type": {"const": "user"}}
    },
    "then": {
        "required": ["username"]
    }
}

# Custom validator
def _check_email_format(validator, fmt, instance, schema):
    """Keyword callback for ``format`` that adds an '@' check for emails.

    The stock Draft 7 ``format`` handling runs first so a configured format
    checker keeps working; the extra check only applies to string instances
    whose schema declares ``"format": "email"``.
    """
    stock_format = jsonschema.Draft7Validator.VALIDATORS["format"]
    yield from stock_format(validator, fmt, instance, schema) or ()
    if fmt == "email" and isinstance(instance, str) and '@' not in instance:
        yield jsonschema.ValidationError("无效的邮箱")


# A method added to a Draft7Validator subclass is never called by jsonschema:
# keyword checks are looked up in the validator's keyword table. extend()
# builds a new validator class with our callback registered for "format".
CustomValidator = jsonschema.validators.extend(
    jsonschema.Draft7Validator,
    validators={"format": _check_email_format},
)

# Using the custom validator: bind it to the schema, then check `data`.
validator = CustomValidator(schema=user_schema)
validator.validate(data)

3. Marshmallow

核心优势

基础示例

from datetime import datetime

from marshmallow import RAISE, Schema, ValidationError, fields, validate, validates

class UserSchema(Schema):
    """Marshmallow schema for a user record.

    ``id``, ``name`` and ``email`` are required; ``created_at`` and ``roles``
    fall back to ``datetime.now()`` and an empty list when absent on load.
    """

    id = fields.Int(required=True, validate=validate.Range(min=1))
    name = fields.Str(
        required=True,
        validate=[
            validate.Length(min=1, max=50),
            validate.Regexp(r'^[A-Za-z ]+$')
        ]
    )
    email = fields.Email(required=True)
    age = fields.Int(validate=validate.Range(min=18, max=120))
    # NOTE(review): newer marshmallow releases spell `missing=` as
    # `load_default=` -- confirm the targeted version before upgrading.
    created_at = fields.DateTime(missing=datetime.now)
    roles = fields.List(fields.Str(), missing=list)

    @validates('name')
    def validate_name(self, value, **kwargs):
        """Reject names containing "admin" (case-insensitive).

        ``**kwargs`` absorbs extra keyword arguments that newer marshmallow
        versions pass to ``@validates`` hooks.

        Raises:
            ValidationError: If ``value`` contains "admin".
        """
        if 'admin' in value.lower():
            raise ValidationError('名称不能包含admin')

    class Meta:
        # Reject input keys that are not declared fields. `strict` is not
        # the option that controls this; `unknown` is.
        unknown = RAISE

# Usage
schema = UserSchema()

# Validate
data = {"id": 1, "name": "John", "email": "john@example.com"}
try:
    result = schema.load(data)  # validate and deserialize
except ValidationError as err:
    print(err.messages)
else:
    print(result)  # a dict of the loaded values

    # Serialize. The original referenced an undefined `user_object`; dump
    # the value that was just loaded, and only when loading succeeded.
    serialized = schema.dump(result)

高级特性

# Nested schemas
class AddressSchema(Schema):
    """Schema for an address with two string fields."""

    street = fields.String()
    city = fields.String()

class CompanySchema(Schema):
    """Schema for a company: a name, one nested address, many nested users."""

    name = fields.String()
    address = fields.Nested(
        AddressSchema,
    )
    employees = fields.Nested(
        UserSchema,
        many=True,
    )

# Field filtering
class PartialUserSchema(Schema):
    """Schema showing the ``fields`` whitelist and ``exclude`` blacklist options.

    The fields are declared explicitly, and ``exclude`` only names a field
    that is part of the ``fields`` whitelist: marshmallow rejects an
    ``exclude`` entry that is not among the available fields when the schema
    is instantiated.
    """

    id = fields.Int()
    name = fields.Str()
    password = fields.Str()

    class Meta:
        fields = ('id', 'name', 'password')  # fields the schema exposes
        exclude = ('password',)  # ...minus these, leaving id and name

# 动态Schema
from marshmallow import Schema, fields

def create_dynamic_schema(fields_config):
    """Build a ``Schema`` subclass named ``DynamicSchema`` at runtime.

    Args:
        fields_config: Mapping of field name to a type label. The labels
            ``'string'`` and ``'integer'`` are recognised; entries with any
            other label are skipped.

    Returns:
        A new class derived from ``Schema`` with one field per recognised
        entry.
    """
    known_types = (
        ('string', fields.Str),
        ('integer', fields.Int),
    )

    members = {}
    for attr_name, type_label in fields_config.items():
        for label, make_field in known_types:
            if type_label == label:
                members[attr_name] = make_field()
                break

    return type('DynamicSchema', (Schema,), members)

方案对比与选择建议

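| 特性 | Pydantic | JSON Schema | Marshmallow |
| --- | --- | --- | --- |
| 学习曲线 | 平缓 | 中等 | 中等 |
| 性能 | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ | ⭐⭐⭐⭐ |
| 类型提示 | 完美支持 | 有限支持 | 有限支持 |
| 标准兼容 | Python生态 | IETF标准 | Python生态 |
| 序列化 | 内置支持 | 需要额外库 | 内置支持 |
| 错误信息 | 详细友好 | 标准格式 | 可自定义 |
| 复杂验证 | 强大 | 非常强大 | 灵活 |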

选择建议:

选择Pydantic当

选择JSON Schema当

选择Marshmallow当

实战示例:综合方案

# 结合Pydantic和JSON Schema的优势
from pydantic import BaseModel
from jsonschema import validate as js_validate
import json

class HybridValidator:
    """Validate data with a JSON Schema first, then with the Pydantic model.

    Schemas are generated from Pydantic model classes registered through
    ``add_schema`` and stored in ``self.schemas`` under the given name.
    """

    def __init__(self):
        self.schemas = {}
        # Model classes by registration name, so validate() does not have to
        # guess the class from module globals.
        self._models = {}

    def add_schema(self, name: str, pydantic_model: "type[BaseModel]"):
        """Register ``pydantic_model`` under ``name``.

        Stores the JSON Schema produced by ``pydantic_model.schema()`` and
        remembers the model itself for the second validation step.
        """
        self.schemas[name] = pydantic_model.schema()
        self._models[name] = pydantic_model

    def validate(self, name: str, data: dict):
        """Run both validation steps for ``name`` on ``data``.

        Args:
            name: Name the model was registered under.
            data: The dict to validate.

        Returns:
            A model instance built from ``data`` when a ``BaseModel``
            subclass is known for ``name``; otherwise ``data`` unchanged.

        Raises:
            Whatever ``js_validate`` or the model constructor raises on
            invalid data.
        """
        # Step 1: JSON Schema validation
        if name in self.schemas:
            js_validate(data, self.schemas[name])

        # Step 2: Pydantic validation. Prefer the registered class; fall back
        # to the module-global lookup for names whose schema was placed in
        # self.schemas without going through add_schema().
        model_class = self._models.get(name)
        if not isinstance(model_class, type):
            model_class = globals().get(name)
        # issubclass() raises TypeError for non-classes, hence the type guard.
        if isinstance(model_class, type) and issubclass(model_class, BaseModel):
            return model_class(**data)
        return data

# Usage: register the User model under the name 'User', then validate the
# sample dict against both the generated schema and the model.
validator = HybridValidator()
validator.add_schema('User', User)
result = validator.validate('User', user_data)

最佳实践

1. 始终验证输入:不要信任任何外部数据。
2. 明确的错误消息:提供具体的验证失败信息。
3. 性能考虑:对高频验证考虑缓存Schema。
4. 版本控制:为API提供Schema版本管理。
5. 文档生成:利用验证Schema自动生成API文档。
6. 安全验证:特别注意敏感字段的验证。

根据具体项目需求和技术栈选择合适的方案,Pydantic通常是大多数Python项目的首选。

相关推荐