I'm trying to implement a lazily evaluated generic field type for Pydantic v2. This is the simple implementation I have. You can assign either a value, a function, or an async function to the lazy field, and it is only evaluated when you access it. If you use this in any normal class, it works perfectly. But it doesn't work as a Pydantic field.
The problem is that `__set__` is never called here, while `__get__` is called twice for some reason. I know Pydantic does some weird stuff internally, which might be the reason. Any help resolving this would be highly appreciated.
import asyncio
import inspect
from typing import Any, Awaitable, Callable, Generic, Optional, TypeVar, Union, cast
from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema
T = TypeVar("T")
class LazyField(Generic[T]):
    """Descriptor holding a value, a function, or an async function.

    Whatever is assigned is stored untouched; a callable is only invoked the
    first time the attribute is read on an instance, and its result is cached
    for later reads.  State is kept per owner instance (in the instance's
    ``__dict__``), so two objects of the same class never see each other's
    values.  Owners without a ``__dict__`` fall back to state shared on the
    descriptor itself.

    NOTE(review): the descriptor protocol only applies on ordinary classes.
    When an instance is used as the default of a Pydantic ``BaseModel`` field,
    ``__set__`` is never reached (see the output quoted with this snippet);
    presumably Pydantic stores field values directly in the instance
    ``__dict__`` -- confirm against the Pydantic documentation.
    """

    def __init__(self, value=None) -> None:
        """Create the field, optionally seeding it with a default.

        Args:
            value: Default value or loader (sync or async callable) used by
                instances that never assign the attribute.  ``None`` means
                "nothing set".
        """
        print("LazyField.__init__")
        self._value: Optional[T] = None
        self._loader: Optional[Callable[[], Union[T, Awaitable[T]]]] = None
        self._is_loaded: bool = False
        # Key for the per-instance state; replaced by a readable name in
        # __set_name__ when the field is declared in a class body.
        self._slot: str = f"_lazyfield_{id(self):x}"
        if value is not None:
            self._assign(value)

    def __set_name__(self, owner: Any, name: str) -> None:
        """Record the attribute name so per-instance state gets a stable key."""
        self._slot = f"_lazyfield_{name}"

    def __get__(self, obj: Any, objtype=None) -> T:
        """Return the (lazily evaluated) value for ``obj``.

        Returns the descriptor itself for class-level access.

        Raises:
            AttributeError: if neither a value nor a loader has been set.
            RuntimeError: if an async loader must be resolved while an event
                loop is already running in this thread.
        """
        print("LazyField.__get__")
        if obj is None:
            return self  # type: ignore
        return self._cell(obj)._resolve()

    def __set__(
        self, obj: Any, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Store ``value`` for ``obj``; callables are kept as unevaluated loaders."""
        print("LazyField.__set__")
        self._cell(obj)._assign(value)

    def _cell(self, obj: Any) -> "LazyField[T]":
        """Return the state holder for ``obj``, creating it from the default."""
        store = getattr(obj, "__dict__", None)
        if store is None:
            # No instance dict (e.g. __slots__): keep the shared-state behaviour.
            return self
        cell = store.get(self._slot)
        if cell is None:
            cls = type(self)
            # Bypass __init__: the holder only needs a copy of the default state.
            cell = cls.__new__(cls)
            cell._value = self._value
            cell._loader = self._loader
            cell._is_loaded = self._is_loaded
            cell._slot = self._slot
            store[self._slot] = cell
        return cell

    def _assign(self, value: Any) -> None:
        """Reset the cache and remember ``value`` as a loader or plain value."""
        self._is_loaded = False
        if callable(value):
            self._loader = cast(
                Union[Callable[[], T], Callable[[], Awaitable[T]]], value
            )
            self._value = None
        else:
            self._loader = None
            self._value = cast(T, value)

    def _resolve(self) -> T:
        """Evaluate the loader once if needed and return the cached value."""
        if self._is_loaded:
            return self._value  # type: ignore
        loader = self._loader
        if loader is None:
            if self._value is None:
                raise AttributeError("LazyField has no value or loader set")
            return self._value
        result = loader()
        # Covers ``async def`` loaders as well as sync callables (lambdas,
        # partials) that merely return an awaitable.
        if inspect.isawaitable(result):
            result = self._run_awaitable(result)
        self._value = result  # type: ignore
        # A loader may legitimately return None, so the flag -- not the
        # value -- records that evaluation happened.
        self._is_loaded = True
        self._loader = None
        return result  # type: ignore

    @staticmethod
    def _run_awaitable(awaitable: Awaitable[T]) -> T:
        """Drive ``awaitable`` to completion from synchronous code."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # no loop in this thread: safe to run our own below
        else:
            # Blocking on a running loop from inside itself cannot work.
            if inspect.iscoroutine(awaitable):
                awaitable.close()  # avoid a "never awaited" warning
            raise RuntimeError(
                "LazyField cannot resolve an async loader while an event loop "
                "is running; await the loader and assign its result instead"
            )

        async def _drain() -> T:
            return await awaitable

        # asyncio.run creates a fresh loop and closes it afterwards.
        return asyncio.run(_drain())

    @staticmethod
    def _to_json(value: Any) -> Any:
        """Serialize a field value: loaded/plain values pass through, loaders become None."""
        if isinstance(value, LazyField):
            return value._value if value._is_loaded else None
        return None if callable(value) else value

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: type[Any], handler: "GetCoreSchemaHandler"
    ) -> "CoreSchema":
        """Build the Pydantic core schema for ``LazyField[T]``.

        JSON input must match ``T``; Python input may be a ``T`` or any
        callable.  For JSON output an unevaluated callable is emitted as
        ``None``.
        """
        print("LazyField.__get_pydantic_core_schema__")
        # Extract the inner type from LazyField[T]; bare LazyField means Any.
        type_args = getattr(source_type, "__args__", ())
        inner_type = type_args[0] if type_args else Any
        # Generate schema for the inner type
        inner_schema = handler.generate_schema(inner_type)
        schema = core_schema.json_or_python_schema(
            json_schema=inner_schema,
            python_schema=core_schema.union_schema(
                [
                    # Handle direct value assignment
                    inner_schema,
                    # Handle callable assignment (sync and async alike)
                    core_schema.callable_schema(),
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._to_json,
                # The serializer may return None for unevaluated loaders.
                return_schema=core_schema.nullable_schema(inner_schema),
                when_used="json",
            ),
        )
        return schema
# Pydantic model whose ``content`` field is annotated as LazyField[bytes]
# and defaults to an empty LazyField instance.
class A(BaseModel):
    content: LazyField[bytes] = LazyField()


# Async loader returning a fixed bytes payload.
async def get_content() -> bytes:
    return b"Hello, world!"


# Pass the coroutine function itself (not its awaited result) as the value.
a = A(content=get_content)
# Per the output quoted below, this prints the function object, not the bytes.
print(a.content)
This is the output from above:
LazyField.__init__
LazyField.__get__
LazyField.__get__
LazyField.__get_pydantic_core_schema__
<function get_content at 0x102cc4860>
As you can see, `__get__` is called twice. And because `__set__` is never called, `_is_loaded` and `_loader` keep their initial values (`False` and `None`), so `__get__` just returns the raw value as a function without evaluating it.
I'm trying to implement a lazily evaluated generic field type for Pydantic v2. This is the simple implementation I have. You can assign either a value, a function, or an async function to the lazy field, and it is only evaluated when you access it. If you use this in any normal class, it works perfectly. But it doesn't work as a Pydantic field.
The problem is that `__set__` is never called here, while `__get__` is called twice for some reason. I know Pydantic does some weird stuff internally, which might be the reason. Any help resolving this would be highly appreciated.
import asyncio
import inspect
from typing import Any, Awaitable, Callable, Generic, Optional, TypeVar, Union, cast
from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema
T = TypeVar("T")
class LazyField(Generic[T]):
    """Descriptor holding a value, a function, or an async function.

    Whatever is assigned is stored untouched; a callable is only invoked the
    first time the attribute is read on an instance, and its result is cached
    for later reads.  State is kept per owner instance (in the instance's
    ``__dict__``), so two objects of the same class never see each other's
    values.  Owners without a ``__dict__`` fall back to state shared on the
    descriptor itself.

    NOTE(review): the descriptor protocol only applies on ordinary classes.
    When an instance is used as the default of a Pydantic ``BaseModel`` field,
    ``__set__`` is never reached (see the output quoted with this snippet);
    presumably Pydantic stores field values directly in the instance
    ``__dict__`` -- confirm against the Pydantic documentation.
    """

    def __init__(self, value=None) -> None:
        """Create the field, optionally seeding it with a default.

        Args:
            value: Default value or loader (sync or async callable) used by
                instances that never assign the attribute.  ``None`` means
                "nothing set".
        """
        print("LazyField.__init__")
        self._value: Optional[T] = None
        self._loader: Optional[Callable[[], Union[T, Awaitable[T]]]] = None
        self._is_loaded: bool = False
        # Key for the per-instance state; replaced by a readable name in
        # __set_name__ when the field is declared in a class body.
        self._slot: str = f"_lazyfield_{id(self):x}"
        if value is not None:
            self._assign(value)

    def __set_name__(self, owner: Any, name: str) -> None:
        """Record the attribute name so per-instance state gets a stable key."""
        self._slot = f"_lazyfield_{name}"

    def __get__(self, obj: Any, objtype=None) -> T:
        """Return the (lazily evaluated) value for ``obj``.

        Returns the descriptor itself for class-level access.

        Raises:
            AttributeError: if neither a value nor a loader has been set.
            RuntimeError: if an async loader must be resolved while an event
                loop is already running in this thread.
        """
        print("LazyField.__get__")
        if obj is None:
            return self  # type: ignore
        return self._cell(obj)._resolve()

    def __set__(
        self, obj: Any, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Store ``value`` for ``obj``; callables are kept as unevaluated loaders."""
        print("LazyField.__set__")
        self._cell(obj)._assign(value)

    def _cell(self, obj: Any) -> "LazyField[T]":
        """Return the state holder for ``obj``, creating it from the default."""
        store = getattr(obj, "__dict__", None)
        if store is None:
            # No instance dict (e.g. __slots__): keep the shared-state behaviour.
            return self
        cell = store.get(self._slot)
        if cell is None:
            cls = type(self)
            # Bypass __init__: the holder only needs a copy of the default state.
            cell = cls.__new__(cls)
            cell._value = self._value
            cell._loader = self._loader
            cell._is_loaded = self._is_loaded
            cell._slot = self._slot
            store[self._slot] = cell
        return cell

    def _assign(self, value: Any) -> None:
        """Reset the cache and remember ``value`` as a loader or plain value."""
        self._is_loaded = False
        if callable(value):
            self._loader = cast(
                Union[Callable[[], T], Callable[[], Awaitable[T]]], value
            )
            self._value = None
        else:
            self._loader = None
            self._value = cast(T, value)

    def _resolve(self) -> T:
        """Evaluate the loader once if needed and return the cached value."""
        if self._is_loaded:
            return self._value  # type: ignore
        loader = self._loader
        if loader is None:
            if self._value is None:
                raise AttributeError("LazyField has no value or loader set")
            return self._value
        result = loader()
        # Covers ``async def`` loaders as well as sync callables (lambdas,
        # partials) that merely return an awaitable.
        if inspect.isawaitable(result):
            result = self._run_awaitable(result)
        self._value = result  # type: ignore
        # A loader may legitimately return None, so the flag -- not the
        # value -- records that evaluation happened.
        self._is_loaded = True
        self._loader = None
        return result  # type: ignore

    @staticmethod
    def _run_awaitable(awaitable: Awaitable[T]) -> T:
        """Drive ``awaitable`` to completion from synchronous code."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # no loop in this thread: safe to run our own below
        else:
            # Blocking on a running loop from inside itself cannot work.
            if inspect.iscoroutine(awaitable):
                awaitable.close()  # avoid a "never awaited" warning
            raise RuntimeError(
                "LazyField cannot resolve an async loader while an event loop "
                "is running; await the loader and assign its result instead"
            )

        async def _drain() -> T:
            return await awaitable

        # asyncio.run creates a fresh loop and closes it afterwards.
        return asyncio.run(_drain())

    @staticmethod
    def _to_json(value: Any) -> Any:
        """Serialize a field value: loaded/plain values pass through, loaders become None."""
        if isinstance(value, LazyField):
            return value._value if value._is_loaded else None
        return None if callable(value) else value

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: type[Any], handler: "GetCoreSchemaHandler"
    ) -> "CoreSchema":
        """Build the Pydantic core schema for ``LazyField[T]``.

        JSON input must match ``T``; Python input may be a ``T`` or any
        callable.  For JSON output an unevaluated callable is emitted as
        ``None``.
        """
        print("LazyField.__get_pydantic_core_schema__")
        # Extract the inner type from LazyField[T]; bare LazyField means Any.
        type_args = getattr(source_type, "__args__", ())
        inner_type = type_args[0] if type_args else Any
        # Generate schema for the inner type
        inner_schema = handler.generate_schema(inner_type)
        schema = core_schema.json_or_python_schema(
            json_schema=inner_schema,
            python_schema=core_schema.union_schema(
                [
                    # Handle direct value assignment
                    inner_schema,
                    # Handle callable assignment (sync and async alike)
                    core_schema.callable_schema(),
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._to_json,
                # The serializer may return None for unevaluated loaders.
                return_schema=core_schema.nullable_schema(inner_schema),
                when_used="json",
            ),
        )
        return schema
# Pydantic model whose ``content`` field is annotated as LazyField[bytes]
# and defaults to an empty LazyField instance.
class A(BaseModel):
    content: LazyField[bytes] = LazyField()


# Async loader returning a fixed bytes payload.
async def get_content() -> bytes:
    return b"Hello, world!"


# Pass the coroutine function itself (not its awaited result) as the value.
a = A(content=get_content)
# Per the output quoted below, this prints the function object, not the bytes.
print(a.content)
This is the output from above:
LazyField.__init__
LazyField.__get__
LazyField.__get__
LazyField.__get_pydantic_core_schema__
<function get_content at 0x102cc4860>
As you can see, `__get__` is called twice. And because `__set__` is never called, `_is_loaded` and `_loader` keep their initial values (`False` and `None`), so `__get__` just returns the raw value as a function without evaluating it.
1 Answer
I'm interested in a similar feature. In my case, my FastAPI route may receive a deeply nested JSON payload. In some cases, sub-models may not be needed, so validating them is a waste of time. I'm looking for a way to defer the validation of sub-models until first access, something like:
# Sketch of the desired API (illustrative only): ``LazyField`` and
# ``ValidationError`` are not imported or defined in this snippet.

# Define a submodel
class B(BaseModel):
    value: int


# Define the main model with a lazy field
class A(BaseModel):
    toplevel: int
    b: B = LazyField()
    # Or using a decorator
    # @lazyfield
    # b: B


# Create an instance of A without validating the submodel immediately
data = {"toplevel": 42, "b": {"value": None}}  # Incorrect submodel type to demonstrate lazy validation
model = A(**data)
# Access eager field without any problem
print(model.toplevel)
# Access the submodel field to trigger validation
try:
    print(model.b)  # This will validate B and raise an error if invalid
except ValidationError as e:
    print("Validation error:", e)
My current solution is to allow extra fields and use cached property:
from functools import cached_property
from pydantic import (
BaseModel,
ConfigDict,
)
# Submodel whose validation is deferred by the workaround below.
class B(BaseModel):
    value: int


class A(BaseModel):
    # Allow undeclared keys so the raw "b" payload is kept in ``model_extra``
    # rather than being declared (and validated) as a field.
    model_config = ConfigDict(extra="allow")
    value: int

    @cached_property
    def b(self) -> B:
        """Validate the raw ``b`` payload as ``B`` on first access and cache it.

        Raises:
            ValueError: if ``model_extra`` is None or has no ``"b"`` key.
        """
        if self.model_extra is None or "b" not in self.model_extra:
            raise ValueError("Missing field `b`")
        return B.model_validate(self.model_extra["b"])


# "b" carries an invalid ``value`` on purpose; validation of A is expected to succeed.
a = A.model_validate({"value": 4, "b": {"value": None}})
print(a.value)
print(a.model_dump_json())  # Print the object "as-is"
print(a.b)  # Fails because `b` does not validate
This requires more boilerplate, and I don't know if there is a way to make it more "user-friendly", through a `LazyField` class or an annotation.
While writing this, I realize that this may raise a concern: a `ValidationError` may be raised at any attribute access (not only during model validation), but this is something I can live with.
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1742325199a4422592.html
… `self.__dict__[name] = value`. I'm not sure, but I guess the `__set__` method is not invoked in this case, as the main logic behind descriptors should be inside the `__getattribute__` (and `__setattr__`?) method of the `object` class: docs.python.org/3/howto/… – lord_haffi Commented Nov 21, 2024 at 21:44
… `property`, the `__set__` method will be explicitly called. So it may be worth it to try and wrap your logic inside a `@property`. – lord_haffi Commented Nov 21, 2024 at 21:46
… `__setattr__` method of your model class to explicitly invoke the `__set__` method if the field does implement it, i.e. if it is a descriptor. – lord_haffi Commented Nov 21, 2024 at 21:48
… `@computed_field`s. Awesome suggestions, thanks! – Dulaj Disanayaka Commented Nov 22, 2024 at 19:17