Skip to content

karenina.storage.auto_mapper

auto_mapper

Auto-mapper for generating SQLAlchemy models from Pydantic models.

This module provides utilities to automatically generate SQLAlchemy ORM models from Pydantic BaseModel classes, with support for: - Flattening nested models into prefixed columns - Automatic type mapping (str→Text, int→Integer, etc.) - Index hints via Pydantic Field metadata - Handling Optional/Union types

Classes

ColumnSpec

Specification for a generated SQLAlchemy column.

Source code in src/karenina/storage/auto_mapper.py
class ColumnSpec:
    """Specification for a generated SQLAlchemy column."""

    def __init__(
        self,
        name: str,
        sa_type: Any,
        nullable: bool = True,
        index: bool = False,
        default: Any = None,
        comment: str | None = None,
    ):
        self.name = name
        self.sa_type = sa_type
        self.nullable = nullable
        self.index = index
        self.default = default
        self.comment = comment

    def to_column(self) -> Column[Any]:
        """Convert to SQLAlchemy Column."""
        kwargs: dict[str, Any] = {
            "nullable": self.nullable,
        }
        if self.index:
            kwargs["index"] = True
        if self.default is not None:
            kwargs["default"] = self.default
        if self.comment:
            kwargs["comment"] = self.comment

        return Column(self.sa_type, **kwargs)

    def __repr__(self) -> str:
        return f"ColumnSpec({self.name}, {self.sa_type}, nullable={self.nullable}, index={self.index})"
Functions
to_column
to_column() -> Column[Any]

Convert to SQLAlchemy Column.

Source code in src/karenina/storage/auto_mapper.py
def to_column(self) -> Column[Any]:
    """Convert to SQLAlchemy Column."""
    kwargs: dict[str, Any] = {
        "nullable": self.nullable,
    }
    if self.index:
        kwargs["index"] = True
    if self.default is not None:
        kwargs["default"] = self.default
    if self.comment:
        kwargs["comment"] = self.comment

    return Column(self.sa_type, **kwargs)

PydanticSQLAlchemyMapper

Auto-generates SQLAlchemy column specifications from Pydantic models.

This mapper introspects Pydantic BaseModel classes and generates corresponding SQLAlchemy column definitions with support for: - Recursive flattening of nested models - Automatic type mapping - Index hints from Field metadata - Nullable handling for Optional fields

Example

mapper = PydanticSQLAlchemyMapper() columns = mapper.generate_columns(VerificationResult, flatten_config={ ... "metadata": {"prefix": "metadata_"}, ... "template": {"prefix": "template_", "optional": True}, ... })

Source code in src/karenina/storage/auto_mapper.py
class PydanticSQLAlchemyMapper:
    """Auto-generates SQLAlchemy column specifications from Pydantic models.

    This mapper introspects Pydantic BaseModel classes and generates
    corresponding SQLAlchemy column definitions with support for:
    - Recursive flattening of nested models
    - Automatic type mapping
    - Index hints from Field metadata
    - Nullable handling for Optional fields

    Example:
        >>> mapper = PydanticSQLAlchemyMapper()
        >>> columns = mapper.generate_columns(VerificationResult, flatten_config={
        ...     "metadata": {"prefix": "metadata_"},
        ...     "template": {"prefix": "template_", "optional": True},
        ... })
    """

    def __init__(self, type_map: dict[type, Any] | None = None):
        """Initialize the mapper.

        Args:
            type_map: Optional custom type mapping to extend/override defaults
        """
        self.type_map = {**TYPE_MAP}
        if type_map:
            self.type_map.update(type_map)

    def generate_columns(
        self,
        model: type[BaseModel],
        prefix: str = "",
        flatten_nested: bool = True,
        parent_optional: bool = False,
    ) -> dict[str, ColumnSpec]:
        """Generate SQLAlchemy column specifications from Pydantic model fields.

        Args:
            model: The Pydantic model class to introspect
            prefix: Prefix to add to all column names (e.g., "metadata_")
            flatten_nested: Whether to recursively flatten nested models
            parent_optional: Whether the parent field was optional (affects nullability)

        Returns:
            Dictionary mapping column names to ColumnSpec objects
        """
        columns: dict[str, ColumnSpec] = {}

        # Get type hints and field info
        try:
            hints = get_type_hints(model)
        except Exception:
            logger.debug("get_type_hints failed for %s, falling back to model_fields", model, exc_info=True)
            hints = {name: field.annotation for name, field in model.model_fields.items() if field.annotation}

        for field_name, field_type in hints.items():
            field_info = model.model_fields.get(field_name)
            metadata = _get_field_metadata(field_info)

            # Build column name with prefix
            column_name = f"{prefix}{field_name}"

            # Unwrap Optional types
            inner_type, is_optional = _unwrap_optional(field_type)

            # Determine nullability
            # Field is nullable if: it's optional, parent is optional, or has default None
            has_default_none = field_info is not None and field_info.default is None
            nullable = is_optional or parent_optional or has_default_none

            # Check if this is a nested Pydantic model
            if flatten_nested and _is_pydantic_model(inner_type):
                # Recursively generate columns for nested model
                nested_columns = self.generate_columns(
                    model=inner_type,
                    prefix=f"{column_name}_",
                    flatten_nested=True,
                    parent_optional=nullable,
                )
                columns.update(nested_columns)
            else:
                # Generate single column
                sa_type = _get_sqlalchemy_type(inner_type, metadata)
                index = metadata.get("index", False)
                comment = metadata.get("comment")

                columns[column_name] = ColumnSpec(
                    name=column_name,
                    sa_type=sa_type,
                    nullable=nullable,
                    index=index,
                    comment=comment,
                )

        return columns

    def generate_columns_for_result(
        self,
        model: type[BaseModel],
        flatten_config: dict[str, dict[str, Any]],
    ) -> dict[str, ColumnSpec]:
        """Generate columns with custom flatten configuration per field.

        This is designed for the VerificationResult model where each
        component (metadata, template, rubric, etc.) has its own prefix
        and optional status.

        Args:
            model: The root Pydantic model class
            flatten_config: Configuration for each field, e.g.:
                {
                    "metadata": {"prefix": "metadata_", "optional": False},
                    "template": {"prefix": "template_", "optional": True},
                }

        Returns:
            Dictionary mapping column names to ColumnSpec objects
        """
        columns: dict[str, ColumnSpec] = {}

        try:
            hints = get_type_hints(model)
        except Exception:
            logger.debug("get_type_hints failed for %s, falling back to model_fields", model, exc_info=True)
            hints = {name: field.annotation for name, field in model.model_fields.items() if field.annotation}

        for field_name, field_type in hints.items():
            # Get configuration for this field
            config = flatten_config.get(field_name, {})
            prefix = config.get("prefix", f"{field_name}_")
            force_optional = config.get("optional", False)

            # Unwrap Optional types
            inner_type, is_optional = _unwrap_optional(field_type)
            parent_optional = is_optional or force_optional

            if _is_pydantic_model(inner_type):
                # Recursively generate columns for nested model
                nested_columns = self.generate_columns(
                    model=inner_type,
                    prefix=prefix,
                    flatten_nested=True,
                    parent_optional=parent_optional,
                )
                columns.update(nested_columns)
            else:
                # Root-level field (not nested)
                field_info = model.model_fields.get(field_name)
                metadata = _get_field_metadata(field_info)
                sa_type = _get_sqlalchemy_type(inner_type, metadata)

                columns[field_name] = ColumnSpec(
                    name=field_name,
                    sa_type=sa_type,
                    nullable=parent_optional,
                    index=metadata.get("index", False),
                    comment=metadata.get("comment"),
                )

        return columns

    def create_model_class(
        self,
        base: type[DeclarativeBase],
        name: str,
        tablename: str,
        columns: dict[str, ColumnSpec],
        extra_columns: dict[str, Column[Any]] | None = None,
        relationships: dict[str, Any] | None = None,
        table_args: tuple[Any, ...] | None = None,
    ) -> type:
        """Dynamically create a SQLAlchemy ORM model class.

        Args:
            base: SQLAlchemy declarative base class
            name: Name for the generated class
            tablename: Database table name
            columns: Column specifications from generate_columns()
            extra_columns: Additional columns (e.g., id, foreign keys)
            relationships: SQLAlchemy relationship definitions
            table_args: Additional table arguments (indexes, constraints)

        Returns:
            Generated SQLAlchemy model class
        """
        # Build class attributes
        attrs: dict[str, Any] = {
            "__tablename__": tablename,
        }

        # Add extra columns first (id, foreign keys, etc.)
        if extra_columns:
            attrs.update(extra_columns)

        # Add generated columns
        for col_name, col_spec in columns.items():
            attrs[col_name] = col_spec.to_column()

        # Add relationships
        if relationships:
            attrs.update(relationships)

        # Add table args
        if table_args:
            attrs["__table_args__"] = table_args

        # Create and return the class
        return type(name, (base,), attrs)
Functions
__init__
__init__(type_map: dict[type, Any] | None = None)

Parameters:

Name Type Description Default
type_map dict[type, Any] | None

Optional custom type mapping to extend/override defaults

None
Source code in src/karenina/storage/auto_mapper.py
def __init__(self, type_map: dict[type, Any] | None = None):
    """Initialize the mapper.

    Args:
        type_map: Optional custom type mapping to extend/override defaults
    """
    self.type_map = {**TYPE_MAP}
    if type_map:
        self.type_map.update(type_map)
create_model_class
create_model_class(
    base: type[DeclarativeBase],
    name: str,
    tablename: str,
    columns: dict[str, ColumnSpec],
    extra_columns: dict[str, Column[Any]] | None = None,
    relationships: dict[str, Any] | None = None,
    table_args: tuple[Any, ...] | None = None,
) -> type

Dynamically create a SQLAlchemy ORM model class.

Parameters:

Name Type Description Default
base type[DeclarativeBase]

SQLAlchemy declarative base class

required
name str

Name for the generated class

required
tablename str

Database table name

required
columns dict[str, ColumnSpec]

Column specifications from generate_columns()

required
extra_columns dict[str, Column[Any]] | None

Additional columns (e.g., id, foreign keys)

None
relationships dict[str, Any] | None

SQLAlchemy relationship definitions

None
table_args tuple[Any, ...] | None

Additional table arguments (indexes, constraints)

None

Returns:

Type Description
type

Generated SQLAlchemy model class

Source code in src/karenina/storage/auto_mapper.py
def create_model_class(
    self,
    base: type[DeclarativeBase],
    name: str,
    tablename: str,
    columns: dict[str, ColumnSpec],
    extra_columns: dict[str, Column[Any]] | None = None,
    relationships: dict[str, Any] | None = None,
    table_args: tuple[Any, ...] | None = None,
) -> type:
    """Dynamically create a SQLAlchemy ORM model class.

    Args:
        base: SQLAlchemy declarative base class
        name: Name for the generated class
        tablename: Database table name
        columns: Column specifications from generate_columns()
        extra_columns: Additional columns (e.g., id, foreign keys)
        relationships: SQLAlchemy relationship definitions
        table_args: Additional table arguments (indexes, constraints)

    Returns:
        Generated SQLAlchemy model class
    """
    # Build class attributes
    attrs: dict[str, Any] = {
        "__tablename__": tablename,
    }

    # Add extra columns first (id, foreign keys, etc.)
    if extra_columns:
        attrs.update(extra_columns)

    # Add generated columns
    for col_name, col_spec in columns.items():
        attrs[col_name] = col_spec.to_column()

    # Add relationships
    if relationships:
        attrs.update(relationships)

    # Add table args
    if table_args:
        attrs["__table_args__"] = table_args

    # Create and return the class
    return type(name, (base,), attrs)
generate_columns
generate_columns(
    model: type[BaseModel],
    prefix: str = "",
    flatten_nested: bool = True,
    parent_optional: bool = False,
) -> dict[str, ColumnSpec]

Generate SQLAlchemy column specifications from Pydantic model fields.

Parameters:

Name Type Description Default
model type[BaseModel]

The Pydantic model class to introspect

required
prefix str

Prefix to add to all column names (e.g., "metadata_")

''
flatten_nested bool

Whether to recursively flatten nested models

True
parent_optional bool

Whether the parent field was optional (affects nullability)

False

Returns:

Type Description
dict[str, ColumnSpec]

Dictionary mapping column names to ColumnSpec objects

Source code in src/karenina/storage/auto_mapper.py
def generate_columns(
    self,
    model: type[BaseModel],
    prefix: str = "",
    flatten_nested: bool = True,
    parent_optional: bool = False,
) -> dict[str, ColumnSpec]:
    """Generate SQLAlchemy column specifications from Pydantic model fields.

    Args:
        model: The Pydantic model class to introspect
        prefix: Prefix to add to all column names (e.g., "metadata_")
        flatten_nested: Whether to recursively flatten nested models
        parent_optional: Whether the parent field was optional (affects nullability)

    Returns:
        Dictionary mapping column names to ColumnSpec objects
    """
    columns: dict[str, ColumnSpec] = {}

    # Get type hints and field info
    try:
        hints = get_type_hints(model)
    except Exception:
        logger.debug("get_type_hints failed for %s, falling back to model_fields", model, exc_info=True)
        hints = {name: field.annotation for name, field in model.model_fields.items() if field.annotation}

    for field_name, field_type in hints.items():
        field_info = model.model_fields.get(field_name)
        metadata = _get_field_metadata(field_info)

        # Build column name with prefix
        column_name = f"{prefix}{field_name}"

        # Unwrap Optional types
        inner_type, is_optional = _unwrap_optional(field_type)

        # Determine nullability
        # Field is nullable if: it's optional, parent is optional, or has default None
        has_default_none = field_info is not None and field_info.default is None
        nullable = is_optional or parent_optional or has_default_none

        # Check if this is a nested Pydantic model
        if flatten_nested and _is_pydantic_model(inner_type):
            # Recursively generate columns for nested model
            nested_columns = self.generate_columns(
                model=inner_type,
                prefix=f"{column_name}_",
                flatten_nested=True,
                parent_optional=nullable,
            )
            columns.update(nested_columns)
        else:
            # Generate single column
            sa_type = _get_sqlalchemy_type(inner_type, metadata)
            index = metadata.get("index", False)
            comment = metadata.get("comment")

            columns[column_name] = ColumnSpec(
                name=column_name,
                sa_type=sa_type,
                nullable=nullable,
                index=index,
                comment=comment,
            )

    return columns
generate_columns_for_result
generate_columns_for_result(
    model: type[BaseModel],
    flatten_config: dict[str, dict[str, Any]],
) -> dict[str, ColumnSpec]

Generate columns with custom flatten configuration per field.

This is designed for the VerificationResult model where each component (metadata, template, rubric, etc.) has its own prefix and optional status.

Parameters:

Name Type Description Default
model type[BaseModel]

The root Pydantic model class

required
flatten_config dict[str, dict[str, Any]]

Configuration for each field, e.g.: { "metadata": {"prefix": "metadata_", "optional": False}, "template": {"prefix": "template_", "optional": True}, }

required

Returns:

Type Description
dict[str, ColumnSpec]

Dictionary mapping column names to ColumnSpec objects

Source code in src/karenina/storage/auto_mapper.py
def generate_columns_for_result(
    self,
    model: type[BaseModel],
    flatten_config: dict[str, dict[str, Any]],
) -> dict[str, ColumnSpec]:
    """Generate columns with custom flatten configuration per field.

    This is designed for the VerificationResult model where each
    component (metadata, template, rubric, etc.) has its own prefix
    and optional status.

    Args:
        model: The root Pydantic model class
        flatten_config: Configuration for each field, e.g.:
            {
                "metadata": {"prefix": "metadata_", "optional": False},
                "template": {"prefix": "template_", "optional": True},
            }

    Returns:
        Dictionary mapping column names to ColumnSpec objects
    """
    columns: dict[str, ColumnSpec] = {}

    try:
        hints = get_type_hints(model)
    except Exception:
        logger.debug("get_type_hints failed for %s, falling back to model_fields", model, exc_info=True)
        hints = {name: field.annotation for name, field in model.model_fields.items() if field.annotation}

    for field_name, field_type in hints.items():
        # Get configuration for this field
        config = flatten_config.get(field_name, {})
        prefix = config.get("prefix", f"{field_name}_")
        force_optional = config.get("optional", False)

        # Unwrap Optional types
        inner_type, is_optional = _unwrap_optional(field_type)
        parent_optional = is_optional or force_optional

        if _is_pydantic_model(inner_type):
            # Recursively generate columns for nested model
            nested_columns = self.generate_columns(
                model=inner_type,
                prefix=prefix,
                flatten_nested=True,
                parent_optional=parent_optional,
            )
            columns.update(nested_columns)
        else:
            # Root-level field (not nested)
            field_info = model.model_fields.get(field_name)
            metadata = _get_field_metadata(field_info)
            sa_type = _get_sqlalchemy_type(inner_type, metadata)

            columns[field_name] = ColumnSpec(
                name=field_name,
                sa_type=sa_type,
                nullable=parent_optional,
                index=metadata.get("index", False),
                comment=metadata.get("comment"),
            )

    return columns

Functions

generate_indexes_from_columns

generate_indexes_from_columns(
    columns: dict[str, ColumnSpec],
    tablename: str,
    composite_indexes: list[tuple[str, ...]] | None = None,
) -> list[Index]

Generate SQLAlchemy Index objects from column specifications.

Parameters:

Name Type Description Default
columns
dict[str, ColumnSpec]

Column specifications

required
tablename
str

Table name for naming indexes

required
composite_indexes
list[tuple[str, ...]] | None

List of column name tuples for composite indexes

None

Returns:

Type Description
list[Index]

List of Index objects

Source code in src/karenina/storage/auto_mapper.py
def generate_indexes_from_columns(
    columns: dict[str, ColumnSpec],  # noqa: ARG001
    tablename: str,
    composite_indexes: list[tuple[str, ...]] | None = None,
) -> list[Index]:
    """Generate SQLAlchemy Index objects from column specifications.

    Args:
        columns: Column specifications
        tablename: Table name for naming indexes
        composite_indexes: List of column name tuples for composite indexes

    Returns:
        List of Index objects
    """
    indexes = []

    # Add composite indexes if specified
    if composite_indexes:
        for col_names in composite_indexes:
            index_name = f"idx_{tablename}_{'_'.join(col_names)}"
            indexes.append(Index(index_name, *col_names))

    return indexes

get_flat_field_mapping

get_flat_field_mapping(
    model: type[BaseModel],
    flatten_config: dict[str, dict[str, Any]],
) -> dict[str, str]

Get mapping from nested field paths to flat column names.

Parameters:

Name Type Description Default
model
type[BaseModel]

The Pydantic model class

required
flatten_config
dict[str, dict[str, Any]]

Flatten configuration

required

Returns:

Type Description
dict[str, str]

Dictionary mapping "component.field" to "prefix_field"

Source code in src/karenina/storage/auto_mapper.py
def get_flat_field_mapping(
    model: type[BaseModel],
    flatten_config: dict[str, dict[str, Any]],
) -> dict[str, str]:
    """Get mapping from nested field paths to flat column names.

    Args:
        model: The Pydantic model class
        flatten_config: Flatten configuration

    Returns:
        Dictionary mapping "component.field" to "prefix_field"
    """
    mapping: dict[str, str] = {}

    try:
        hints = get_type_hints(model)
    except Exception:
        logger.debug("get_type_hints failed for %s, falling back to model_fields", model, exc_info=True)
        hints = {name: field.annotation for name, field in model.model_fields.items() if field.annotation}

    for field_name, field_type in hints.items():
        config = flatten_config.get(field_name, {})
        prefix = config.get("prefix", f"{field_name}_")

        inner_type, _ = _unwrap_optional(field_type)

        if _is_pydantic_model(inner_type):
            # Get fields from nested model
            try:
                nested_hints = get_type_hints(inner_type)
            except Exception:
                logger.debug(
                    "get_type_hints failed for nested model %s, falling back to model_fields", inner_type, exc_info=True
                )
                nested_hints = {
                    name: field.annotation
                    for name, field in inner_type.model_fields.items()  # type: ignore[attr-defined]
                    if field.annotation
                }

            for nested_field in nested_hints:
                nested_path = f"{field_name}.{nested_field}"
                column_name = f"{prefix}{nested_field}"
                mapping[nested_path] = column_name
        else:
            # Root-level field
            mapping[field_name] = field_name

    return mapping