# Source code for sqlspec.driver._sync

"""Synchronous driver protocol implementation."""

from abc import abstractmethod
from time import perf_counter
from typing import TYPE_CHECKING, Any, Final, TypeVar, overload

from sqlspec.core import SQL, StackResult, create_arrow_result
from sqlspec.core.stack import StackOperation, StatementStack
from sqlspec.driver._common import (
    CommonDriverAttributesMixin,
    DataDictionaryMixin,
    ExecutionResult,
    StackExecutionObserver,
    VersionInfo,
    describe_stack_statement,
    handle_single_row_error,
)
from sqlspec.driver.mixins import SQLTranslatorMixin, StorageDriverMixin
from sqlspec.exceptions import ImproperConfigurationError, StackExecutionError
from sqlspec.utils.arrow_helpers import convert_dict_to_arrow
from sqlspec.utils.logging import get_logger
from sqlspec.utils.module_loader import ensure_pyarrow

if TYPE_CHECKING:
    from collections.abc import Sequence
    from contextlib import AbstractContextManager

    from sqlspec.builder import QueryBuilder
    from sqlspec.core import ArrowResult, SQLResult, Statement, StatementConfig, StatementFilter
    from sqlspec.typing import ArrowReturnFormat, SchemaT, StatementParameters

# Logger name handed to get_logger() below.
_LOGGER_NAME: Final[str] = "sqlspec"
# Module-level logger; in this module it is used for debug messages when a
# rollback or commit attempted during stack execution fails.
logger = get_logger(_LOGGER_NAME)

# Public names exported by this module.
__all__ = ("SyncDataDictionaryBase", "SyncDriverAdapterBase", "SyncDriverT")


# Empty list of statement filters. ``Final`` only prevents rebinding the name;
# the list object itself is still mutable, so treat it as read-only.
# NOTE(review): not referenced in the visible part of this module - confirm it is still used.
EMPTY_FILTERS: Final["list[StatementFilter]"] = []

# Type variable bound to SyncDriverAdapterBase (forward reference by name).
SyncDriverT = TypeVar("SyncDriverT", bound="SyncDriverAdapterBase")


[docs] class SyncDriverAdapterBase(CommonDriverAttributesMixin, SQLTranslatorMixin, StorageDriverMixin): """Base class for synchronous database drivers.""" __slots__ = () is_async: bool = False @property @abstractmethod def data_dictionary(self) -> "SyncDataDictionaryBase": """Get the data dictionary for this driver. Returns: Data dictionary instance for metadata queries """
[docs] def dispatch_statement_execution(self, statement: "SQL", connection: "Any") -> "SQLResult": """Central execution dispatcher using the Template Method Pattern. Args: statement: The SQL statement to execute connection: The database connection to use Returns: The result of the SQL execution """ runtime = self.observability compiled_sql, execution_parameters = statement.compile() processed_state = statement.get_processed_state() operation = getattr(processed_state, "operation_type", statement.operation_type) query_context = { "sql": compiled_sql, "parameters": execution_parameters, "driver": type(self).__name__, "operation": operation, "is_many": statement.is_many, "is_script": statement.is_script, } runtime.emit_query_start(**query_context) span = runtime.start_query_span(compiled_sql, operation, type(self).__name__) started = perf_counter() try: with self.handle_database_exceptions(), self.with_cursor(connection) as cursor: special_result = self._try_special_handling(cursor, statement) if special_result is not None: result = special_result elif statement.is_script: execution_result = self._execute_script(cursor, statement) result = self.build_statement_result(statement, execution_result) elif statement.is_many: execution_result = self._execute_many(cursor, statement) result = self.build_statement_result(statement, execution_result) else: execution_result = self._execute_statement(cursor, statement) result = self.build_statement_result(statement, execution_result) except Exception as exc: # pragma: no cover - instrumentation path runtime.span_manager.end_span(span, error=exc) runtime.emit_error(exc, **query_context) raise runtime.span_manager.end_span(span) duration = perf_counter() - started runtime.emit_query_complete(**{**query_context, "rows_affected": result.rows_affected}) runtime.emit_statement_event( sql=compiled_sql, parameters=execution_parameters, driver=type(self).__name__, operation=operation, execution_mode=self.statement_config.execution_mode, 
is_many=statement.is_many, is_script=statement.is_script, rows_affected=result.rows_affected, duration_s=duration, storage_backend=(result.metadata or {}).get("storage_backend") if hasattr(result, "metadata") else None, started_at=started, ) return result
[docs] @abstractmethod def with_cursor(self, connection: Any) -> Any: """Create and return a context manager for cursor acquisition and cleanup. Returns a context manager that yields a cursor for database operations. Concrete implementations handle database-specific cursor creation and cleanup. """
[docs] @abstractmethod def handle_database_exceptions(self) -> "AbstractContextManager[None]": """Handle database-specific exceptions and wrap them appropriately. Returns: ContextManager that can be used in with statements """
[docs] @abstractmethod def begin(self) -> None: """Begin a database transaction on the current connection."""
[docs] @abstractmethod def rollback(self) -> None: """Rollback the current transaction on the current connection."""
[docs] @abstractmethod def commit(self) -> None: """Commit the current transaction on the current connection."""
@abstractmethod def _try_special_handling(self, cursor: Any, statement: "SQL") -> "SQLResult | None": """Hook for database-specific special operations (e.g., PostgreSQL COPY, bulk operations). This method is called first in dispatch_statement_execution() to allow drivers to handle special operations that don't follow the standard SQL execution pattern. Args: cursor: Database cursor/connection object statement: SQL statement to analyze Returns: SQLResult if the special operation was handled and completed, None if standard execution should proceed """ def _execute_script(self, cursor: Any, statement: "SQL") -> ExecutionResult: """Execute a SQL script containing multiple statements. Default implementation splits the script and executes statements individually. Drivers can override for database-specific script execution methods. Args: cursor: Database cursor/connection object statement: SQL statement object with all necessary data and configuration Returns: ExecutionResult with script execution data including statement counts """ sql, prepared_parameters = self._get_compiled_sql(statement, self.statement_config) statements = self.split_script_statements(sql, self.statement_config, strip_trailing_semicolon=True) statement_count: int = len(statements) successful_count: int = 0 for stmt in statements: single_stmt = statement.copy(statement=stmt, parameters=prepared_parameters) self._execute_statement(cursor, single_stmt) successful_count += 1 return self.create_execution_result( cursor, statement_count=statement_count, successful_statements=successful_count, is_script_result=True )
[docs] def execute_stack(self, stack: "StatementStack", *, continue_on_error: bool = False) -> "tuple[StackResult, ...]": """Execute a StatementStack sequentially using the adapter's primitives.""" if not isinstance(stack, StatementStack): msg = "execute_stack expects a StatementStack instance" raise TypeError(msg) if not stack: msg = "Cannot execute an empty StatementStack" raise ValueError(msg) results: list[StackResult] = [] single_transaction = not continue_on_error with StackExecutionObserver(self, stack, continue_on_error, native_pipeline=False) as observer: started_transaction = False try: if single_transaction and not self._connection_in_transaction(): self.begin() started_transaction = True for index, operation in enumerate(stack.operations): try: result = self._execute_stack_operation(operation) except Exception as exc: # pragma: no cover - exercised via tests stack_error = StackExecutionError( index, describe_stack_statement(operation.statement), exc, adapter=type(self).__name__, mode="continue-on-error" if continue_on_error else "fail-fast", ) if started_transaction and not continue_on_error: try: self.rollback() except Exception as rollback_error: # pragma: no cover - diagnostics only logger.debug("Rollback after stack failure failed: %s", rollback_error) started_transaction = False if continue_on_error: self._rollback_after_stack_error() observer.record_operation_error(stack_error) results.append(StackResult.from_error(stack_error)) continue raise stack_error from exc results.append(StackResult(result=result)) if continue_on_error: self._commit_after_stack_operation() if started_transaction: self.commit() except Exception: if started_transaction: try: self.rollback() except Exception as rollback_error: # pragma: no cover - diagnostics only logger.debug("Rollback after stack failure failed: %s", rollback_error) raise return tuple(results)
def _rollback_after_stack_error(self) -> None: """Attempt to rollback after a stack operation error to clear connection state.""" try: self.rollback() except Exception as rollback_error: # pragma: no cover - driver-specific cleanup logger.debug("Rollback after stack error failed: %s", rollback_error) def _commit_after_stack_operation(self) -> None: """Attempt to commit after a successful stack operation when not batching.""" try: self.commit() except Exception as commit_error: # pragma: no cover - driver-specific cleanup logger.debug("Commit after stack operation failed: %s", commit_error) @abstractmethod def _execute_many(self, cursor: Any, statement: "SQL") -> ExecutionResult: """Execute SQL with multiple parameter sets (executemany). Must be implemented by each driver for database-specific executemany logic. Args: cursor: Database cursor/connection object statement: SQL statement object with all necessary data and configuration Returns: ExecutionResult with execution data for the many operation """ @abstractmethod def _execute_statement(self, cursor: Any, statement: "SQL") -> ExecutionResult: """Execute a single SQL statement. Must be implemented by each driver for database-specific execution logic. Args: cursor: Database cursor/connection object statement: SQL statement object with all necessary data and configuration Returns: ExecutionResult with execution data """
[docs] def execute( self, statement: "SQL | Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SQLResult": """Execute a statement with parameter handling.""" sql_statement = self.prepare_statement( statement, parameters, statement_config=statement_config or self.statement_config, kwargs=kwargs ) return self.dispatch_statement_execution(statement=sql_statement, connection=self.connection)
[docs] def execute_many( self, statement: "SQL | Statement | QueryBuilder", /, parameters: "Sequence[StatementParameters]", *filters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SQLResult": """Execute statement multiple times with different parameters. Parameters passed will be used as the batch execution sequence. """ config = statement_config or self.statement_config if isinstance(statement, SQL): sql_statement = SQL(statement.raw_sql, parameters, statement_config=config, is_many=True, **kwargs) else: base_statement = self.prepare_statement(statement, filters, statement_config=config, kwargs=kwargs) sql_statement = SQL(base_statement.raw_sql, parameters, statement_config=config, is_many=True, **kwargs) return self.dispatch_statement_execution(statement=sql_statement, connection=self.connection)
[docs] def execute_script( self, statement: "str | SQL", /, *parameters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SQLResult": """Execute a multi-statement script. By default, validates each statement and logs warnings for dangerous operations. Use suppress_warnings=True for migrations and admin scripts. """ config = statement_config or self.statement_config sql_statement = self.prepare_statement(statement, parameters, statement_config=config, kwargs=kwargs) return self.dispatch_statement_execution(statement=sql_statement.as_script(), connection=self.connection)
@overload def select_one( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT]", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SchemaT": ... @overload def select_one( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: None = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "dict[str, Any]": ...
[docs] def select_one( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT] | None" = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SchemaT | dict[str, Any]": """Execute a select statement and return exactly one row. Raises an exception if no rows or more than one row is returned. """ result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) try: return result.one(schema_type=schema_type) except ValueError as error: handle_single_row_error(error)
@overload def select_one_or_none( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT]", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SchemaT | None": ... @overload def select_one_or_none( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: None = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "dict[str, Any] | None": ...
[docs] def select_one_or_none( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT] | None" = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "SchemaT | dict[str, Any] | None": """Execute a select statement and return at most one row. Returns None if no rows are found. Raises an exception if more than one row is returned. """ result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) return result.one_or_none(schema_type=schema_type)
@overload def select( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT]", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "list[SchemaT]": ... @overload def select( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: None = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "list[dict[str, Any]]": ...
[docs] def select( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT] | None" = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "list[SchemaT] | list[dict[str, Any]]": """Execute a select statement and return all rows.""" result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) return result.get_data(schema_type=schema_type)
[docs] def select_to_arrow( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, return_format: "ArrowReturnFormat" = "table", native_only: bool = False, batch_size: int | None = None, arrow_schema: Any = None, **kwargs: Any, ) -> "ArrowResult": """Execute query and return results as Apache Arrow format. This base implementation uses the conversion path: execute() → dict → Arrow. Adapters with native Arrow support (ADBC, DuckDB, BigQuery) override this method to use zero-copy native paths for 5-10x performance improvement. Args: statement: SQL query string, Statement, or QueryBuilder *parameters: Query parameters (same format as execute()/select()) statement_config: Optional statement configuration override return_format: "table" for pyarrow.Table (default), "batch" for single RecordBatch, "batches" for iterator of RecordBatches, "reader" for RecordBatchReader native_only: If True, raise error if native Arrow unavailable (default: False) batch_size: Rows per batch for "batch"/"batches" format (default: None = all rows) arrow_schema: Optional pyarrow.Schema for type casting **kwargs: Additional keyword arguments Returns: ArrowResult containing pyarrow.Table, RecordBatchReader, or RecordBatches Raises: ImproperConfigurationError: If native_only=True and adapter doesn't support native Arrow Examples: >>> result = driver.select_to_arrow( ... "SELECT * FROM users WHERE age > ?", 18 ... ) >>> df = result.to_pandas() >>> print(df.head()) >>> # Force native Arrow path (raises error if unavailable) >>> result = driver.select_to_arrow( ... "SELECT * FROM users", native_only=True ... ) """ ensure_pyarrow() if native_only: msg = ( f"Adapter '{self.__class__.__name__}' does not support native Arrow results. " f"Use native_only=False to allow conversion path, or switch to an adapter " f"with native Arrow support (ADBC, DuckDB, BigQuery)." 
) raise ImproperConfigurationError(msg) result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) arrow_data = convert_dict_to_arrow(result.data, return_format=return_format, batch_size=batch_size) if arrow_schema is not None: import pyarrow as pa if not isinstance(arrow_schema, pa.Schema): msg = f"arrow_schema must be a pyarrow.Schema, got {type(arrow_schema).__name__}" raise TypeError(msg) arrow_data = arrow_data.cast(arrow_schema) # type: ignore[union-attr] return create_arrow_result( statement=result.statement, data=arrow_data, rows_affected=result.rows_affected, last_inserted_id=result.last_inserted_id, execution_time=result.execution_time, metadata=result.metadata, )
[docs] def select_value( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> Any: """Execute a select statement and return a single scalar value. Expects exactly one row with one column. Raises an exception if no rows or more than one row/column is returned. """ result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) try: return result.scalar() except ValueError as error: handle_single_row_error(error)
[docs] def select_value_or_none( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> Any: """Execute a select statement and return a single scalar value or None. Returns None if no rows are found. Expects at most one row with one column. Raises an exception if more than one row is returned. """ result = self.execute(statement, *parameters, statement_config=statement_config, **kwargs) return result.scalar_or_none()
@overload def select_with_total( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT]", statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "tuple[list[SchemaT], int]": ... @overload def select_with_total( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: None = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "tuple[list[dict[str, Any]], int]": ...
[docs] def select_with_total( self, statement: "Statement | QueryBuilder", /, *parameters: "StatementParameters | StatementFilter", schema_type: "type[SchemaT] | None" = None, statement_config: "StatementConfig | None" = None, **kwargs: Any, ) -> "tuple[list[SchemaT] | list[dict[str, Any]], int]": """Execute a select statement and return both the data and total count. This method is designed for pagination scenarios where you need both the current page of data and the total number of rows that match the query. Args: statement: The SQL statement, QueryBuilder, or raw SQL string *parameters: Parameters for the SQL statement schema_type: Optional schema type for data transformation statement_config: Optional SQL configuration **kwargs: Additional keyword arguments Returns: A tuple containing: - List of data rows (transformed by schema_type if provided) - Total count of rows matching the query (ignoring LIMIT/OFFSET) """ sql_statement = self.prepare_statement( statement, parameters, statement_config=statement_config or self.statement_config, kwargs=kwargs ) count_result = self.dispatch_statement_execution(self._create_count_query(sql_statement), self.connection) select_result = self.execute(sql_statement) return (select_result.get_data(schema_type=schema_type), count_result.scalar())
def _execute_stack_operation(self, operation: "StackOperation") -> "SQLResult | ArrowResult | None": kwargs = dict(operation.keyword_arguments) if operation.keyword_arguments else {} if operation.method == "execute": return self.execute(operation.statement, *operation.arguments, **kwargs) if operation.method == "execute_many": if not operation.arguments: msg = "execute_many stack operation requires parameter sets" raise ValueError(msg) parameter_sets = operation.arguments[0] filters = operation.arguments[1:] return self.execute_many(operation.statement, parameter_sets, *filters, **kwargs) if operation.method == "execute_script": return self.execute_script(operation.statement, *operation.arguments, **kwargs) if operation.method == "execute_arrow": return self.select_to_arrow(operation.statement, *operation.arguments, **kwargs) msg = f"Unsupported stack operation method: {operation.method}" raise ValueError(msg)
class SyncDataDictionaryBase(DataDictionaryMixin):
    """Common base for data dictionaries used with synchronous drivers.

    Subclasses must implement version detection, feature flags and optimal
    type lookup. The table/column/index listings default to empty lists.
    """

    @abstractmethod
    def get_version(self, driver: "SyncDriverAdapterBase") -> "VersionInfo | None":
        """Return the database version.

        Args:
            driver: Sync database driver instance.

        Returns:
            Version information, or None if detection fails.
        """

    @abstractmethod
    def get_feature_flag(self, driver: "SyncDriverAdapterBase", feature: str) -> bool:
        """Report whether the database supports a given feature.

        Args:
            driver: Sync database driver instance.
            feature: Feature name to check.

        Returns:
            True if the feature is supported, False otherwise.
        """

    @abstractmethod
    def get_optimal_type(self, driver: "SyncDriverAdapterBase", type_category: str) -> str:
        """Return the best database type for a type category.

        Args:
            driver: Sync database driver instance.
            type_category: Type category (e.g., 'json', 'uuid', 'boolean').

        Returns:
            Database-specific type name.
        """

    def get_tables(self, driver: "SyncDriverAdapterBase", schema: "str | None" = None) -> "list[str]":
        """Return the table names in a schema.

        This base implementation ignores its arguments and returns an empty list.

        Args:
            driver: Sync database driver instance.
            schema: Schema name (None for default).

        Returns:
            List of table names.
        """
        # Arguments are intentionally unused in the base implementation.
        del driver, schema
        return []

    def get_columns(
        self, driver: "SyncDriverAdapterBase", table: str, schema: "str | None" = None
    ) -> "list[dict[str, Any]]":
        """Return column metadata for a table.

        This base implementation ignores its arguments and returns an empty list.

        Args:
            driver: Sync database driver instance.
            table: Table name.
            schema: Schema name (None for default).

        Returns:
            List of column metadata dictionaries.
        """
        # Arguments are intentionally unused in the base implementation.
        del driver, table, schema
        return []

    def get_indexes(
        self, driver: "SyncDriverAdapterBase", table: str, schema: "str | None" = None
    ) -> "list[dict[str, Any]]":
        """Return index metadata for a table.

        This base implementation ignores its arguments and returns an empty list.

        Args:
            driver: Sync database driver instance.
            table: Table name.
            schema: Schema name (None for default).

        Returns:
            List of index metadata dictionaries.
        """
        # Arguments are intentionally unused in the base implementation.
        del driver, table, schema
        return []

    def list_available_features(self) -> "list[str]":
        """Return the feature names that can be checked via get_feature_flag.

        Returns:
            List of feature names this data dictionary supports, taken from
            ``get_default_features()``.
        """
        features = self.get_default_features()
        return features