Search code examples
pythonmypy

Python: code duplication on class attribute definition


I'm trying to implement a simple ORM in python. I'm facing a code duplication issue and I do not know how to solve it. Here is a simplified example of a class in my project:

class Person:
    """Simplified ORM-style model used to illustrate the duplication problem."""

    TABLE_NAME = 'person'

    # (column name, SQL type) pairs describing the table.
    FIELDS = [
        ('name', 'VARCHAR(50)'),
        ('age', 'INTEGER')
    ]

    # CODE DUPLICATION: the next two lines should be generated from FIELDS, not hard-coded...
    name: str
    age: int

    def __init__(self, **kwargs):
        """Create an instance whose attributes are exactly the given keyword arguments."""
        # Replace the instance namespace wholesale: every keyword becomes an attribute.
        self.__dict__ = kwargs

    @classmethod
    def create_sql_table(cls):
        """Placeholder for the table creation; not implemented in this example."""
        # use TABLE_NAME and FIELDS to create the SQL table
        pass


# Build an instance; the keyword arguments become its attributes.
alice = Person(name='Alice', age=25)

# Attribute access that relies on the `name: str` annotation for type checking.
print(alice.name)

If I remove the two lines name: str and age: int, I lose auto-completion and I get a mypy error on the print line (Error: Person has no attribute name).

But if I keep them, I have code duplication (I write each field name twice).

Is there a way to avoid the code duplication (by generating these two lines from the FIELDS variable, for instance)?

Or is there another way to implement this class that avoids the code duplication (without the mypy error and the loss of auto-completion)?


Solution

  • You can use descriptors:

    from typing import Generic, TypeVar, Any, overload, Union
    
    # The Python type of the value held by a column (e.g. int for Column[int]).
    T = TypeVar('T')

    class Column(Generic[T]):
        """Descriptor that describes one table column and types its Python value.

        A ``Column`` is declared as a class attribute. Accessing it on the class
        returns the ``Column`` object itself; accessing it on an instance returns
        the value stored for that instance under the private attribute
        ``_<column name>``.
        """

        sql_type: str  # the field type used for this column

        def __init__(self) -> None:
            self.name = ''  # the name of the column, filled in by __set_name__

        def __set_name__(self, owner: Any, name: str) -> None:
            """Remember the attribute name this column was assigned to.

            This is called when the owning class (not an instance) is created.
            """
            self.name = name  # now contains the name of the attribute in the class

        # the overload for the case: Person.name -> Column[str]
        @overload
        def __get__(self, instance: None, owner: Any) -> 'Column[T]': ...

        # the overload for the case: Person().name -> str
        @overload
        def __get__(self, instance: Any, owner: Any) -> T: ...

        def __get__(self, instance: Any, owner: Any) -> Union[T, 'Column[T]']:
            """Return the column itself on class access, else the instance's value.

            Raises:
                AttributeError: if no value has been set for this column on
                    ``instance``.
            """
            if instance is None:
                return self
            try:
                return getattr(instance, f'_{self.name}')  # type: ignore
            except AttributeError:
                # Report the public column name rather than the private storage
                # slot, so the message matches what the caller actually accessed.
                raise AttributeError(
                    f'{type(instance).__name__!r} object has no value '
                    f'for column {self.name!r}'
                ) from None

        def __set__(self, instance: Any, value: T) -> None:
            """Store ``value`` for this column on ``instance``."""
            # maybe check here that the type matches
            setattr(instance, f'_{self.name}', value)
    

    Now we can create specializations for each column type:

    class Integer(Column[int]):
        """Column whose Python value is an ``int`` and whose SQL type is INTEGER."""
        sql_type = 'INTEGER'
    
    class VarChar(Column[str]):
        """Column whose Python value is a ``str`` and whose SQL type is VARCHAR."""

        def __init__(self, size: int) -> None:
            """Create the column; ``size`` is embedded in the SQL type string."""
            super().__init__()
            # Per-instance SQL type, since the size differs between columns.
            self.sql_type = f'VARCHAR({size})'
    
    

    And when you define the Person class we can use the column types

    class Person:
        """Example model for the ``person`` table, declared with column descriptors."""

        TABLE_NAME = 'person'

        # Each column is declared once; the descriptor carries both the SQL
        # type and the Python type of the attribute.
        name = VarChar(50)
        age = Integer()

        def __init__(self, **kwargs: Any) -> None:
            """Assign every keyword argument as an attribute of the new instance."""
            for attr, val in kwargs.items():
                setattr(self, attr, val)

        @classmethod
        def create_sql_table(cls) -> None:
            """Print the table name, then one line per column with its SQL type."""
            print("CREATE TABLE", cls.TABLE_NAME)
            # Only attributes defined directly on this class are inspected.
            column_members = (
                (attr, member)
                for attr, member in vars(cls).items()
                if isinstance(member, Column)
            )
            for attr, member in column_members:
                print(attr, member.sql_type)
    
    
    # Print the table name and the SQL type of every declared column.
    Person.create_sql_table()

    # Only `age` is given a value here; `name` is left unset.
    p = Person(age=10)
    print(p.age)
    # Assignment goes through the column descriptor as well.
    p.age = 20
    print(p.age)
    

    This prints:

    CREATE TABLE person

    name VARCHAR(50)

    age INTEGER

    10

    20

    You should probably also create a base Model class that contains the __init__ and the class method of Person.

    You can also extend the Column class to allow nullable columns and add default values.

    Mypy does not complain and correctly infers the types: on a Person instance, name is str and age is int, while on the class itself Person.name is Column[str] and Person.age is Column[int].