Search code examples
mysqlpython-3.xaws-lambdasqlalchemysqlmodel

sqlalchemy/sqlmodel: how to speed up this query?


I am having trouble with the following select query with joins. Not sure what could be the issue, it loads quickly in localhost but when i connect to mysql in staging (under its own VPC) from aws lambda function, it can take several seconds depending on the limit (200 for example). I didn't think thats a ton of record so not sure why it takes several seconds for it to return. Note the transaction table has 125 columns but that shouldn't take this long.

class Transaction(SQLModel, table=True):
    
    id: int = Field(default=None, primary_key=True)
    code: str
    
    transactionTypeId: Optional[int] = Field(default=None, foreign_key='transaction_type.id')
    propertyId: Optional[int] = Field(default=None, foreign_key='property_type.id')
    policyId: Optional[int] = Field(default=None, foreign_key='policy_type.id')

    def toDict(self):
        d = {}

        for c in inspect(self).mapper.column_attrs:
            o = getattr(self, c.key)

            if isinstance(o, (date, datetime, time)):
                d[c.key] = o.isoformat()
            else:
                d[c.key] = o
        
        return d


def getOrders():
    
    try:

        queryParams = app.current_request.query_params
        limit = 1
        offset = 0
        filters = []
        qry = select(Transaction, TransactionType, PropertyType).join(TransactionType).join(PropertyType).offset(offset).limit(limit)
        
        if queryParams is not None:
            if 'offset' in queryParams and isinstance(int(queryParams['offset']), int):
                offset = int(queryParams['offset'])
            if 'limit' in queryParams and isinstance(int(queryParams['limit']), int):
                limit = int(queryParams['limit'])   
                
            
        with Session(engine) as session:
            results = session.exec(select(Transaction, TransactionType, PropertyType)
                                   .join(TransactionType)
                                   .join(PropertyType)
                                   .offset(offset)
                                   .limit(limit)
                                   .where(and_(*filters))
                                   .order_by(Transaction
                                             .transactionDate
                                             .desc())
                                   ).all()

            formattedResults = []
            
            for transaction, transactionType, propertyType in results:
                transactionResult = transaction.toDict().subset(['id', 'address',                                                      'propertyId','transactionType','transactionDate'])
                transactionTypeResult = transactionType.toDict().subset(['type'])
                propertyTypeResult = propertyType.toDict().subset(['type'])
                
                finalResult = benedict({'transaction': transactionResult,
                                        'transactionType': transactionTypeResult,
                                        'propertyType': propertyTypeResult})
                formattedResults.append(finalResult.flatten())
            
            msg = formattedResults

            return msg

the benedict stuff is just this library that lets me manipulate dicts easily: https://pypi.org/project/python-benedict/

Is there something wrong with my query? Or is this down to code?


Solution

  • I would add an index to transactionTypeId and propertyId (you probably need a database migration to add these indexes). I would also select set of columns you need to serialize, instead of whole model. Selecting all columns to setup ORM object can be expensive.

    class Transaction(SQLModel, table=True):
        
        id: int = Field(default=None, primary_key=True)
        code: str
        
        transactionTypeId: Optional[int] = Field(default=None, foreign_key='transaction_type.id', index=True)
        propertyId: Optional[int] = Field(default=None, foreign_key='property_type.id', index=True)
        policyId: Optional[int] = Field(default=None, foreign_key='policy_type.id')
    
        def toDict(self):
            d = {}
            for c in inspect(self).mapper.column_attrs:
                o = getattr(self, c.key)
    
                if isinstance(o, (date, datetime, time)):
                    d[c.key] = o.isoformat()
                else:
                    d[c.key] = o
            
            return d
    
    
    def getOrders():
        try:
            queryParams = app.current_request.query_params
            limit = 1
            offset = 0
            filters = []    
            if queryParams is not None:
                if 'offset' in queryParams and isinstance(int(queryParams['offset']), int):
                    offset = int(queryParams['offset'])
                if 'limit' in queryParams and isinstance(int(queryParams['limit']), int):
                    limit = int(queryParams['limit'])   
                             
            with Session(engine) as session:
                results = session.exec(select(
                                           Transaction.id, 
                                           Transaction.address,
                                           Transaction.propertyId,                                                   
                                           Transaction.transactionTypeId,
                                           Transaction.transactionDate,
                                           TransactionType.type, 
                                           PropertyType.type
                                       )
                                       .join(TransactionType)
                                       .join(PropertyType)
                                       .offset(offset)
                                       .limit(limit)
                                       .where(and_(*filters))
                                       .order_by(Transaction
                                                 .transactionDate
                                                 .desc())
                                       ).all()
    
                formattedResults = []
                
                for transactionId, transactionAddress, propertyId, transactionTypeId, transactionDate, transactionType, propertyType in results:
                    transactionResult = {
                         'id': transactionId, 
                         'address': transactionAddress, 
                         'propertyId': propertyId, 
                         'transactionTypeId': transactionTypeId
                         'transactionDate': transactionDate
                    }
                    transactionTypeResult = {'type': transactionType}
                    propertyTypeResult = {'type': policyType}
            
                    finalResult = benedict({'transaction': transactionResult,
                                            'transactionType': transactionTypeResult,
                                            'propertyType': propertyTypeResult})
                    formattedResults.append(finalResult.flatten())       
                msg = formattedResults
                return msg