Search code examples
python cassandra model timeuuid datastax-python-driver

Use Cassandra's built-in `now()` function to generate a TimeUUID with a Model in the Python driver


I have the following code:

import time

from uuid import uuid4

import cassandra
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.query import BatchQuery
from cassandra.cqlengine import columns, connection
from cassandra.cqlengine.management import sync_table


class StudentModel(Model):
    """cqlengine model for the ``student`` table."""
    __table_name__ = 'student'
    # Partition key; ``uuid4`` is passed as a callable, so each new row
    # gets a fresh random id.
    id = columns.UUID(primary_key=True, default=uuid4)
    # Second primary-key component, i.e. the clustering key (descending).
    # NOTE: the default below is a call expression, so it is evaluated once
    # when the class body runs; every row created without an explicit
    # created_timestamp receives that same TimeUUID.
    created_timestamp = columns.TimeUUID(primary_key=True,
                                         clustering_order='DESC',
                                         default=cassandra.util.uuid_from_time(time.time()))
    name = columns.Text(required=True, default='')

class ClassRoomModel(Model):
    """cqlengine model for the ``class_room`` table."""
    __table_name__ = 'class_room'
    # Partition key; ``uuid4`` is passed as a callable, so each new row
    # gets a fresh random id.
    id = columns.UUID(primary_key=True, default=uuid4)
    # Second primary-key component, i.e. the clustering key (descending).
    # NOTE: the default below is a call expression, so it is evaluated once
    # when the class body runs; every row created without an explicit
    # created_timestamp receives that same TimeUUID.
    created_timestamp = columns.TimeUUID(primary_key=True,
                                         clustering_order='DESC',
                                         default=cassandra.util.uuid_from_time(time.time()))
    name = columns.Text(required=True, default='')

class StudentToClass(Model):
    """cqlengine model for the ``student_to_class_mapping`` table."""
    __table_name__ = 'student_to_class_mapping'
    # Partition key; no default, so the caller must supply it.
    class_room_id = columns.UUID(primary_key=True)
    # Second primary-key component, i.e. the clustering key (descending).
    # NOTE: the default below is a call expression, so it is evaluated once
    # when the class body runs. Rows created with the same class_room_id and
    # no explicit created_timestamp therefore share the full primary key and
    # overwrite one another.
    created_timestamp = \
        columns.TimeUUID(primary_key=True,
                         clustering_order='DESC',
                         default=cassandra.util.uuid_from_time(time.time()))
    # Regular (non-key) column.
    student_id = columns.UUID()

class ClassToStudent(Model):
    """cqlengine model for the ``class_to_student_mapping`` table."""
    __table_name__ = 'class_to_student_mapping'
    # Partition key; no default, so the caller must supply it.
    student_id = columns.UUID(primary_key=True)
    # Second primary-key component, i.e. the clustering key (descending).
    # NOTE: the default below is a call expression, so it is evaluated once
    # when the class body runs. Rows created without an explicit
    # created_timestamp share this value and are told apart only by
    # student_id.
    created_timestamp = \
        columns.TimeUUID(primary_key=True,
                         clustering_order='DESC',
                         default=cassandra.util.uuid_from_time(time.time()))
    # Regular (non-key) column.
    class_room_id = columns.UUID()

if __name__ == '__main__':
    connection.setup(hosts=['localhost'],
                     default_keyspace='test')
    sync_table(StudentModel)
    sync_table(ClassRoomModel)
    sync_table(StudentToClass)
    sync_table(ClassToStudent)

    students = []
    for i in xrange(100):
        students.append(StudentModel.create(name='student' + str(i)))

    class_room = ClassRoomModel.create(name='class1')

    for student in students:
        print "Creating batch for: ", student.name
        with BatchQuery() as batch_query:
            ClassToStudent.batch(batch_query).create(
                student_id=student.id, class_room_id=class_room.id)
            StudentToClass.batch(batch_query).create(
                student_id=student.id, class_room_id=class_room.id)

This code runs without errors and creates records. When I check the row counts, three of the tables match what I expect, but test.student_to_class_mapping should contain 100 rows and contains only 1.

cqlsh> select count(*) from test.student;

 count
-------
   100

(1 rows)
cqlsh> select count(*) from test.class_room ;

 count
-------
     1

(1 rows)
cqlsh> select count(*) from test.class_to_student_mapping;

 count
-------
   100

(1 rows)
cqlsh> select count(*) from test.student_to_class_mapping ;

 count
-------
     1

(1 rows)

I found the cause: the logic is correct; the only problem is the clustering key of test.student_to_class_mapping.

created_timestamp = \
    columns.TimeUUID(primary_key=True,
                     clustering_order='DESC',
                     default=cassandra.util.uuid_from_time(time.time()))

cassandra.util.uuid_from_time(time.time()) does not generate a unique UUID for each record. I could use uuid1, but I have already run into problems with uuid1.

I know that we can use now(), so I changed my code to:

from cassandra.query import BatchStatement, SimpleStatement
from cassandra.cqlengine import connection
...
...
    # Build one batch holding both mapping inserts. created_timestamp is
    # filled in by the CQL now() function inside each statement instead of
    # by the model's Python-side default.
    batch_query = BatchStatement()
    # The table name and both ids are interpolated into the CQL text with
    # str.format().
    batch_query.add(
        SimpleStatement('INSERT INTO {0} '
            '("student_id", "created_timestamp", "class_room_id") '
            'VALUES ({1}, now(), {2})'.format(
                StudentToClass.column_family_name(),
                student.id, class_room.id)))
    batch_query.add(
        SimpleStatement('INSERT INTO {0} '
            '("student_id", "created_timestamp", "class_room_id") '
            'VALUES ({1}, now(), {2})'.format(
                ClassToStudent.column_family_name(),
                student.id, class_room.id)))   
    # Send the batch through the session that cqlengine's connection holds.
    connection.session.execute(batch_query) 
...
... 

Now it works fine and creates all the records as expected.

Is there any way to use now() with the Model's create method?


Solution

  • What happened:

    default = None
        the default value, can be a value or a callable (no args)
    

    (from https://datastax.github.io/python-driver/api/cassandra/cqlengine/columns.html)

    Your line with

    default=cassandra.util.uuid_from_time(time.time())
    

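    was evaluated once, at startup (when the class was defined), so the default was a single fixed UUID shared by every row created without an explicit value. Pass a callable instead, for example: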

    from uuid import uuid1,uuid4
    
    class Comment(Model):
        # First primary-key component, i.e. the partition key.
        photo_id = UUID(primary_key=True)
        # ``uuid1`` is passed uncalled, i.e. as a no-argument callable default
        # rather than as a precomputed value.
        comment_id = TimeUUID(primary_key=True, default=uuid1) # second primary key component is a clustering key
        comment = Text()
    

    Found here. https://datastax.github.io/python-driver/api/cassandra/cqlengine/query.html

    Another (purely personal) remark: generate the UUID explicitly, since one often needs it afterwards ;)