Search code examples
python-3.x · google-cloud-platform · bigtable · google-cloud-bigtable

How to add multiple cells to a row in a table in bigtable?


I've tried many different GitHub project examples and read the Bigtable API guide multiple times, but I can't figure out why it won't let me set multiple cells in a row. The examples I've found only show one value per row.

I also used the cbt commands to check that the column families I added are in the table — they are — but when I use the count command I see no entries.

I've used both the mutate_rows command on the table and the commit command on the row, but neither adds the row. I also realize that the row commit command is literally just:

table.mutate_rows([row])

So, I can't seem to understand what I'm doing wrong at all.

import base64
import json
import ast
import datetime

from google.cloud import bigtable
from google.cloud.bigtable import column_family
from google.cloud.bigtable import row_filters


def function(event, context):
    """Pub/Sub-triggered Cloud Function: write one Bigtable row with one cell
    per key/value pair of the decoded message payload, then read the row back
    and print each cell.

    Args:
        event: Pub/Sub event; ``event['data']`` is a base64-encoded payload
            whose decoded text is a Python-literal dict (parsed with
            ``ast.literal_eval``) that must contain a ``'serial_num'`` key.
        context: Cloud Functions event metadata (unused).

    NOTE(review): ``project_id``, ``instance_id`` and ``table_id`` are read
    from module scope — they must be defined elsewhere in this module.
    """
    data = base64.b64decode(event['data']).decode('utf-8')
    data = ast.literal_eval(data)

    print(type(data))
    print(data)

    # Create a Cloud Bigtable client (admin=True so the table can be created).
    client = bigtable.Client(project=project_id, admin=True)

    # Connect to an existing Cloud Bigtable instance.
    instance = client.instance(instance_id)

    print('opening the {} table.'.format(table_id))
    table = instance.table(table_id)

    # [START writing_rows]

    # Keep at most two versions of each cell.
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    # BUGFIX: the column-family id must be a plain str, not bytes — the
    # client keys families by str, so the bytes id used for set_cell never
    # matched the family created on the table.
    column_family_id = 'states'
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print("Table {} already exists.".format(table_id))

    # Row key: serial number + UTC timestamp keeps keys unique per message.
    row_key = (data['serial_num'] + str(datetime.datetime.utcnow())).encode('utf-8')

    row_obj = table.row(row_key)

    # One cell per key/value pair of the payload — all in the same row.
    for key, value in data.items():
        row_obj.set_cell(
            column_family_id,
            str(key).encode('utf-8'),
            str(value).encode('utf-8'),
            timestamp=datetime.datetime.utcnow()
        )

    print(row_obj)
    print(str(row_obj))
    print(row_obj.table)
    print(row_obj.row_key)

    # DirectRow.commit() sends every accumulated set_cell mutation at once
    # (equivalent to table.mutate_rows([row_obj])).
    row_obj.commit()

    print('Inserted/updated data.')

    # [END writing_rows]

    # [START creating_a_filter]

    # Create a filter to only retrieve the most recent version of the cell
    # for each column across entire row.
    row_filter = row_filters.CellsColumnLimitFilter(1)

    # [END creating_a_filter]

    # [START read_rows]

    row = table.read_row(row_key, row_filter)
    print(row)
    for key, value in data.items():
        # BUGFIX: the original indexed with an undefined name ``column``
        # (NameError). Look up the same qualifier that was written above.
        cell_values = row.cells[column_family_id][str(key).encode('utf-8')][0]
        print('{} = {} should be {}'.format(key, cell_values, value))

    # [END read_rows]

Solution

  • This is the solution I ended up with

    import base64
    import json
    import ast
    import datetime
    
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    from google.cloud.bigtable import row_filters
    
    
    def hello_pubsub(event, context):
        """Pub/Sub-triggered entry point: write each payload key/value pair as
        a cell of a single Bigtable row, then read the row back and print it.

        NOTE(review): ``project_id``, ``instance_id`` and ``table_id`` are
        read from module scope and must be defined elsewhere in this module.
        """
        # Pub/Sub delivers the payload base64-encoded; the decoded text is a
        # Python-literal dict (hence ast.literal_eval, not json.loads).
        data = base64.b64decode(event['data']).decode('utf-8')
        data = ast.literal_eval(data)
    
        print(type(data))
        print(data)
    
        # Create a Cloud Bigtable client (admin=True so the table can be created).
        client = bigtable.Client(project=project_id, admin=True)
    
        # Connect to an existing Cloud Bigtable instance.
        instance = client.instance(instance_id)
    
        print('opening the {} table.'.format(table_id))
        table = instance.table(table_id)
    
        # [START writing_rows]
    
        # Keep at most two versions per cell. The family id is a plain str
        # here — using a bytes id was part of the original problem.
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'state'
        column_families = {column_family_id: max_versions_rule}
        if not table.exists():
            table.create(column_families=column_families)
        else:
            print("Table {} already exists.".format(table_id))
    
    
        # Unique row key per message: serial number + current UTC timestamp.
        row_key = (data['serial_num'] + " " + str(datetime.datetime.utcnow())).encode('utf-8')
    
        rows = [] 
    
        # One DirectRow per key/value pair — every row object shares the same
        # row_key, so mutate_rows() merges them into a single logical row.
        for key, value in data.items():
            row = table.row(row_key)
            # NOTE(review): the value is passed as str, not bytes — presumably
            # the installed client version accepts/encodes str values; confirm
            # against the google-cloud-bigtable version in use.
            row.set_cell(column_family_id, 
                str(key).encode('utf-8'), 
                str(value), 
                timestamp=datetime.datetime.utcnow())
            rows.append(row)
    
    
        # Send all mutations in one batch request.
        table.mutate_rows(rows)
    
        print('Inserted/updated data.')
    
        # [END writing_rows]
    
        # [START creating_a_filter]
    
        # Create a filter to only retrieve the most recent version of the cell
        # for each column across entire row.
        row_filter = row_filters.CellsColumnLimitFilter(1)
    
        # [END creating_a_filter]
    
        # [START read_rows]
    
        # Read the row back and print the newest cell value of each column.
        partial_rows = table.read_row(row_key, row_filter)
        print(partial_rows.cells)
        for key, value in data.items():
            cell_value = partial_rows.cell_value(column_family_id, str(key).encode('utf-8'))
            print('{} = {} should be {}'.format(key, cell_value, value))
    
        # [END read_rows]