Scenario: : I am trying to push some selected items from source table to destination table which are in AWS dynamodb using python boto3 module. For this, I am getting the interested rows from source table, but unable to put the same items in the destination table.
NOTE: : Here, it has Decimal values - Handling them successfully. But, it also has the attribute which has dictionary values. where I am failing to push those entries.
Here is the code.
import boto3
import argparse
# import simplejson as json
import json
from decimal import *
from boto3.dynamodb.conditions import Key, Attr
class DecimalEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, Decimal):
return str(obj)
return json.JSONEncoder.default(self, obj)
def migrate_data(region, sourcetable, desttable):
"""
:param region: region of AWS
:param sourcetable: source table to collect the data
:param desttable: dest backup table to move the data
:return: Move the selected data to dest and delete from source table
"""
try:
dynamodb = boto3.resource('dynamodb', region_name=region)
pdynamodb = boto3.client('dynamodb', region_name=region)
table = dynamodb.Table(sourcetable)
sresponse = table.scan(
FilterExpression=Attr('expirationDate').contains('2021-') & Attr('entityType').eq('prospect')
)
data = sresponse['Items']
print("## Quantity of 2021 records: {}".format(len(data)))
## Move to destination:
print("## Uploading the selected data from {} to {}".format(sourcetable, desttable))
for item in data:
jdata = json.dumps(item, cls=DecimalEncoder)
print("## Decimal Encoded data type is: {} and data : {}".format(type(jdata), jdata))
jitem = json.loads(jdata)
print("## Data loads must be in dict now: {}".format(type(jitem)))
item = {
'expirationDate': {'S': jitem['expirationDate']},
'entityType': {'S': jitem['entityType']},
'findProspectIndex': {'S': jitem['findProspectIndex']},
'apr': {'S': jitem['apr']},
'createdDate': {'S': jitem['createdDate']},
'marketingCampaignId': {'S': jitem['marketingCampaignId']},
'productLine': {'S': jitem['productLine']},
'status': {'S': jitem['status']},
'offerAmount': {'S': jitem['offerAmount']},
'pqCode': {'S': jitem['pqCode']},
'prospectOfferCode': {'S': jitem['prospectOfferCode']},
'customerType': {'S': jitem['customerType']},
'id': {'S': jitem['id']},
'updatedDate': {'S': jitem['updatedDate']},
# 'personalInfo': {'S': jitem['personalInfo']}
'personalInfo': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
}
}
print("## Entry item type is : {} and data: {}".format(type(item), item))
dresponse = pdynamodb.put_item(
TableName=desttable,
Item=item
)
print("## Type of data : {}".format(type(dresponse)))
print("## Quantity of 2021 records moved - {}".format(len(dresponse)))
## Remove the records from source table
for item in data:
print("## Deleting the item of id : {}".format(item['id']))
sresponse.delete_item(
Key={'id': item['id']}
)
except Exception as E:
print("## Caught an exception while deleting the data.. {}".format(E))
if __name__ == '__main__':
print("################### MIGRATE DATA from DynamoDB table to other table ####################\n")
parser = argparse.ArgumentParser(description="Input the source and destincation tables to migrate data")
parser.add_argument("-r", "--region", dest="region", action="store", help="provide region name",
default="us-east-1", required=False)
parser.add_argument("-st", "--sourcetable", dest="sourcetable", action="store", help="provide source table name",
required=True)
parser.add_argument("-dt", "--desttable", dest="desttable", action="store", help="provide destination table name",
required=True)
args = parser.parse_args()
migrate_data(args.region, args.sourcetable, args.desttable)
ERROR:
## Decimal Encoded data type is: <class 'str'> and data : {"expirationDate": "2021-11-04T00:00:00.000Z", "entityType": "prospect", "findProspectIndex": "curran_1_6904_83202", "apr": "26.99", "createdDate": "2021-10-29T11:01:52.172Z", "marketingCampaignId": "NCPQCARD20210913", "productLine": "credit card", "status": "new", "offerAmount": "0", "pqCode": "347384022", "prospectOfferCode": "1000100010003000", "customerType": "NC", "id": "e8b3d41d-ac0b-4790-9147-ee5b409b9a13", "updatedDate": "2021-10-29T11:01:52.172Z", "personalInfo": {"firstName": "RONALD_1", "lastName": "curran_1", "phoneNumber": "2083176132.0", "last4Ssn": "6904", "residentialAddress": {"zip": "83202", "addressLine1": "5085 GALENA ST", "addressLine2": "", "state": "ID", "city": "POCATELLO"}}}
## Data loads must be in dict now: <class 'dict'>
## Entry item type is : <class 'dict'> and data: {'expirationDate': {'S': '2021-11-04T00:00:00.000Z'}, 'entityType': {'S': 'prospect'}, 'findProspectIndex': {'S': 'curran_1_6904_83202'}, 'apr': {'S': '26.99'}, 'createdDate': {'S': '2021-10-29T11:01:52.172Z'}, 'marketingCampaignId': {'S': 'NCPQCARD20210913'}, 'productLine': {'S': 'credit card'}, 'status': {'S': 'new'}, 'offerAmount': {'S': '0'}, 'pqCode': {'S': '347384022'}, 'prospectOfferCode': {'S': '1000100010003000'}, 'customerType': {'S': 'NC'}, 'id': {'S': 'e8b3d41d-ac0b-4790-9147-ee5b409b9a13'}, 'updatedDate': {'S': '2021-10-29T11:01:52.172Z'}, 'personalInfo': {'firstName': {'S': 'RONALD_1'}, 'lastName': {'S': 'curran_1'}, 'phoneNumber': {'S': '2083176132.0'}, 'last4Ssn': {'S': '6904'}, 'residentialAddress': {'zip': {'S': '83202'}, 'addressLine1': {'S': '5085 GALENA ST'}, 'addressLine2': {'S': ''}, 'state': {'S': 'ID'}, 'city': {'S': 'POCATELLO'}}}}
## Caught an exception while deleting the data.. Parameter validation failed:
Unknown parameter in Item.personalInfo: "firstName", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "lastName", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "phoneNumber", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "last4Ssn", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Unknown parameter in Item.personalInfo: "residentialAddress", must be one of: S, N, B, SS, NS, BS, M, L, NULL, BOOL
Experts:, Please help me out here with a solution to handle the dict of dict values pushing to destination table.
You do not declare personalInfo as a datatype:
'personalInfo': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
It should be declared as a Map type which is the DynamoDB data type for dicts and requires a prefix of M
:
'personalInfo': { 'M': {
'firstName': {'S': jitem['personalInfo']['firstName']},
'lastName': {'S': jitem['personalInfo']['lastName']},
'phoneNumber': {'S': jitem['personalInfo']['phoneNumber']},
'last4Ssn': {'S': jitem['personalInfo']['last4Ssn']},
'residentialAddress': {
'zip': {'S': jitem['personalInfo']['residentialAddress']['zip']},
'addressLine1': {'S': jitem['personalInfo']['residentialAddress']['addressLine1']},
'addressLine2': {'S': jitem['personalInfo']['residentialAddress']['addressLine2']},
'state': {'S': jitem['personalInfo']['residentialAddress']['state']},
'city': {'S': jitem['personalInfo']['residentialAddress']['city']}
}
}
I'm extremely curious why you begin using the Resource Client while reading and then switch to the Low Level Client to write, creating extra work for yourself having to add the types at the risk of exceptions or data corruption. I strongly advise that you use the same client for both and avoid any of the conversions you are attempting.