Search code examples
pythonpython-3.xprotocol-buffersprotobuf-net

Time data '2024-05' does not match format '%Y-%m-%dT%H:%M:%S' (Protobuff)


I am creating a protobuffer from a json and json strcture loooks like this:

    {
      "answerUpdateRequest": {
        "entity": {
          "type": "ORGANIZATION",
          "id": "UU1234321234ID"
        },
        "answers": [
          {
            "key": "legal_company_name",
            "source": {
              "type": "DOCUMENT",
              "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
            },
            "provided_at": "2024-05-02T15:54:15.941988",
            "received_at": "2024-05-02T15:54:15.945350",
            "type": "TEXT",
            "value": {
              "text": "Ciccone Law, LLC"
            }
          },
          {
            "key": "company_website_ind",
            "source": {
              "type": "DOCUMENT",
              "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
            },
            "provided_at": "2024-05-02T15:54:15.941988",
            "received_at": "2024-05-02T15:54:15.945365",
            "type": "BOOLEAN",
            "value": {
              "text": "Yes"
            }
          },
          {
            "key": "company_webiste",
            "source": {
              "type": "DOCUMENT",
              "id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
            },
            "provided_at": "2024-05-02T15:54:15.941988",
            "received_at": "2024-05-02T15:54:15.945388",
            "type": "TEXT",
            "value": {
              "text": "www.Justice-Insight.com"
            }
          }
          
        ]
      },
      "documentKey": "3ea20440-83fb-43c0-b409-1dd8f68e73ec",
      "applicationId": "1343245432",
      "activityId": "1111"
    }
    def create_answer_update_request(json_data):
        data = json_data
        print("Data is "+str(data))
        answer_update_request = events_pb2.AnswerUpdateRequest()
        answer_update_request.entity.type = model_pb2.Entity.Type.Value(data["answerUpdateRequest"]["entity"]["type"])
        answer_update_request.entity.id = data["answerUpdateRequest"]["entity"]["id"]
    
        # Convert answers to protobuf format
        for answer_data in data["answerUpdateRequest"]["answers"]:
            answer = answer_update_request.answers.add()
            answer.key = answer_data["key"]
            answer.source.type = answer_data["source"]["type"]
            answer.source.id = answer_data["source"]["id"]
    
            # Handle provided_at and received_at fields
            provided_at_str = answer_data["provided_at"].split('.')[0]
            print(provided_at_str)
    
            provided_at_datetime = datetime.strptime(provided_at_str, "%Y-%m-%dT%H:%M:%S")
    
            provided_at = Timestamp()
            provided_at.FromJsonString(provided_at_datetime.isoformat())
            print("##########provided_at############")
            answer.provided_at.CopyFrom(provided_at_datetime.isoformat())
    
            received_at_str = answer_data["received_at"]
            if len(received_at_str) == 7:  # Only contains 'YYYY-MM'
                received_at_str += "-01T00:00:00"  # Append default day and time
            received_at = Timestamp()
            received_at.FromJsonString(received_at_str)
            answer.received_at.CopyFrom(received_at)
    
            # Ensure the type value is valid and convert it
            if answer_data["type"] not in events_pb2.Answer.Type.keys():
                raise ValueError(f"Invalid answer type: {answer_data['type']}")
            answer.type = events_pb2.Answer.Type.Value(answer_data["type"])
    
            # Handle different value types based on your schema
            if answer_data["type"] == "TEXT":
                answer.value.text = answer_data["value"]["text"]
            elif answer_data["type"] == "BOOLEAN":
                answer.value.boolean = answer_data["value"]["boolean"]
            # Add handling for other types as needed
    
        # Serialize message
        serialized_data = answer_update_request.SerializeToString()
    
        return serialized_data

I have tried to print provided_at_str and it is printing "2024-05-16T22:20:28"

Error: 
   tt, fraction, gmtoff_fraction = _strptime(data_string, format)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/_strptime.py", line 349, in _strptime
    raise ValueError("time data %r does not match format %r" %
ValueError: time data '2024-05' does not match format '%Y-%m-%dT%H:%M:%S'

Solution

  • Ugh datetimes :-)

    I think your dates aren't valid RFC-3339.

    Without the timezone offset, I think they should terminate Z

    If you were to append Z to your dates, you can then FromJsonString:

    You don't include your Protobuf source:

    foo.proto:

    syntax = "proto3";
    
    import "google/protobuf/timestamp.proto";
    
    message Foo {
        google.protobuf.Timestamp provided_at = 1;
        google.protobuf.Timestamp received_at = 2;
    }
    
    def add_Z(s: str) -> str:
        return f"{s}Z"
    
    if __name__ == "__main__":
        data = json.loads(JSON)
    
        foo = foo_pb2.Foo()
        for answer in data["answerUpdateRequest"]["answers"]:    
            foo.provided_at.FromJsonString(add_Z(answer["provided_at"]))
            foo.received_at.FromJsonString(add_Z(answer["received_at"]))
            print(foo)
    

    Yields:

    provided_at {
      seconds: 1714665255
      nanos: 941988000
    }
    received_at {
      seconds: 1714665255
      nanos: 945350000
    }
    
    provided_at {
      seconds: 1714665255
      nanos: 941988000
    }
    received_at {
      seconds: 1714665255
      nanos: 945365000
    }
    
    provided_at {
      seconds: 1714665255
      nanos: 941988000
    }
    received_at {
      seconds: 1714665255
      nanos: 945388000
    }