I am reading data from 2 proto files:
file.proto: this is a wrapper
file2.proto: this has all the columns
file.proto:
syntax = "proto3";
package com.oracle;
import "file2.proto";
option go_package = "github.com/cle/sdk/go_sdk";
// This is the inbound message intended to inform the Oracle of new answers to be persisted
message AnswerUpdateRequest {
// Entity message, declared in the imported file2.proto.
Entity entity = 1;
// Zero or more Answer messages, declared in the imported file2.proto.
repeated Answer answers = 2;
}
// This is the outbound message informing Oracle subscribers of new answers
// It declares the same two fields, with the same field numbers, as AnswerUpdateRequest.
message AnswersUpdated {
// Entity message, declared in the imported file2.proto.
Entity entity = 1;
// Zero or more Answer messages, declared in the imported file2.proto.
repeated Answer answers = 2;
}
file2.proto:
syntax = "proto3";
package com.oracle;
import "google/protobuf/timestamp.proto";
option go_package = "github.com/embroker/oracle/sdk/go_sdk";
// Entity pairs a Type discriminator with a string identifier.
message Entity {
// Kind of entity; one of the nested Type values declared below.
Type type = 1;
// Identifier string (its format is not specified by this schema).
string id = 2;
// Supported entity kinds. ORGANIZATION is the zero value, i.e. the proto3
// default when the field is not set.
enum Type {
ORGANIZATION = 0;
USER = 1;
APPLICATION = 2;
}
}
// AnswerSource is the message type of Answer.source: a Type discriminator
// plus a string identifier.
message AnswerSource {
// Kind of source; one of the nested Type values declared below.
Type type = 1;
// Identifier string (its format is not specified by this schema).
string id = 2;
// Supported source kinds. UNKNOWN is the zero value, i.e. the proto3
// default when the field is not set.
enum Type {
UNKNOWN = 0;
USER = 1;
DOCUMENT = 2;
EXTERNAL = 3;
}
}
// Answer is a single keyed value together with its source, two timestamps
// and a declared field type.
message Answer {
// Key naming the answer.
string key = 1;
// Source descriptor; see AnswerSource.
AnswerSource source = 2;
// Well-known Timestamp values. NOTE(review): presumably when the answer was
// provided and when it was received, respectively - confirm with the schema owner.
google.protobuf.Timestamp provided_at = 3;
google.protobuf.Timestamp received_at = 4;
// Declared field type of the value; see AnswerFieldType.
AnswerFieldType type = 5;
// The answer payload; see the nested Value message.
Value value = 6;
// Value wraps a oneof, so at most one of its members is set at a time.
message Value {
oneof value {
string text = 1;
float decimal = 2;
// ...
}
}
}
// AnswerFieldType is the enum used by Answer.type. Every value name carries
// the ANSWER_FIELD_TYPE_ prefix; ANSWER_FIELD_TYPE_UNSTRUCTURED is the zero
// value, i.e. the proto3 default when the field is not set.
enum AnswerFieldType {
ANSWER_FIELD_TYPE_UNSTRUCTURED = 0; // Can be useful for LLM purposes
ANSWER_FIELD_TYPE_TEXT = 1;
ANSWER_FIELD_TYPE_INTEGER = 2;
ANSWER_FIELD_TYPE_BOOLEAN = 3;
ANSWER_FIELD_TYPE_DECIMAL = 4;
ANSWER_FIELD_TYPE_DATE = 5;
ANSWER_FIELD_TYPE_ADDRESS = 6;
}
My python function to map to proto
import file.proto
import file2.proto
def create_answer_update_request(json_data):
    """Convert a parsed JSON payload into serialized AnswerUpdateRequest bytes.

    The entity and every entry of the answers list are read from beneath the
    "answerUpdateRequest" key of ``json_data`` and copied into the matching
    protobuf messages. Enum fields arrive as names and are translated with
    ``Value()``; timestamps arrive as ISO-8601 strings. Only the "text"
    member of each answer's "value" is read.

    Returns the result of ``SerializeToString()`` on the populated request.
    The serialized message is the AnswerUpdateRequest itself; the
    "answerUpdateRequest" key of the input is not part of it.
    """
    request_pb = events_pb2.AnswerUpdateRequest()
    payload = json_data["answerUpdateRequest"]

    entity_info = payload["entity"]
    request_pb.entity.type = model_pb2.Entity.Type.Value(entity_info["type"])
    request_pb.entity.id = entity_info["id"]

    for item in payload["answers"]:
        answer_pb = Answer()
        answer_pb.key = item['key']

        # Build the source separately, then copy it into the answer.
        source_info = item['source']
        source_pb = AnswerSource()
        source_pb.type = AnswerSource.Type.Value(source_info['type'])
        source_pb.id = source_info['id']
        answer_pb.source.CopyFrom(source_pb)

        # Both timestamp fields are filled the same way, in declaration order.
        for stamp_field in ('provided_at', 'received_at'):
            parsed_stamp = datetime.fromisoformat(item[stamp_field])
            getattr(answer_pb, stamp_field).FromDatetime(parsed_stamp)

        # The JSON carries the short type name; the enum uses a common prefix.
        answer_pb.type = AnswerFieldType.Value(f"ANSWER_FIELD_TYPE_{item['type']}")

        text_value = Answer.Value()
        text_value.text = item['value']['text']
        answer_pb.value.CopyFrom(text_value)

        request_pb.answers.append(answer_pb)

    return request_pb.SerializeToString()
When I deserialize the data, the wrapper key is missing from the output:
Expected output:
{
"answerUpdateRequest": {
"entity": {
"type": "ORGANIZATION",
"id": "UU12334ID"
},
"answers": [
{
"key": "legal_company_name",
"source": {
"type": "DOCUMENT",
"id": "3ea20f68e73ec | DocumentType.application"
},
"provided_at": "2024-05-02T15:54:15.941988",
"received_at": "2024-05-02T15:54:15.945350",
"type": "TEXT",
"value": {
"text": "Cicne Law, LLC"
}
},
{
"key": "company_website_ind",
"source": {
"type": "DOCUMENT",
"id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
},
"provided_at": "2024-05-02T15:54:15.941988",
"received_at": "2024-05-02T15:54:15.945365",
"type": "BOOLEAN",
"value": {
"text": "Yes"
}
}
]
}
}
Error: I am not getting "answerUpdateRequest" in the final output; everything else is working for me as expected. How can I get this wrapper key?
The Protobuf sources (schemas) you include reference package com.oracle.
If these are indeed Oracle Protobuf sources, it would be better for you to generate using Oracle's public repo and reference them as 3rd-party sources.
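I think your code could be simplified:
- Your JSON includes a root key "answerUpdateRequest", but the Message that includes this is missing.
- Your timestamps lack a timezone designator (e.g. "2024-05-02T15:54:15.941988"), which the Protobuf JSON mapping for Timestamp expects.
I create a wrapper Message:
foo.proto: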
syntax = "proto3";
package com.oracle;
import "file.proto";
// Foo is a wrapper message with a single AnswerUpdateRequest field; in the
// JSON form of the message that field appears under the key "answerUpdateRequest".
message Foo {
AnswerUpdateRequest answer_update_request = 1;
}
And:
# Compile the three schemas, writing the generated Python modules
# (--python_out) and type stubs (--pyi_out) to the current directory.
protoc \
--python_out=${PWD} \
--pyi_out=${PWD} \
file.proto \
file2.proto \
foo.proto
If you were to use the following tweaks to the JSON:
data = '''{
"answerUpdateRequest": {
"entity": {
"type": 0,
"id": "UU12334ID"
},
"answers": [
{
"key": "legal_company_name",
"source": {
"type": 2,
"id": "3ea20f68e73ec | DocumentType.application"
},
"provided_at": "2024-05-02T15:54:15.941988Z",
"received_at": "2024-05-02T15:54:15.945350Z",
"type": 1,
"value": {
"text": "Cicne Law, LLC"
}
},
{
"key": "company_website_ind",
"source": {
"type": 2,
"id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
},
"provided_at": "2024-05-02T15:54:15.941988Z",
"received_at": "2024-05-02T15:54:15.945365Z",
"type": 3,
"value": {
"text": "Yes"
}
}
]
}
}
'''
Then:
import json
import foo_pb2
import file_pb2
import file2_pb2
from google.protobuf import json_format
# json_format.Parse() reads the JSON text directly, fills the message passed
# to it and returns that same message, so a separate json.loads() pass whose
# result is never used is unnecessary.
m1 = json_format.Parse(data, foo_pb2.Foo())
print(m1)
Yields a protobuf message (!) (entity.type is omitted because it is the default value 0|ORGANIZATION):
answer_update_request {
entity {
id: "UU12334ID"
}
answers {
key: "legal_company_name"
source {
type: DOCUMENT
id: "3ea20f68e73ec | DocumentType.application"
}
provided_at {
seconds: 1714665255
nanos: 941988000
}
received_at {
seconds: 1714665255
nanos: 945350000
}
type: ANSWER_FIELD_TYPE_TEXT
value {
text: "Cicne Law, LLC"
}
}
answers {
key: "company_website_ind"
source {
type: DOCUMENT
id: "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
}
provided_at {
seconds: 1714665255
nanos: 941988000
}
received_at {
seconds: 1714665255
nanos: 945365000
}
type: ANSWER_FIELD_TYPE_BOOLEAN
value {
text: "Yes"
}
}
}
And:
import json
import foo_pb2
import file_pb2
import file2_pb2
from google.protobuf import json_format
# Build the wrapper message with the entity set up front; the answers are
# attached once they have been constructed below.
m2 = foo_pb2.Foo(
    answer_update_request=file_pb2.AnswerUpdateRequest(
        entity=file2_pb2.Entity(
            type=file2_pb2.Entity.ORGANIZATION,
            id="UU12334ID",
        ),
    ),
)

a1 = file2_pb2.Answer(
    key="legal_company_name",
    source=file2_pb2.AnswerSource(
        type=file2_pb2.AnswerSource.DOCUMENT,
        id="3ea20f68e73ec | DocumentType.application",
    ),
    type=file2_pb2.ANSWER_FIELD_TYPE_TEXT,
    value=file2_pb2.Answer.Value(
        text="Cicne Law, LLC",
    ),
)
# Timestamp fields are populated in place from their JSON string form.
# (No trailing commas: they would turn each call into a discarded tuple.)
a1.provided_at.FromJsonString("2024-05-02T15:54:15.941988Z")
a1.received_at.FromJsonString("2024-05-02T15:54:15.945350Z")

a2 = file2_pb2.Answer(
    key="company_websited_ind",
    source=file2_pb2.AnswerSource(
        type=file2_pb2.AnswerSource.DOCUMENT,
        id="3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application",
    ),
    type=file2_pb2.ANSWER_FIELD_TYPE_BOOLEAN,
    value=file2_pb2.Answer.Value(
        text="Yes",
    ),
)
a2.provided_at.FromJsonString("2024-05-02T15:54:15.941988Z")
a2.received_at.FromJsonString("2024-05-02T15:54:15.945350Z")

m2.answer_update_request.answers.extend([
    a1,
    a2,
])

# always_print_fields_with_no_presence makes zero-valued fields (such as
# entity.type == ORGANIZATION) appear in the JSON instead of being omitted.
# NOTE: older protobuf releases named this option
# including_default_value_fields.
print(json_format.MessageToJson(m2, always_print_fields_with_no_presence=True))
Yields the JSON:
{
"answerUpdateRequest": {
"entity": {
"id": "UU12334ID",
"type": "ORGANIZATION"
},
"answers": [
{
"key": "legal_company_name",
"source": {
"type": "DOCUMENT",
"id": "3ea20f68e73ec | DocumentType.application"
},
"providedAt": "2024-05-02T15:54:15.941988Z",
"receivedAt": "2024-05-02T15:54:15.945350Z",
"type": "ANSWER_FIELD_TYPE_TEXT",
"value": {
"text": "Cicne Law, LLC"
}
},
{
"key": "company_websited_ind",
"source": {
"type": "DOCUMENT",
"id": "3ea20440-83fb-43c0-b409-1dd8f68e73ec | DocumentType.application"
},
"providedAt": "2024-05-02T15:54:15.941988Z",
"receivedAt": "2024-05-02T15:54:15.945350Z",
"type": "ANSWER_FIELD_TYPE_BOOLEAN",
"value": {
"text": "Yes"
}
}
]
}
}
NOTE: You'd need to revise AnswerFieldType to get values of TEXT instead of ANSWER_FIELD_TYPE_TEXT.