Search code examples
pythongoogle-cloud-platformgoogle-cloud-pubsubgoogle-cloud-automl

Google Automl 400 error location ID and Field:Name is invalid


I am currently writing a function that will listen to GCP pubsub for message that will trigger the training function. The message itself contains the operation id of importing dataset, the function will receive this operation id and will keep looping until importing is complete after that, it will train the model and loop it until it completely due to I need to mark the start time and the end time to the database. My question is in my code I already specified the location ID and the project ID and it give me this 400 error of field name and name is invalid. I try the same train model setting with the sample code that provide from GCP website.https://cloud.google.com/vision/automl/docs/train?hl=zh-TW and it works fine. Here is my code. Thanks for every one for helping.

Backend function

from datetime import datetime
import time
from google.cloud import pubsub_v1, automl, datastore
from app import celeryConfig
from config import Config
import requests


def train_model():
    project_id = 'XXXX'
    topic_name = "XXXX"
    timeout = 20
    subscription_name = "XXXX"
    subscriber = pubsub_v1.SubscriberClient.from_service_account_json("./gerald-automl-test-ccf53bf513b7.json")
    subscription_path = subscriber.subscription_path(project_id, subscription_name)

    def callback(message):
        client = automl.AutoMlClient.from_service_account_json("XXXX.json")
        project_location = client.location_path('XXXXX', "us-central1")
        flow_control = pubsub_v1.types.FlowControl(max_messages=5)
        print("Received message: {}".format(message))
        pub_sub_message = str(message.data, encoding="utf-8")  # containing model_id,opreation_id & dataset_id
        message.ack()
        extract_from_data = [e.strip().replace("'", "") for e in pub_sub_message.strip("[]").split(",")]
        dataset_id = extract_from_data[0]
        model_name = extract_from_data[1]
        opreation_id = extract_from_data[2]
        print(opreation_id)


        while True:
            response = client.transport._operations_client.get_operation(
                operation_full_id
            )

            data_of_response = str(response)

            if response.done != True:
                print("not complete importing data")
                response = client.transport._operations_client.get_operation(
                    operation_full_id
                )
                print(str(response))
                time.sleep(10)

            elif response.done == True:
                print("start train model")
                dataset_id = dataset_id
                display_name = model_name
                location = 'XXXX'
                project_id='XXXX'

                client = automl.AutoMlClient.from_service_account_json("XXXXXX")

                # A resource that represents Google Cloud Platform location.
                parent = client.location_path('XXXX', 'XXXXX')

                metadata = automl.types.ImageClassificationModelMetadata(
                    train_budget_milli_node_hours=1000,
                    model_type='mobile-high-accuracy-1'
                )
                # Leave model unset to use the default base model provided by Google
                model = automl.types.Model(
                    display_name=display_name,
                    dataset_id=dataset_id,
                    image_classification_model_metadata=metadata,
                )

                datastore_client = datastore.Client.from_service_account_json(
                    "./XXXX.json")

                response = client.create_model(parent, model)

                opreation_full = str(response.operation.name).split('/')
                opreation_id_train_model = opreation_full[5]

                kind = 'job'
                data_key = datastore_client.key(kind)
                new_post = datastore.Entity(key=data_key)
                new_post['action_date'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
                new_post['job_type'] = "train_model"
                new_post['status'] = 'training'
                datastore_client.put(new_post)
                create_model_opreation = client.transport._operations_client.get_operation(
                    opreation_id_train_model)

                if create_model_opreation.done == True:
                    print('complete training model')
                    kind = 'job'
                    data_key = datastore_client.key(kind)
                    new_post = datastore.Entity(key=data_key)
                    new_post['job_type'] = "train_model"
                    new_post['status'] = ['complete_training']
                    new_post['finish_date'] = datetime.today().strftime('%Y-%m-%d %H:%M')
                    new_post['description'] = 'from train model ' + dataset_id
                    datastore_client.put(new_post)
                    break
                else:
                    print("not complete traning mode")
                    response = client.transport._operations_client.get_operation(
                        opreation_id
                    )
                    print(str(response))
                    time.sleep(10)



    streaming_pull_future = subscriber.subscribe(
        subscription_path, callback=callback
    )
    print("Listening for messages on {}..\n".format(subscription_path))

    # Wrap subscriber in a 'with' block to automatically call close() when done.
    with subscriber:
        try:
            # When `timeout` is not set, result() will block indefinitely,
            # unless an exception is encountered first.
            streaming_pull_future.result(timeout=timeout)
        except:  # noqa
            streaming_pull_future.cancel()

Solution

  • Could you add the stack trace of the error you get? It will help narrowing down the possibilities. As per my understanding the error comes from one of those lines:

    response = client.create_model(parent, model)
    opreation_full = str(response.operation.name).split('/')
    

    I've also realized that in the google documentation the libraries are not updated yet, in the python Automl Client Library the module name is automl_v1 now. You can first try using this and see if this is a version issue:

    from google.cloud import automl_v1