Search code examples
pythonplotlykedro

How to show plotly chart in kedro?


I am trying to use the data science tool Kedro, following this tutorial.
I followed the instructions (writing config.yaml, nodes.py, pipeline.py, etc., exactly as in the documentation) and could run kedro run successfully.
As the next step, I ran kedro viz and could see the pipelines, but I cannot see the Plotly chart.
Here is the result of the visualization. Please see the left pane. I can see "Shuttle Passenger Capacity Plot" there, but it is not active and the plot does not show up.
enter image description here

Also, I set conf/base/catalog.yaml to output a JSON file for Plotly to load, but I cannot see any such file in the 08_reporting directory. Could this be the cause of the issue?

enter image description here

Update

nodes.py and pipeline.py are located here. enter image description here

nodes.py

import pandas as pd


def _is_true(x: pd.Series) -> pd.Series:
    """Return a boolean mask that is True where an element equals ``"t"``.

    Args:
        x: Series of raw flag values.
    Returns:
        Boolean Series aligned with ``x``.
    """
    return x.eq("t")


def _parse_percentage(x: pd.Series) -> pd.Series:
    """Convert percentage strings such as ``"85%"`` into fractions (``0.85``).

    Args:
        x: Series of strings that may contain ``%`` characters.
    Returns:
        Float Series holding the parsed values divided by 100.
    """
    without_sign = x.str.replace("%", "")
    return without_sign.astype(float) / 100


def _parse_money(x: pd.Series) -> pd.Series:
    """Convert money strings such as ``"$1,200.50"`` into floats (``1200.5``).

    Args:
        x: Series of strings that may contain ``$`` signs and ``,`` thousands
            separators.
    Returns:
        Float Series with the currency formatting stripped.
    """
    # "$" is a regex end-of-string anchor and the default of `regex` differs
    # between pandas versions, so request literal replacement explicitly.
    x = x.str.replace("$", "", regex=False).str.replace(",", "", regex=False)
    return x.astype(float)


def preprocess_companies(companies: pd.DataFrame) -> pd.DataFrame:
    """Clean up the raw companies table.

    The two columns are overwritten on the frame that was passed in, and that
    same frame is returned.

    Args:
        companies: Raw data.
    Returns:
        The same frame, with `iata_approved` turned into booleans and
        `company_rating` turned into a float fraction.
    """
    approved_flags = _is_true(companies["iata_approved"])
    companies["iata_approved"] = approved_flags

    rating_fractions = _parse_percentage(companies["company_rating"])
    companies["company_rating"] = rating_fractions

    return companies


def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
    """Clean up the raw shuttles table.

    The columns are overwritten on the frame that was passed in, and that
    same frame is returned.

    Args:
        shuttles: Raw data.
    Returns:
        The same frame, with `d_check_complete` and `moon_clearance_complete`
        turned into booleans and `price` turned into a float.
    """
    # Both flag columns use the same "t" marker, so handle them uniformly.
    for flag_column in ("d_check_complete", "moon_clearance_complete"):
        shuttles[flag_column] = _is_true(shuttles[flag_column])

    shuttles["price"] = _parse_money(shuttles["price"])
    return shuttles


def create_model_input_table(
        shuttles: pd.DataFrame, companies: pd.DataFrame, reviews: pd.DataFrame
) -> pd.DataFrame:
    """Join shuttles, reviews and companies into one model input table.

    Shuttles are matched to reviews on ``id`` == ``shuttle_id``; the result is
    matched to companies on ``company_id`` == ``id``. Rows that contain any
    missing value after the joins are discarded.

    Args:
        shuttles: Preprocessed data for shuttles.
        companies: Preprocessed data for companies.
        reviews: Raw data for reviews.
    Returns:
        Model input table.

    """
    return (
        shuttles
        .merge(reviews, left_on="id", right_on="shuttle_id")
        .merge(companies, left_on="company_id", right_on="id")
        .dropna()
    )


import plotly.express as px
import pandas as pd
import plotly.graph_objects as go


# the below function uses plotly.express
def compare_passenger_capacity(preprocessed_shuttles: pd.DataFrame):
    """Build a bar chart of the mean passenger capacity per shuttle type.

    Args:
        preprocessed_shuttles: Shuttle data containing at least the
            `shuttle_type` and `passenger_capacity` columns.
    Returns:
        The figure created by `plotly.express.bar`.
    """
    # numeric_only=True: on pandas >= 2.0 a plain mean() raises TypeError if
    # the frame also holds non-numeric (e.g. string) columns.
    mean_by_type = (
        preprocessed_shuttles.groupby(["shuttle_type"])
        .mean(numeric_only=True)
        .reset_index()
    )
    return px.bar(
        data_frame=mean_by_type,
        x="shuttle_type",
        y="passenger_capacity",
    )

pipeline.py

from kedro.pipeline import Pipeline, node
from kedro.pipeline.modular_pipeline import pipeline

from .nodes import create_model_input_table, preprocess_companies, preprocess_shuttles, compare_passenger_capacity


def create_pipeline(**kwargs) -> Pipeline:
    """Assemble the namespaced data-processing pipeline.

    Four nodes are created (company preprocessing, shuttle preprocessing,
    model-input-table creation and the passenger-capacity plot) and wrapped
    in the "data_processing" namespace, with `companies`, `shuttles` and
    `reviews` declared as inputs and `model_input_table` as the output.

    NOTE(review): `shuttle_passenger_capacity_plot` is not listed in
    `outputs`, so it presumably receives the namespace prefix and may not
    match an un-prefixed catalog entry -- confirm against the catalog.
    """
    processing_nodes = [
        node(
            func=preprocess_companies,
            inputs="companies",
            outputs="preprocessed_companies",
            name="preprocess_companies_node",
        ),
        node(
            func=preprocess_shuttles,
            inputs="shuttles",
            outputs="preprocessed_shuttles",
            name="preprocess_shuttles_node",
        ),
        node(
            func=create_model_input_table,
            inputs=["preprocessed_shuttles", "preprocessed_companies", "reviews"],
            outputs="model_input_table",
            name="create_model_input_table_node",
        ),
        # The plotting node is intentionally left unnamed, as in the original.
        node(
            func=compare_passenger_capacity,
            inputs="preprocessed_shuttles",
            outputs="shuttle_passenger_capacity_plot",
        ),
    ]
    return pipeline(
        processing_nodes,
        namespace="data_processing",
        inputs=["companies", "shuttles", "reviews"],
        outputs="model_input_table",
    )

Reference: https://kedro.readthedocs.io/en/stable/tutorial/visualise_pipeline.html


Solution

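  • I passed the wrong arguments to pipeline().
    After I deleted the three lines of code below, it worked. (With a namespace set, datasets that are not listed in inputs/outputs get the namespace prefix, so the plot output no longer matched its catalog entry.)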

    namespace="data_processing",
    inputs=["companies", "shuttles", "reviews"],
    outputs="model_input_table",
    

    Here is corrected code:

    pipeline.py

    from .nodes import create_model_input_table, preprocess_companies, preprocess_shuttles, compare_passenger_capacity
    
    
    def create_pipeline(**kwargs) -> Pipeline:
        """Assemble the data-processing pipeline without a namespace.

        Four nodes are created: company preprocessing, shuttle preprocessing,
        model-input-table creation and the passenger-capacity plot. Dataset
        names are passed to `pipeline` exactly as written on each node.
        """
        processing_nodes = [
            node(
                func=preprocess_companies,
                inputs="companies",
                outputs="preprocessed_companies",
                name="preprocess_companies_node",
            ),
            node(
                func=preprocess_shuttles,
                inputs="shuttles",
                outputs="preprocessed_shuttles",
                name="preprocess_shuttles_node",
            ),
            node(
                func=create_model_input_table,
                inputs=["preprocessed_shuttles", "preprocessed_companies", "reviews"],
                outputs="model_input_table",
                name="create_model_input_table_node",
            ),
            # The plotting node is intentionally left unnamed, as in the original.
            node(
                func=compare_passenger_capacity,
                inputs="preprocessed_shuttles",
                outputs="shuttle_passenger_capacity_plot",
            ),
        ]
        return pipeline(processing_nodes)
    

    enter image description here