https://dagster.io/ logo
#ask-community
Title
# ask-community
r

Rahul Dave

01/30/2023, 6:24 PM
I do this:
Copy code
@asset
def train_dataset():
    # Hands the training CSV path to read_data and returns whatever it
    # produces (presumably a DataFrame -- read_data is defined elsewhere).
    return read_data("data/train.csv")

@asset
def test_dataset():
    # Hands the test CSV path to read_data and returns whatever it
    # produces (presumably a DataFrame -- read_data is defined elsewhere).
    return read_data("data/test.csv")

# Both dataset assets gathered in one list. Nothing in this snippet reads it,
# so it is presumably consumed elsewhere (e.g. when registering assets) --
# TODO confirm.
input_datasets = [
    train_dataset,
    test_dataset,
]

# Op backed by the encoder notebook (path given via file_relative_path and
# __file__). It declares two pd.DataFrame inputs and one dict output named
# "encoders". The order of the `ins` keys matters: encoder_graph passes its
# arguments positionally.
encoder_op = define_dagstermill_op(
    name="encoder_op",
    notebook_path=file_relative_path(__file__, "../notebooks/encoder.ipynb"),
    output_notebook_name="output_encoder",
    ins={
        "df_train": In(pd.DataFrame),
        "df_test": In(pd.DataFrame),
    },
    outs={"encoders": Out(dict)},
)

@graph(
    ins={"df_train": GraphIn(), "df_test": GraphIn()},
    out={"encoders": GraphOut()},
)
def encoder_graph(df_train, df_test):
    # The result of encoder_op is unpacked into two values; only the first is
    # exposed as the graph's "encoders" output. The discarded second value is
    # presumably the executed-notebook output tied to the op's
    # output_notebook_name -- TODO confirm.
    encoded, _unused = encoder_op(df_train, df_test)
    return encoded

# Job built from encoder_graph, wired to the local output-notebook IO manager.
# NOTE(review): input_values appears to hand the graph inputs these AssetKey
# objects themselves, while encoder_op declares pd.DataFrame inputs -- confirm
# that the assets are actually loaded rather than the keys being passed through.
local_encoder_job = encoder_graph.to_job(
    name="local_encoder_job",
    input_values={
        "df_train": AssetKey("train_dataset"),
        "df_test": AssetKey("test_dataset"),
    },
    resource_defs={
        "output_notebook_io_manager": local_output_notebook_io_manager,
    },
)