Rahul Dave
01/30/2023, 6:24 PM
@asset
def train_dataset():
    # Hands the training CSV path to read_data and returns whatever it produces.
    return read_data("data/train.csv")
# Asset whose value is whatever read_data produces for the test CSV path.
@asset
def test_dataset():
    return read_data("data/test.csv")
# Both dataset definitions gathered in one list, train first.
input_datasets = [
    train_dataset,
    test_dataset,
]
# Op backed by the encoder notebook (path resolved relative to this file).
# It declares two DataFrame inputs and one dict-typed output named "encoders".
encoder_op = define_dagstermill_op(
    name="encoder_op",
    notebook_path=file_relative_path(__file__, "../notebooks/encoder.ipynb"),
    ins={
        "df_train": In(pd.DataFrame),
        "df_test": In(pd.DataFrame),
    },
    outs={"encoders": Out(dict)},
    # NOTE(review): naming an output notebook presumably adds a second op
    # output holding the executed notebook — confirm against dagstermill docs.
    output_notebook_name="output_encoder",
)
# Graph with two inputs (df_train, df_test) and a single output, "encoders".
@graph(
    ins={"df_train": GraphIn(), "df_test": GraphIn()},
    out={"encoders": GraphOut()},
)
def encoder_graph(df_train, df_test):
    # encoder_op's result unpacks into two values; only the first is returned
    # as the graph output and the second is deliberately discarded.
    encoded, _unused = encoder_op(df_train, df_test)
    return encoded
# Job built from encoder_graph, wired with the local output-notebook IO manager.
# NOTE(review): input_values appears to hand the given Python objects straight
# to the graph inputs, so df_train / df_test would receive AssetKey instances
# rather than the assets' loaded data — confirm this is the intended wiring.
local_encoder_job = encoder_graph.to_job(
    name="local_encoder_job",
    input_values={
        "df_train": AssetKey("train_dataset"),
        "df_test": AssetKey("test_dataset"),
    },
    resource_defs={
        "output_notebook_io_manager": local_output_notebook_io_manager,
    },
)