Olivier Dupuis
04/07/2022, 3:09 PM
@op(
required_resource_keys = {"novacene_client"}
)
def store_ml_enrichment_files(context, df_ml_enrichment_files):
    """Copy every listed ML enrichment file into the 'discursus-io' S3 bucket.

    Each row of ``df_ml_enrichment_files`` must provide a ``file_path`` and a
    ``name`` entry. The file at ``file_path`` is fetched through the
    ``novacene_client`` resource, serialized to CSV without the index, and
    uploaded under ``sources/ml/<file_date>/ml_enriched_<name>``.

    ``<file_date>`` is the first 8 characters of the third
    underscore-separated token of ``name``, cut at its first dot
    (presumably a YYYYMMDD stamp -- confirm against the real file names).

    Args:
        context: Op context exposing ``resources.novacene_client``.
        df_ml_enrichment_files: Table iterated with ``iterrows()``.

    Returns:
        None. No asset materialization is emitted.
    """
    s3_resource = boto3.resource('s3')

    for _, file_row in df_ml_enrichment_files.iterrows():
        file_name = file_row['name']

        # Fetch the enrichment file (presumably a pandas DataFrame -- it only
        # needs to support ``to_csv`` here).
        enrichment_df = context.resources.novacene_client.get_file(
            file_row['file_path']
        )

        # Derive the date folder from the file name.
        third_token = file_name.split("_")[2]
        file_date = third_token.split(".")[0][:8]

        # Serialize in memory, then upload the CSV text to S3.
        buffer = StringIO()
        enrichment_df.to_csv(buffer, index=False)
        object_key = f"sources/ml/{file_date}/ml_enriched_{file_name}"
        s3_resource.Object('discursus-io', object_key).put(
            Body=buffer.getvalue()
        )

        # Materialize and yield asset?

    return None
Thanks for your help! 🙂
Zach
04/07/2022, 3:53 PM
...
yield AssetMaterialization("materialization1")
yield AssetMaterialization("materialization2")
...
or even
for i in range(5):
    yield AssetMaterialization(f"materialization_{i}")
Olivier Dupuis
04/07/2022, 4:09 PM
Zach
04/07/2022, 4:10 PM