Tobias Macey
05/13/2020, 7:26 PMTobias Macey
05/13/2020, 7:26 PM
@composite_solid(
name='open_edx_forum_data_export',
description='Export data for edX forums from Mongo database',
config={
'edx_mongodb_host': Field(
String,
is_required=True,
description='Resolvable host address of MongoDB master'
),
'edx_mongodb_port': Field(
Int,
is_required=False,
default_value=27017, # noqa WPS4232
description='TCP port number used to connect to MongoDB server'
),
'edx_mongodb_username': Field(
String,
is_required=False,
default_value='',
description='Username for account with permissions to read forum database'
),
'edx_mongodb_password': Field(
String,
is_required=False,
default_value='',
description='Password for account with permissions to read forum database'
),
'edx_mongodb_database_name': Field(
String,
is_required=True,
description='Name of database that contains forum data for Open edX installation'
),
'edx_forum_data_folder_name': Field(
String,
is_required=False,
default_value='forum',
description=('Name of the directory to create within the results directory for containing the exported '
'mongo database')
)
},
required_resource_keys={'results_dir'},
input_defs=[
InputDefinition(
name='edx_course_ids',
dagster_type=List[String],
description='List of course IDs active on Open edX installation'
)
],
output_defs=[
OutputDefinition(
name='edx_forum_data',
dagster_type=String,
description='Open edX forum data exported from Mongo database'
)
]
)
def export_edx_forum_data(context: SolidExecutionContext) -> String:
    """Run mongodump for the database that contains Open edX forum submissions.

    The exported data is meant to be consumed by Institutional Research.

    :param context: Dagster execution context for propagating configuration data
    :type context: SolidExecutionContext

    :yield: Path to exported database contents, on the ``edx_forum_data`` output
    :rtype: String
    """
    # Imported locally so this definition does not depend on a module-level import.
    import shlex

    # NOTE(review): this function is decorated with @composite_solid yet takes an
    # execution context and yields an Output like a plain solid -- confirm that
    # Dagster supports this, or move the logic into a regular @solid.
    forum_data_path = context.resources.results_dir.joinpath(
        context.config['edx_forum_data_folder_name'],
    )
    command_array = [
        '/usr/bin/mongodump',
        '--host',
        context.config['edx_mongodb_host'],
        '--port',
        # The port is configured as an Int; joining requires every element to be a str.
        str(context.config['edx_mongodb_port']),
        '--db',
        context.config['edx_mongodb_database_name'],
        '--authenticationDatabase',
        'admin',
        '--out',
        str(forum_data_path),
    ]
    # Credentials default to '' in config, so they are only passed when actually set.
    if password := context.config['edx_mongodb_password']:
        command_array.extend(['--password', password])
    if username := context.config['edx_mongodb_username']:
        command_array.extend(['--username', username])
    # shlex.join quotes each argument so values containing spaces or shell
    # metacharacters (e.g. a password) reach mongodump as a single argument.
    # NOTE(review): confirm whether bash_command_solid executes the command when
    # called like this or only builds a solid definition from a fixed string.
    bash_command_solid(shlex.join(command_array))
    yield Output(
        str(forum_data_path),
        'edx_forum_data',
    )
max
05/13/2020, 7:48 PM
`config_fn` arg to `composite_solid`
max
05/13/2020, 7:49 PMmax
05/13/2020, 7:49 PMTobias Macey
05/13/2020, 7:51 PMTobias Macey
05/13/2020, 7:53 PM
@solid(
name='edx_forum_build_mongo_dump_command',
description='Solid to build the command line string for executing mongodump against the Open edX forum database',
required_resource_keys={'results_dir'},
config={
'edx_mongodb_host': Field(
String,
is_required=True,
description='Resolvable host address of MongoDB master'
),
'edx_mongodb_port': Field(
Int,
is_required=False,
default_value=27017, # noqa WPS4232
description='TCP port number used to connect to MongoDB server'
),
'edx_mongodb_username': Field(
String,
is_required=False,
default_value='',
description='Username for account with permissions to read forum database'
),
'edx_mongodb_password': Field(
String,
is_required=False,
default_value='',
description='Password for account with permissions to read forum database'
),
'edx_mongodb_database_name': Field(
String,
is_required=True,
description='Name of database that contains forum data for Open edX installation'
),
'edx_forum_data_folder_name': Field(
String,
is_required=False,
default_value='forum',
description=('Name of the directory to create within the results directory for containing the exported '
'mongo database')
)
},
output_defs=[
OutputDefinition(
name='edx_forum_mongodump_command',
dagster_type=String,
description='Command line string for executing mongodump'
),
OutputDefinition(
name='edx_forum_data_directory',
dagster_type=String,
description='Path to exported forum data generated by mongodump command'
)
]
)
def edx_forum_mongo_build_dump_command(context: SolidExecutionContext):
    """Build the mongodump command line for exporting the Open edX forum database.

    Nothing is executed here; the command string and the target directory are
    emitted as outputs.

    :param context: Dagster execution context supplying the solid config and the
        ``results_dir`` resource
    :type context: SolidExecutionContext

    :yield: The shell-quoted mongodump command on ``edx_forum_mongodump_command``,
        then the export directory path on ``edx_forum_data_directory``
    """
    # Imported locally so this definition does not depend on a module-level import.
    import shlex

    forum_data_path = context.resources.results_dir.joinpath(
        context.config['edx_forum_data_folder_name'],
    )
    command_array = [
        '/usr/bin/mongodump',
        '--host',
        context.config['edx_mongodb_host'],
        '--port',
        # The port is configured as an Int; joining requires every element to be a str.
        str(context.config['edx_mongodb_port']),
        '--db',
        context.config['edx_mongodb_database_name'],
        '--authenticationDatabase',
        'admin',
        '--out',
        str(forum_data_path),
    ]
    # Credentials default to '' in config, so they are only passed when actually set.
    if password := context.config['edx_mongodb_password']:
        command_array.extend(['--password', password])
    if username := context.config['edx_mongodb_username']:
        command_array.extend(['--username', username])
    # shlex.join quotes each argument so values containing spaces or shell
    # metacharacters (e.g. a password) stay a single argument when run by a shell.
    yield Output(shlex.join(command_array), 'edx_forum_mongodump_command')
    yield Output(str(forum_data_path), 'edx_forum_data_directory')
@composite_solid(
    name='open_edx_forum_data_export',
    description='Export data for edX forums from Mongo database',
    output_defs=[
        OutputDefinition(
            name='edx_forum_data_directory',
            dagster_type=String,
            description='Path to Open edX forum data exported from Mongo database'
        )
    ]
)
def export_edx_forum_data() -> String:
    """Compose the solids that export the Open edX forum database with mongodump.

    The exported forum submissions are meant to be consumed by Institutional Research.
    This function takes no arguments.

    :returns: Path to exported database contents
    :rtype: String
    """
    # NOTE(review): whatever this call returns is discarded, so neither the
    # command string nor the data directory it produces is used below.
    edx_forum_mongo_build_dump_command()
    # NOTE(review): ``command_string`` is not defined anywhere in this function;
    # presumably the command from the call above was intended -- confirm how
    # bash_command_solid is meant to receive it.
    bash_command_solid(' '.join(command_string), input_defs=[])
    # NOTE(review): ``forum_data_path`` is not defined in this function either, and
    # the output name used here ('edx_forum_data') differs from the one declared in
    # output_defs ('edx_forum_data_directory') -- confirm which is intended.
    yield Output(
        str(forum_data_path),
        'edx_forum_data'
    )
Tobias Macey
05/13/2020, 7:53 PMTobias Macey
05/13/2020, 7:53 PMTobias Macey
05/13/2020, 8:16 PMmax
05/13/2020, 8:16 PMmax
05/13/2020, 8:16 PM
`Nothing` dependency on the bash command solid that yielded the path
Tobias Macey
05/13/2020, 8:20 PMTobias Macey
05/13/2020, 8:21 PMmax
05/13/2020, 10:59 PMmax
05/13/2020, 10:59 PMmax
05/13/2020, 11:02 PMalex
05/14/2020, 1:53 AMBalázs Dukai
12/16/2020, 1:04 PMTobias Macey
12/16/2020, 2:28 PM