The WMArchive service supports running map-reduce jobs over the data it stores on HDFS. A job is submitted with the mrjob wrapper script, which takes the HDFS locations of the input data (--hdir), the output directory (--odir) and the Avro schema (--schema), together with the user's map-reduce module (--mrpy) and tarballs of the pydoop and avro dependencies (--pydoop, --avro):
    mrjob --hdir=hdfs://host:port/path/data \
          --odir=hdfs://host:port/path/out \
          --schema=hdfs://host:port/path/schema.avsc \
          --mrpy=mr.py \
          --pydoop=/path/pydoop.tgz \
          --avro=/path/avro.tgz
The map-reduce module passed via --mrpy defines a pair of functions, mapper and reducer, which operate on the context object supplied by the framework:

    def mapper(ctx):
        "Read the given context and yield a key (job-id) and value (task)"
        rec = ctx.value
        jid = rec["jobid"]
        if jid is not None:
            ctx.emit(jid, rec["fwjr"]["task"])

    def reducer(ctx):
        "Emit an empty key and an aggregated data structure via the given context"
        ctx.emit("", {"jobid": ctx.key, "task": ctx.values})
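The --pydoop tarball suggests the wrapper drives these functions as a pydoop pipes task. Below is a minimal sketch of such a driver, assuming pydoop's standard pipes API (api.Mapper, api.Reducer, pipes.Factory, pipes.run_task) and its avrolib.AvroContext, which deserializes each Avro record so that ctx.value is a plain Python dict; the class names and the delegation scheme are illustrative, not the actual WMArchive skeleton:

    # Illustrative pydoop driver for the mapper/reducer above
    # (a sketch only, not the actual WMArchive skeleton).
    import pydoop.mapreduce.api as api
    import pydoop.mapreduce.pipes as pipes
    from pydoop.avrolib import AvroContext  # makes ctx.value a Python dict

    from mr import mapper, reducer  # the user module passed via --mrpy

    class WMMapper(api.Mapper):
        def map(self, ctx):
            # delegate each deserialized Avro record to the user mapper
            mapper(ctx)

    class WMReducer(api.Reducer):
        def reduce(self, ctx):
            # delegate each (key, values) group to the user reducer
            reducer(ctx)

    def __main__():
        # entry point invoked by the pydoop task runner
        pipes.run_task(pipes.Factory(WMMapper, reducer_class=WMReducer),
                       context_class=AvroContext)

The shuffle phase groups the emitted pairs by job id, so the reducer sees all task values for one job at once and can fold them into a single output record.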