biokepi
biokepi copied to clipboard
Cycledash submission sends local paths when Cycledash wants HDFS paths
Cycledash requires that the normal/tumor BAMs exist at a location on HDFS.
The current post_vcf function provides local paths to the BAMs, which Cycledash won't be able to access. Also, it requires them to be file_targets instead of strings, so you can't submit an HDFS path without failing.
I've been using an hdfs_file and an hdfs_file_target for some things, but I'm not sure where they should live: in biokepi or in ketrew.
(** [hdfs_file ~run_with path] builds an object describing the file at [path]
    on HDFS. Every check is a [hadoop fs] shell command run on the host
    obtained from [run_with] via [Machine.as_host].

    The object exposes:
    - [path]: the HDFS path, exactly as given;
    - [exists]: a [`Command_returns] condition on [hadoop fs -test -e]
      returning exit status 0;
    - [is_bigger_than n]: a [`Command_returns] condition checking that the
      size in bytes reported by [hadoop fs -stat %b] is strictly greater
      than [n]. *)
let hdfs_file ~run_with path =
  let open Ketrew.EDSL in
  let open Biokepi_run_environment in
  (* Quote once so paths containing spaces or shell metacharacters cannot
     break (or inject into) the generated commands. *)
  let quoted_path = Filename.quote path in
  (* All checks run as shell commands on the same host. *)
  let hdfs_shell shell_command =
    Ketrew.Target.Command.shell ~host:Machine.(as_host run_with) shell_command
  in
  object
    method path = path
    val command = hdfs_shell (sprintf "hadoop fs -test -e %s" quoted_path)
    method exists = (`Command_returns (command, 0))
    (* [hadoop fs -test -z] succeeds when the file is EMPTY, which is the
       opposite of what this method promises, and it cannot take [n] into
       account. Compare the byte size against [n] instead. If the [stat]
       call fails, its output is empty and [test] exits non-zero, so the
       condition is not satisfied. *)
    method is_bigger_than n =
      (`Command_returns
         (hdfs_shell
            (sprintf "test \"$(hadoop fs -stat %%b %s)\" -gt %d"
               quoted_path n),
          0))
  end
(** [hdfs_file_target ~run_with path] creates a Ketrew target whose product
    is [hdfs_file ~run_with path] and whose [done_when] condition is that
    product's [exists] check.

    When [?name] is not supplied, the target is named
    ["Make on HDFS:" ^ path]. The remaining optional arguments are passed
    unchanged to [Ketrew_edsl.target]. *)
let hdfs_file_target ~run_with ?dependencies ?make ?metadata ?name ?equivalence
    ?if_fails_activate ?success_triggers ?tags path =
  let hdfs_product = hdfs_file ~run_with path in
  let target_name =
    match name with
    | Some given_name -> given_name
    | None -> "Make on HDFS:" ^ path
  in
  Ketrew_edsl.target
    ~product:hdfs_product
    ~done_when:hdfs_product#exists
    ?equivalence ?if_fails_activate ?tags ?success_triggers
    ?dependencies ?make ?metadata
    target_name