
Cycledash submission sends local paths when Cycledash wants HDFS paths

Open arahuja opened this issue 10 years ago • 0 comments

Cycledash requires that the normal/tumor BAMs exist at a location on HDFS.

The current post_vcf function passes local paths to the BAMs, which Cycledash won't be able to access. It also requires them to be file_targets instead of strings, so you can't submit an HDFS path without failing.

I've been using an hdfs_file and an hdfs_file_target for some things, but I'm not sure where they should live: in biokepi or in ketrew.

(* A "product" describing a file on HDFS; checks are run with
   `hadoop fs -test` on the machine's host. *)
let hdfs_file ~run_with path =
  let open Ketrew.EDSL in
  let open Biokepi_run_environment in
  let test_statement = sprintf "hadoop fs -test -e %s" path in
  object
    method path = path
    val command =
      Ketrew.Target.Command.shell
        ~host:Machine.(as_host run_with) test_statement
    (* Succeeds when the path exists on HDFS. *)
    method exists = `Command_returns (command, 0)
    (* Succeeds when the file is non-empty: `hadoop fs -test -z` returns 0
       for a zero-length file, so the test is negated; the threshold [_n]
       itself is not checked. *)
    method is_bigger_than _n =
      `Command_returns (
        Ketrew.Target.Command.shell
          ~host:Machine.(as_host run_with)
          (sprintf "! hadoop fs -test -z %s" path),
        0)
  end
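
For example, the product can be built on its own (a minimal sketch; `biokepi_machine` stands for whatever `Machine.t` is in scope, and the path is made up):

let normal_bam_on_hdfs =
  hdfs_file ~run_with:biokepi_machine "/datasets/patient1/normal.bam"
(* [normal_bam_on_hdfs#path] is the HDFS path, and [normal_bam_on_hdfs#exists]
   is a `Command_returns condition that runs `hadoop fs -test -e` on the
   machine's host, so it can be used directly as a target's [~done_when]. *)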

(* A target whose product is an [hdfs_file]; it is considered done once
   the path exists on HDFS. *)
let hdfs_file_target ~run_with ?dependencies ?make ?metadata ?name ?equivalence
    ?if_fails_activate ?success_triggers ?tags path =
  let product = hdfs_file ~run_with path in
  let name = Option.value name ~default:("Make on HDFS: " ^ path) in
  Ketrew_edsl.target ~product ?equivalence ?if_fails_activate ?tags ?success_triggers
    ~done_when:product#exists ?dependencies ?make ?metadata name
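
A hypothetical use from a submission workflow would be a target whose make step copies a local BAM onto HDFS and which is done once the file exists there. This is only a sketch: the paths are made up, `sprintf` is assumed to be in scope as above, and `Machine.run_program` is assumed to turn a `Program.t` into a build process on the machine's host.

let upload_bam_to_hdfs ~run_with ~local_bam ~hdfs_path =
  let open Ketrew.EDSL in
  let open Biokepi_run_environment in
  hdfs_file_target ~run_with hdfs_path
    ~name:(sprintf "Upload %s to HDFS" (Filename.basename local_bam))
    ~make:(
      (* Assumed: Machine.run_program runs a Ketrew Program.t on the
         machine's host as the target's build process. *)
      Machine.run_program run_with
        Program.(sh (sprintf "hadoop fs -put %s %s" local_bam hdfs_path)))

The submission code could then depend on such a target and hand Cycledash the HDFS path string instead of a local file_target.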

arahuja · May 06 '15 22:05