neoflow
neoflow copied to clipboard
Error with demo data when running STEP2
I copied the test data from the example_data folder provided by neoflow. The input_vcf_list.txt file looks like:
experiment sample file file_type
crc_test sample1 /neoantigen2/input/sample1.vcf somatic
the copied sample1.vcf looks like:
##fileformat=VCFv4.2
##contig=<ID=chrM,length=16571,assembly=hg19>
##contig=<ID=chr1,length=249250621,assembly=hg19>
##contig=<ID=chr2,length=243199373,assembly=hg19>
##contig=<ID=chr3,length=198022430,assembly=hg19>
##contig=<ID=chr4,length=191154276,assembly=hg19>
##contig=<ID=chr5,length=180915260,assembly=hg19>
##contig=<ID=chr6,length=171115067,assembly=hg19>
##contig=<ID=chr7,length=159138663,assembly=hg19>
##contig=<ID=chr8,length=146364022,assembly=hg19>
##contig=<ID=chr9,length=141213431,assembly=hg19>
##contig=<ID=chr10,length=135534747,assembly=hg19>
##contig=<ID=chr11,length=135006516,assembly=hg19>
##contig=<ID=chr12,length=133851895,assembly=hg19>
##contig=<ID=chr13,length=115169878,assembly=hg19>
##contig=<ID=chr14,length=107349540,assembly=hg19>
##contig=<ID=chr15,length=102531392,assembly=hg19>
##contig=<ID=chr16,length=90354753,assembly=hg19>
##contig=<ID=chr17,length=81195210,assembly=hg19>
##contig=<ID=chr18,length=78077248,assembly=hg19>
##contig=<ID=chr19,length=59128983,assembly=hg19>
##contig=<ID=chr20,length=63025520,assembly=hg19>
##contig=<ID=chr21,length=48129895,assembly=hg19>
##contig=<ID=chr22,length=51304566,assembly=hg19>
##contig=<ID=chrX,length=155270560,assembly=hg19>
##contig=<ID=chrY,length=59373566,assembly=hg19>
##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
##reference=file:///data/kail/tmp/nxf.ovMAqCSHCz/ucsc.hg19.fasta
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT crc_cell_lines
chrM 73 . G A 207.78 PASS . GT:AD:DP:GQ:PL 1/1:0,9:9:27:236,27,0
chrM 150 . T C 85.28 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:113,9,0
chrM 410 . A T 169.84 PASS . GT:AD:DP:GQ:PL 1/1:0,6:6:18:198,18,0
chrM 515 . GCA G 83.25 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:120,9,0
chrM 2354 . C T 57.28 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:85,9,0
chrM 2708 . G A 126.9 PASS . GT:AD:DP:GQ:PL 1/1:0,5:5:15:155,15,0
chrM 16364 . T C 241.84 PASS . GT:AD:DP:GQ:PL 1/1:0,6:6:18:270,18,0
chr1 13868 . A G 72.77 PASS . GT:AD:DP:GQ:PL 0/1:16,6:22:99:101,0,367
chr1 13896 rs201696125 C A 50.77 PASS . GT:AD:DP:GQ:PL 0/1:14,5:19:79:79,0,340
chr1 14907 rs79585140 A G 919.77 PASS . GT:AD:DP:GQ:PL 0/1:20,38:58:99:948,0,429
chr1 14930 rs75454623 A G 878.77 PASS . GT:AD:DP:GQ:PL 0/1:16,34:50:99:907,0,341
chr1 15274 rs201931625 A T 62.28 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:90,9,0
chr1 16495 rs141130360 G C 41.77 PASS . GT:AD:DP:GQ:PL 0/1:14,5:19:70:70,0,395
chr1 16534 rs201459529 C T 298.77 PASS . GT:AD:DP:GQ:PL 0/1:5,15:20:67:327,0,67
chr1 16571 rs199676946 G A 188.77 PASS . GT:AD:DP:GQ:PL 0/1:4,10:14:61:217,0,61
chr1 1453304 rs141065088 C T 40.74 PASS . GT:AD:DP:GQ:PL 1/1:0,2:2:6:68,6,0
chr1 1454519 rs370448805 G C 51.77 PASS . GT:AD:DP:GQ:PL 0/1:2,3:5:63:80,0,63
chr1 3427514 rs146093263 GCACACGCCCCCACC G 98.25 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:135,9,0
chr1 3428160 rs2820999 T G 215.8 PASS . GT:AD:DP:GQ:PL 1/1:0,7:7:21:244,21,0
chr1 3496479 rs2794340 T C 33.77 PASS . GT:AD:DP:GQ:PL 0/1:2,2:4:62:62,0,64
chr1 3545175 rs147637374 GTTCTGGGAGCTCCTCCCCC G 151.82 PASS . GT:AD:DP:GQ:PL 1/1:0,4:4:17:189,17,0
chr1 3546264 rs137965653 CCGGG C 277.77 PASS . GT:AD:DP:GQ:PL 1/1:0,7:7:21:315,21,0
chr1 59147926 rs12139511 T C 769.77 PASS . GT:AD:DP:GQ:PL 1/1:0,22:22:66:798,66,0
chr1 59148118 rs17118103 A T 280.77 PASS . GT:AD:DP:GQ:PL 0/1:8,10:18:99:309,0,173
chr1 59150941 rs2206764 G A 396.77 PASS . GT:AD:DP:GQ:PL 1/1:0,13:13:39:425,39,0
chr1 59150997 rs5774421 TA T 88.87 PASS . GT:AD:DP:GQ:PL 1/1:0,5:5:15:126,15,0
chr1 59156155 rs12076049 C T 200.84 PASS . GT:AD:DP:GQ:PL 1/1:0,6:6:18:229,18,0
chr1 59158632 rs12097333 T C 584.77 PASS . GT:AD:DP:GQ:PL 1/1:0,18:18:54:613,54,0
chr1 59160724 rs66629118 TA T 63.0 PASS . GT:AD:DP:GQ:PL 1/1:0,4:4:12:100,12,0
chr1 59248813 rs2760499 G C 292.77 PASS . GT:AD:DP:GQ:PL 1/1:0,11:11:33:321,33,0
...(etc, 81914 lines in total)
I can run through STEP2 using these 2 files.
(base) [root@gpu01 neoantigen2]# sh test_cmd.sh
N E X T F L O W ~ version 21.04.3
Launching `./neoflow_db.nf` [compassionate_jennings] - revision: 421847b8f4
Format input mapping file: /neoantigen2/input/test_vcf_files.tsv!
New mapping file: output/test_vcf_files.tsv!
executor > local (5)
[7c/318276] process > pre_processing (test_vcf_files.tsv) [100%] 1 of 1 ✔
[87/d3b425] process > variant_annotation (crc_test-mapping_file.tsv) [100%] 1 of 1 ✔
[62/9f2138] process > database_construction (crc_test) [100%] 1 of 1 ✔
[dd/0d806c] process > format_db (crc_test) [100%] 1 of 1 ✔
[05/178767] process > generate_decoy_db (crc_test) [100%] 1 of 1 ✔
Completed at: 19-Jan-2022 05:12:16
Duration : 12m 59s
CPU hours : 0.2
Succeeded : 5
crc_test
N E X T F L O W ~ version 21.04.3
Launching `./neoflow_msms.nf` [dreamy_borg] - revision: 5cf4143baf
Process multiple MS/MS files.
executor > local (19)
[2b/f3de6e] process > msms_searching (spec-00037.mgf) [100%] 15 of 15 ✔
[d6/3db5c3] process > calculate_fdr (calculate_fdr) [100%] 1 of 1 ✔
[ab/67eada] process > prepare_pepquery_input (prepare_pepquery_input) [100%] 1 of 1 ✔
[47/b207b0] process > run_pepquery (run_pepquery) [100%] 1 of 1 ✔
[3c/c1bbee] process > add_pepquery_validation (add_pepquery_validation) [100%] 1 of 1 ✔
Completed at: 19-Jan-2022 06:14:34
Duration : 1h 2m 11s
CPU hours : 11.0
Succeeded : 19
However, I then randomly sampled 7191 VCF records from sample1.vcf and ran the same pipeline; the input is exactly the same as the previous one except for the number of rows. This time the input VCF file looks like:
##fileformat=VCFv4.2
##contig=<ID=chrM,length=16571,assembly=hg19>
##contig=<ID=chr1,length=249250621,assembly=hg19>
##contig=<ID=chr2,length=243199373,assembly=hg19>
##contig=<ID=chr3,length=198022430,assembly=hg19>
##contig=<ID=chr4,length=191154276,assembly=hg19>
##contig=<ID=chr5,length=180915260,assembly=hg19>
##contig=<ID=chr6,length=171115067,assembly=hg19>
##contig=<ID=chr7,length=159138663,assembly=hg19>
##contig=<ID=chr8,length=146364022,assembly=hg19>
##contig=<ID=chr9,length=141213431,assembly=hg19>
##contig=<ID=chr10,length=135534747,assembly=hg19>
##contig=<ID=chr11,length=135006516,assembly=hg19>
##contig=<ID=chr12,length=133851895,assembly=hg19>
##contig=<ID=chr13,length=115169878,assembly=hg19>
##contig=<ID=chr14,length=107349540,assembly=hg19>
##contig=<ID=chr15,length=102531392,assembly=hg19>
##contig=<ID=chr16,length=90354753,assembly=hg19>
##contig=<ID=chr17,length=81195210,assembly=hg19>
##contig=<ID=chr18,length=78077248,assembly=hg19>
##contig=<ID=chr19,length=59128983,assembly=hg19>
##contig=<ID=chr20,length=63025520,assembly=hg19>
##contig=<ID=chr21,length=48129895,assembly=hg19>
##contig=<ID=chr22,length=51304566,assembly=hg19>
##contig=<ID=chrX,length=155270560,assembly=hg19>
##contig=<ID=chrY,length=59373566,assembly=hg19>
##contig=<ID=chr1_gl000191_random,length=106433,assembly=hg19>
##contig=<ID=chr1_gl000192_random,length=547496,assembly=hg19>
##contig=<ID=chr4_gl000193_random,length=189789,assembly=hg19>
##contig=<ID=chr4_gl000194_random,length=191469,assembly=hg19>
##contig=<ID=chr6_cox_hap2,length=4795371,assembly=hg19>
##contig=<ID=chr7_gl000195_random,length=182896,assembly=hg19>
##contig=<ID=chr9_gl000198_random,length=90085,assembly=hg19>
##contig=<ID=chr9_gl000199_random,length=169874,assembly=hg19>
##contig=<ID=chr17_gl000205_random,length=174588,assembly=hg19>
##contig=<ID=chr19_gl000208_random,length=92689,assembly=hg19>
##contig=<ID=chr19_gl000209_random,length=159169,assembly=hg19>
##contig=<ID=chrUn_gl000211,length=166566,assembly=hg19>
##contig=<ID=chrUn_gl000212,length=186858,assembly=hg19>
##contig=<ID=chrUn_gl000213,length=164239,assembly=hg19>
##contig=<ID=chrUn_gl000214,length=137718,assembly=hg19>
##contig=<ID=chrUn_gl000215,length=172545,assembly=hg19>
##contig=<ID=chrUn_gl000216,length=172294,assembly=hg19>
##contig=<ID=chrUn_gl000217,length=172149,assembly=hg19>
##contig=<ID=chrUn_gl000218,length=161147,assembly=hg19>
##contig=<ID=chrUn_gl000219,length=179198,assembly=hg19>
##contig=<ID=chrUn_gl000220,length=161802,assembly=hg19>
##contig=<ID=chrUn_gl000221,length=155397,assembly=hg19>
##contig=<ID=chrUn_gl000222,length=186861,assembly=hg19>
##contig=<ID=chrUn_gl000224,length=179693,assembly=hg19>
##contig=<ID=chrUn_gl000225,length=211173,assembly=hg19>
##contig=<ID=chrUn_gl000226,length=15008,assembly=hg19>
##contig=<ID=chrUn_gl000228,length=129120,assembly=hg19>
##contig=<ID=chrUn_gl000229,length=19913,assembly=hg19>
##contig=<ID=chrUn_gl000230,length=43691,assembly=hg19>
##contig=<ID=chrUn_gl000231,length=27386,assembly=hg19>
##contig=<ID=chrUn_gl000232,length=40652,assembly=hg19>
##contig=<ID=chrUn_gl000234,length=40531,assembly=hg19>
##contig=<ID=chrUn_gl000236,length=41934,assembly=hg19>
##contig=<ID=chrUn_gl000237,length=45867,assembly=hg19>
##contig=<ID=chrUn_gl000238,length=39939,assembly=hg19>
##contig=<ID=chrUn_gl000239,length=33824,assembly=hg19>
##contig=<ID=chrUn_gl000240,length=41933,assembly=hg19>
##contig=<ID=chrUn_gl000241,length=42152,assembly=hg19>
##contig=<ID=chrUn_gl000243,length=43341,assembly=hg19>
##reference=file:///data/kail/tmp/nxf.ovMAqCSHCz/ucsc.hg19.fasta
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT crc_cell_lines
chrM 73 . G A 207.78 PASS . GT:AD:DP:GQ:PL 1/1:0,9:9:27:236,27,0
chrM 150 . T C 85.28 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:113,9,0
chrM 410 . A T 169.84 PASS . GT:AD:DP:GQ:PL 1/1:0,6:6:18:198,18,0
chrM 515 . GCA G 83.25 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:120,9,0
chrM 2354 . C T 57.28 PASS . GT:AD:DP:GQ:PL 1/1:0,3:3:9:85,9,0
chrM 2708 . G A 126.9 PASS . GT:AD:DP:GQ:PL 1/1:0,5:5:15:155,15,0
chrM 16364 . T C 241.84 PASS . GT:AD:DP:GQ:PL 1/1:0,6:6:18:270,18,0
chr1 3546264 rs137965653 CCGGG C 277.77 PASS . GT:AD:DP:GQ:PL 1/1:0,7:7:21:315,21,0
chr1 59465926 rs2764901 A C 337.77 PASS . GT:AD:DP:GQ:PL 0/1:22,11:33:99:366,0,1040
chr1 60280749 rs113730234 GT G 360.74 PASS . GT:AD:DP:GQ:PL 1/1:0,10:10:30:398,30,0
chr1 62231924 rs141761262 T TTGA 53.7 PASS . GT:AD:DP:GQ:PL 1/1:0,2:2:6:90,6,0
chr1 62627675 rs2481676 G A 191.77 PASS . GT:AD:DP:GQ:PL 0/1:6,7:13:99:220,0,170
chr1 63113511 rs746735 C A 202.77 PASS . GT:AD:DP:GQ:PL 0/1:4,7:11:99:231,0,133
chr1 64097432 rs1126728 C T 273.77 PASS . GT:AD:DP:GQ:PL 0/1:13,10:23:99:302,0,360
chr1 64698411 rs2029868 T C 45.77 PASS . GT:AD:DP:GQ:PL 0/1:1,2:3:29:74,0,29
chr1 67288045 rs482082 C T 140.77 PASS . GT:AD:DP:GQ:PL 0/1:8,6:14:99:169,0,245
chr1 67562106 rs6670381 G T 46.74 PASS . GT:AD:DP:GQ:PL 1/1:0,2:2:6:74,6,0
chr1 70397087 rs7530457 T C 54.77 PASS . GT:AD:DP:GQ:PL 0/1:7,3:10:83:83,0,233
...(etc, 7191 lines in total)
I got a fatal error when running the last process of STEP2. The error message is:
executor > local (19) [60/1956]
[a3/308b14] process > msms_searching (spec-00037.mgf) [100%] 15 of 15 ✔
[ae/c73260] process > calculate_fdr (calculate_fdr) [100%] 1 of 1 ✔
[c4/516371] process > prepare_pepquery_input (prepare_pepquery_input) [100%] 1 of 1 ✔
[68/fd6ea1] process > run_pepquery (run_pepquery) [100%] 1 of 1 ✔
[67/fd2463] process > add_pepquery_validation (add_pepquery_validation) [100%] 1 of 1, failed: 1 ✘
Error executing process > 'add_pepquery_validation (add_pepquery_validation)'
Caused by:
Process `add_pepquery_validation (add_pepquery_validation)` terminated with an error exit status (1)
Command executed:
#!/usr/bin/env /usr/local/bin/Rscript
library(dplyr)
library(readr)
library(stringr)
psms <- read_tsv("novel_peptides_psm.tsv")
psm_rank_file = "pepquery/psm_rank.txt"
if(file.exists(psm_rank_file)){
psm_rank <- read_tsv(psm_rank_file)
if("n_ptm" %in% names(psm_rank)){
psm_rank <- psm_rank %>% filter(pvalue<=0.01,n_ptm==0,rank==1)
psms$pepquery <- ifelse(psms$peptide %in% psm_rank$peptide,1,0)
}else{
psms$pepquery <- 0
}
}else{
psms$pepquery <- 0
}
psms %>% write_tsv("novel_peptides_psm_pepquery.tsv")
Command exit status:
1
Command output:
(empty)
Command error:
Bioconductor version 3.10 (BiocManager 1.30.10), ?BiocManager::install for help
Bioconductor version '3.10' is out-of-date; the current release version '3.14'
is available with R version '4.1'; see https://bioconductor.org/install
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Parsed with column specification:
cols(
index = col_character(),
evalue = col_character(),
ion_score = col_character(),
charge = col_character(),
mass = col_character(),
mz = col_character(),
delta_da = col_character(),
delta_ppm = col_character(),
peptide = col_character(),
isdecoy = col_character(),
miss = col_character(),
protein = col_character(),
position = col_character(),
rt = col_character(),
isSAP = col_character(),
mods = col_character(),
FDR = col_character(),
Qvalue = col_character(),
is_novel = col_character()
)
Error in `$<-.data.frame`(`*tmp*`, pepquery, value = 0) :
replacement has 1 row, data has 0
Calls: $<- -> $<-.data.frame
Execution halted
Work dir:
/neoantigen2/work/67/fd2463881a197b5022a22c8c2c78a2
Tip: when you have fixed the problem you can continue the execution adding the option `-resume` to the run command line
I think it is caused by the intermediate file being empty: /neoantigen2/work/7d/3269b5449820da00611a534f20e697/novel_peptides_psm.tsv. The novel_peptides_psm.tsv file contains only a header line and looks like:
index evalue ion_score charge mass mz delta_da delta_ppm peptide isdecoy miss protein posit
I don't understand why the number of rows affects the pipeline's execution.
Could you help me out?