cyvcf2
cyvcf2 copied to clipboard
##INFO fields being dropped
cat 2202415007_hemi.vcf | head
/home/zmvanw01/projects/12-sample-comparison/hifiasm-path/geno_analysis/per_samp/2202415007/change_to_hemi/2202415007_hemi.vcf
=mpileup -B -a QS -Ou -f /home/zmvanw01/git_repos/immune_receptor_genomics/current/reference.fasta --threads 11 /home/zmvanw01/projects/12-sample-comparison/hifiasm-path/2202415007/2202415007.editRG.bam
##reference=file:///home/zmvanw01/git_repos/immune_receptor_genomics/current/reference.fasta
##contig=<ID=chr10,length=133797422>
##contig=<ID=chr11,length=135086622>
##contig=<ID=chr12,length=133275309>
##contig=<ID=chr13,length=114364328>
##contig=<ID=chr14,length=105480000>
##contig=<ID=chr15,length=101991189>
##contig=<ID=chr16,length=90338345>
(bcftools) [zmvanw01@bigdata change_to_hemi]$ cat ../2202415007.vcf | head
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##bcftoolsVersion=1.19+htslib-1.19.1
##bcftoolsCommand=mpileup -B -a QS -Ou -f /home/zmvanw01/git_repos/immune_receptor_genomics/current/reference.fasta --threads 11 /home/zmvanw01/projects/12-sample-comparison/hifiasm-path/2202415007/2202415007.editRG.bam
##reference=file:///home/zmvanw01/git_repos/immune_receptor_genomics/current/reference.fasta
I am editing the below vcf file with the following script:
def change_genotypes(input_vcf, bedfile, output_path):
vcf_reader = VCF(input_vcf, strict_gt=True)
# Create a new VCF Writer using the input VCF as a template
vcf_writer = Writer(output_path, vcf_reader)
coords = read_bedfile(bedfile)
for record in vcf_reader:
for index, sample in enumerate(vcf_reader.samples):
change_snp = False
for chrom, start, end in coords:
if record.CHROM != chrom:
continue
if record.POS < start:
continue
if record.POS > end:
continue
change_snp = True
break
if change_snp:
current_genotype = record.genotypes[index]
if current_genotype is None or -1 in current_genotype:
record.genotypes[index] = [-1, -1, False] # Set genotype to unknown
elif 1 in current_genotype:
record.genotypes[index] = [-1, 1, False] # Set genotype to ./1
else:
record.genotypes[index] = [-1, 0, False] # Set genotype to ./0
# Reassign the genotypes field
record.genotypes = record.genotypes
vcf_writer.write_record(record)
vcf_writer.close()
vcf_reader.close()
I expect it to copy all ##info fields from the template to the new vcf, but it seems to truncate it? I am missing: ##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##bcftoolsVersion=1.19+htslib-1.19.1
##bcftoolsCommand
from the newly created file and it is causing downstream issues. It also seems to be placing the path to the new file right at the top with now ## before it.