# Python script for running the single process for our pipeline
#
# Murray Cadzow
# July 2013
# University Of Otago
#
# James Boocock
# July 2013
# University Of Otago
#
import argparse
import ConfigParser
import logging
import os
import sys
from __version__ import __VERSION__
# Module-level logger named after this module; parse_arguments() sets its
# level to DEBUG or ERROR depending on the --verbose flag.
logger = logging.getLogger(__name__)
# Not referenced in the visible part of this file; presumably the process exit
# status to use when an external subprocess fails -- TODO confirm with callers.
SUBPROCESS_FAILED_EXIT = 10
def parse_config(args):
    """Parse the pipeline configuration file into a nested dictionary.

    Reads the INI-style file named by ``args.config_file`` and copies every
    option of every section into a plain ``dict`` of ``dict``.

    Args:
        args: Object with a ``config_file`` attribute holding the path of the
            configuration file (e.g. the namespace returned by
            ``parse_arguments``).

    Returns:
        dict: ``{section: {option: value}}``. Values are whatever
        ``ConfigParser.get`` returns (strings). An option whose value cannot
        be retrieved (for instance a broken ``%(name)s`` interpolation) is
        stored as ``None``. The dictionary is empty when the file could not
        be read.
    """
    # The module is called ``ConfigParser`` on Python 2 and ``configparser``
    # on Python 3; importing locally keeps this function usable on both.
    try:
        import ConfigParser as configparser
    except ImportError:
        import configparser

    config = configparser.ConfigParser()
    # ``read`` silently skips files it cannot open and returns the list of
    # files it actually parsed, so an empty result means nothing was loaded.
    if not config.read(args.config_file):
        logger.warning(
            "could not read config file {0}".format(args.config_file))

    config_parsed = {}
    logger.debug(config.sections())
    for section in config.sections():
        logger.debug(section)
        config_parsed[section] = {}
        for op in config.options(section):
            logger.debug(op)
            try:
                config_parsed[section][op] = config.get(section, op)
            except configparser.Error:
                # Best effort: keep the key but mark the value as unavailable.
                logger.info("exception on {0}".format(op))
                config_parsed[section][op] = None
    return config_parsed
def parse_arguments(argv=None):
    """Parse all the command-line arguments for the ancient DNA pipeline.

    Besides parsing, this sets the level of the module ``logger``: ``DEBUG``
    when ``--verbose`` is given, ``ERROR`` otherwise.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: The parsed options, keyed by the ``dest`` names
        declared below.

    Raises:
        SystemExit: Raised by argparse for ``--help``, ``--version`` and
            invalid command lines.
    """
    parser = argparse.ArgumentParser(
        description="Options for the ancient DNA pipeline")
    adapt_remove = parser.add_argument_group('AdapterRemoval')
    alignment = parser.add_argument_group('Alignment')
    sample_qc = parser.add_argument_group("Sample QC")
    ancient_filter = parser.add_argument_group('Ancient Filtering')
    variant_calling = parser.add_argument_group('Variant Calling')
    # Created for parity with the other groups; no options are attached yet.
    vcf_qc = parser.add_argument_group("VCF QC")

    # --- General options ---------------------------------------------------
    parser.add_argument('-l', '--log-file', dest="log_file",
                        default="pipeline_run.log",
                        help="Log file for the ancient DNA pipeline")
    parser.add_argument("fastq_files",
                        help="Unzipped fastq files in the following format "
                             "<SAMPLEID>.*.R1.fastq <SAMPLEID>.*.R2.fastq")
    parser.add_argument('-c', '--config-file', dest="config_file",
                        help="Path to configuration file",
                        default="/etc/ancient_dna_pipeline.cfg")
    parser.add_argument('-v', '--verbose',
                        action="store_true", dest='verbose', default=False,
                        help="Verbose command output useful for debugging")
    # The ``version`` action prints and exits before the required positional
    # argument is checked, so ``--version`` works on its own.
    parser.add_argument("--version", help="Print program version",
                        action="version",
                        version="Version: {0}".format(__VERSION__))
    parser.add_argument('-d', '--directory', dest="running_directory",
                        help="Directory where output file should be placed")
    # Long option only: ``-l`` already belongs to --log-file and argparse
    # refuses to register the same option string twice.
    parser.add_argument("--library-type", dest="library_type",
                        help="Type of Sequencing library: args are double-stranded (ds) and single-stranded (ss)")
    parser.add_argument("--imputation", dest="imputation",
                        help="Perform BEAGLE imputation of the VCF file",
                        default=False, action="store_true")
    # NOTE(review): store_true with default=True means this value is always
    # True; left unchanged because the intended default is not clear.
    parser.add_argument("-m", "--use-merged_reads",
                        help="Use the unmergable reads",
                        dest="use_unmerged_reads", action="store_true",
                        default=True)
    # main() reads ``args.cores`` to override the config file; the value is
    # kept as a string, like the values read from the config file.
    parser.add_argument("--cores", dest="cores", default=None,
                        help="Number of cores available (overrides the config file)")

    # --- AdapterRemoval ----------------------------------------------------
    adapt_remove.add_argument('--minimum-length',
                              help="Minimum read length to process for analysis",
                              dest="adapt_min_length", type=int, default=25)
    adapt_remove.add_argument('--minimum-quality',
                              help="Minimum base quality",
                              dest="adapt_min_qual", type=int, default=20)
    adapt_remove.add_argument('--mismatch-rate',
                              help="Mismatch fraction (If >1 set to 1/<mismatch rate>",
                              dest="adapt_mismatch_rate", type=float,
                              default=3)
    adapt_remove.add_argument("--min-length",
                              help="Minimum alignment length for merging",
                              dest="adapt_alignment_length", type=int,
                              default=11)

    # --- Alignment ---------------------------------------------------------
    alignment.add_argument("--algorithm",
                           help="BWA alignment algorithm (mem or aln)",
                           dest="bwa_algo", default="aln")
    alignment.add_argument("--max-edit-distance",
                           help="Maxmimum edit distance (-n aln)",
                           dest="bwa_edit_distance", type=float, default=0.03)
    alignment.add_argument("--max-gap-opens",
                           help="Maximum number of gap opens (-o aln)",
                           dest="bwa_gap_opens", type=int, default=2)
    alignment.add_argument("--seeding",
                           help="Should seeding be enabled (disabled by default -l 1024 aln)",
                           dest="bwa_seeding", default=False,
                           action="store_true")

    # --- Sample QC ---------------------------------------------------------
    # argparse %-formats help strings, so a literal percent sign must be
    # written as ``%%`` or ``--help`` raises ValueError.
    sample_qc.add_argument("--min-sample-coverage-percent",
                           help="Minimum sample coverage (%%)",
                           dest="min_coverage_percent", type=float,
                           default=0.95)

    # --- Ancient filtering -------------------------------------------------
    ancient_filter.add_argument("--rescale-bams",
                                help="Rescale base qualities using mapDamage",
                                dest="ancient_rescale", default=False,
                                action="store_true")
    ancient_filter.add_argument('--downweight-number',
                                help="Number of C->T transitions at start and G->A transitions at the end of read to downweight",
                                dest="ancient_downweight_number", type=int,
                                default=2)

    # --- Variant calling ---------------------------------------------------
    variant_calling.add_argument('--min-depth',
                                 help="Minimum variant read-depth",
                                 dest="vcf_minimum_depth", type=int,
                                 default=2)
    variant_calling.add_argument('--min-mapping-quality',
                                 help="Minimum mapping quality",
                                 dest="vcf_min_mapping_quality", type=int,
                                 default=20)

    args = parser.parse_args(argv)
    if args.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.ERROR)
    return args
def main():
    """Run the pipeline from the command line.

    Steps, in order: parse the command-line arguments, configure file
    logging, parse the configuration file, pass its ``environment`` section
    to ``set_environment``, apply the optional core-count override, then
    build a ``StandardRun`` and call its ``run_pipeline`` method.

    NOTE(review): ``set_environment`` and ``StandardRun`` are not defined or
    imported in the visible part of this file -- confirm they are in scope.

    Raises:
        KeyError: If the configuration has no ``environment`` section.
    """
    args = parse_arguments()
    # Configure logging before anything else can log, so messages emitted
    # while the configuration is parsed reach the log file.
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=args.log_file, filemode='w',
                        level=logging.INFO)
    config = parse_config(args)
    set_environment(config['environment'])
    # The parsed namespace may not carry a ``cores`` attribute, and the
    # config file may not have a ``system`` section; tolerate both.
    cores = getattr(args, 'cores', None)
    if cores is not None:
        config.setdefault('system', {})['cores_avaliable'] = cores
    pipeline = StandardRun(args, config=config)
    pipeline.run_pipeline()
    print("Selection Pipeline Completed Successfully :)!")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()