Package bamm :: Module bamMaker
[hide private]
[frames] | no frames]

Source Code for Module bamm.bamMaker

   1  #!/usr/bin/env python 
   2  ############################################################################### 
   3  #                                                                             # 
   4  #    bamMaker.py                                                              # 
   5  #                                                                             # 
   6  #    Wrapper to produce a .[bt]am file with BWA for use in general chicanery  # 
   7  #                                                                             # 
   8  #    Copyright (C) Michael Imelfort                                           # 
   9  #                                                                             # 
  10  ############################################################################### 
  11  #                                                                             # 
  12  #    This library is free software; you can redistribute it and/or            # 
  13  #    modify it under the terms of the GNU Lesser General Public               # 
  14  #    License as published by the Free Software Foundation; either             # 
  15  #    version 3.0 of the License, or (at your option) any later version.       # 
  16  #                                                                             # 
  17  #    This library is distributed in the hope that it will be useful,          # 
  18  #    but WITHOUT ANY WARRANTY; without even the implied warranty of           # 
  19  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU        # 
  20  #    Lesser General Public License for more details.                          # 
  21  #                                                                             # 
  22  #    You should have received a copy of the GNU Lesser General Public         # 
  23  #    License along with this library.                                         # 
  24  #                                                                             # 
  25  ############################################################################### 
  26   
  27  __author__ = "Michael Imelfort, Ben Woodcroft" 
  28  __copyright__ = "Copyright 2014" 
  29  __credits__ = ["Michael Imelfort", "Ben Woodcroft"] 
  30  __license__ = "LGPLv3" 
  31  __maintainer__ = "Michael Imelfort" 
  32  __email__ = "mike@mikeimelfort.com" 
  33   
  34  ############################################################################### 
  35   
  36  # system imports 
  37  import subprocess 
  38  import os 
  39  import sys 
  40  import tempfile 
  41   
  42  # local imports 
  43  from bammExceptions import * 
  44   
  45  ############################################################################### 
  46  ############################################################################### 
  47  ############################################################################### 
  48  ############################################################################### 
  49   
50 -def checkForDatabase(databaseBaseName):
51 '''Check to see if bwa indexed database is present 52 53 Inputs: 54 databaseBaseName - string, full path to the prefix of the DB 55 56 Outputs: 57 True if database files exists, false otherwise 58 ''' 59 return os.path.isfile(databaseBaseName+'.amb')
60
61 -class BamScheduler:
62 '''Class for scheduling making multiple BAM and TAM files 63 64 The class implements a somewhat parallel interface for running multiple 65 BWA jobs. It is a little slack in that it sequentially calls BWA with BWA's 66 threading flag. On larger systems this will probably be slower than running 67 gnu parallel through a bash script. but if you can do that then you probably 68 don't need this. 69 ''' 70
71 - def __init__(self, 72 database, 73 alignmentAlgorithm, 74 indexAlgorithm, 75 outFolder, 76 paired=[], 77 interleaved=[], 78 singleEnded=[], 79 keptFiles=False, 80 keepFiles=False, 81 outputTam=False, 82 prefix='', 83 numThreads=1, 84 maxMemory=None, 85 forceOverwriting=False, 86 extraArguments='', 87 showCommands=False, 88 quiet = False, 89 silent=False 90 ):
91 '''Default constructor. 92 93 Initializes a BamScheduler instance with the provided set of properties. 94 95 Inputs: 96 database - full path to fasta file of contigs (may be gzipped), 97 alignmentAlgorithm - one of BWA's alignment algorithms, 98 indexAlgorithm - one of BWA's index algorithms, 99 paired - [fileName pairs], always even numbered in length, in order 100 [R_A_1, R_A_2, R_B_1, R_B_2, ...] 101 interleaved - [fileNames], containing interleaved paired reads 102 singleEnded - [fileNames], containing single ended reads 103 keptFiles - == True -> indexes for the db already exist, 104 keepFiles - == True -> don't delete indexes at the end, 105 outputTam - == True -> you love text files to bits, 106 numThreads - int, the maximum number of threads to use 107 maxMemory - string, maximum memory program will use (samtools style) 108 forceOverwriting - == True -> force overwriting index files, 109 extraArguments - string, extra args to pass to BWA 110 showCommands - == True -> show all commands being run 111 quiet - == True -> suppress output from the mapper 112 silent - == True -> suppress all output 113 114 Outputs: 115 None 116 ''' 117 118 # the main thing is to make sure that the input parameters make sense 119 self.outFolder = outFolder 120 self.database = database 121 self.dbBaseName = self.stripFaCrud(os.path.basename(database)) 122 self.prefix = prefix 123 self.paired = paired 124 self.interleaved = interleaved 125 self.singleEnded = singleEnded 126 if self.database is None: 127 raise InvalidParameterSetException('Nothing to map reads onto, ' \ 128 'you need to supply a database') 129 130 if not os.path.isfile(self.database): 131 raise InvalidParameterSetException('Specified database (%s) is not a valid file' % self.database) 132 133 if self.singleEnded == [] and \ 134 self.paired == [] and \ 135 self.interleaved == []: 136 raise InvalidParameterSetException( \ 137 'Nothing to map, please specify coupled, interleaved or ' \ 138 'single ended reads files') 139 140 self.alignmentAlgorithm = alignmentAlgorithm 141 self.indexAlgorithm = indexAlgorithm 142 self.keptFiles = keptFiles 143 self.keepFiles = keepFiles 144 self.numThreads = int(numThreads) 145 self.maxMemory = maxMemory 146 self.outputTam = outputTam 147 self.forceOverwriting = forceOverwriting 148 self.extraArguments = extraArguments 149 self.quiet = quiet 150 self.silent = silent 151 self.showCommands = showCommands 152 153 154 if self.maxMemory is None: 155 # default to 2GBs per number of threads 156 self.maxMemory = str(self.numThreads*2)+'G' 157 158 # --kept sanity check 159 if checkForDatabase(self.database): 160 # dbs are there, has the user specified 'kept' 161 if self.keptFiles is False and not self.forceOverwriting: 162 raise InvalidParameterSetException( \ 163 "You didn't specify that index files have been kept but " \ 164 "there appears to be bwa index files present.\nI'm " \ 165 "cowardly refusing to run so as not to risk overwriting.\n"\ 166 "Force overwriting to create new indices") 167 168 elif self.keptFiles: 169 # user specified 'kept' but there are no DBs there 170 raise InvalidParameterSetException( \ 171 "You specified that index files have been kept but there " \ 172 "doesn't appear to be any suitable bwa index files present") 173 174 self.BMs = [] 175 self.outFiles = {} # use this to check for repeated output files 176 177 # the BamMaker class can check validity of it's own paramters quite well 178 # we can just make a list of these guys here and then set it all going 179 # when we're sure everything is going to be OK. 180 181 # paired first 182 l_paired = len(self.paired) 183 if l_paired % 2 != 0: 184 raise InvalidParameterSetException( \ 185 "Use of the -c option requires an even number of reads " \ 186 "(ordered as pairs)") 187 188 for p_index in range(l_paired/2): 189 # make the output file name and check that it's going to be unique 190 out_file = self.makeOutFileName(self.paired[2*p_index]) 191 if out_file in self.outFiles: 192 raise InvalidParameterSetException( \ 193 'Output filename: %s for read set (Paired: %s %s) '\ 194 'conflicts with previously calculated filename for ' \ 195 'read set %s', 196 out_file, 197 self.paired[2*p_index], 198 self.paired[2*p_index+1], 199 self.outFiles[out_file]) 200 201 self.outFiles[out_file] = "(Paired: %s %s)" % \ 202 (self.paired[2*p_index], self.paired[2*p_index+1]) 203 BM = BamMaker(self.database, 204 self.alignmentAlgorithm, 205 self.indexAlgorithm, 206 out_file, 207 self.paired[2*p_index], 208 readFile2=self.paired[2*p_index+1], 209 interleaved=False, 210 singleEnded=False, 211 keptFiles=True, # always keep these 212 keepFiles=True, 213 outputTam=self.outputTam, 214 numThreads=self.numThreads, 215 maxMemory=self.maxMemory, 216 forceOverwriting=self.forceOverwriting, 217 extraArguments=self.extraArguments, 218 quiet=self.quiet, 219 silent=self.silent, 220 showCommands=self.showCommands 221 ) 222 self.BMs.append(BM) 223 224 # interleaved next 225 for file in self.interleaved: 226 out_file = self.makeOutFileName(file) 227 if out_file in self.outFiles: 228 raise InvalidParameterSetException( \ 229 'Output filename: %s for read set (Interleaved: %s) ' \ 230 'conflicts with previously calculated filename for ' \ 231 'read set %s', 232 out_file, 233 file, 234 self.outFiles[out_file]) 235 236 self.outFiles[out_file] = "(Interleaved: %s)" % (file) 237 BM = BamMaker(self.database, 238 self.alignmentAlgorithm, 239 self.indexAlgorithm, 240 out_file, 241 file, 242 interleaved=True, 243 singleEnded=False, 244 keptFiles=True, 245 keepFiles=True, 246 outputTam=self.outputTam, 247 numThreads=self.numThreads, 248 maxMemory=self.maxMemory, 249 forceOverwriting=self.forceOverwriting, 250 extraArguments=self.extraArguments, 251 quiet=self.quiet, 252 silent=self.silent, 253 showCommands=self.showCommands 254 ) 255 self.BMs.append(BM) 256 257 # singletons last 258 for file in self.singleEnded: 259 out_file = self.makeOutFileName(file) 260 if out_file in self.outFiles: 261 raise InvalidParameterSetException( \ 262 'Output filename: %s for read set (Single: %s) conflicts ' \ 263 'with previously calculated filename for read set %s', 264 out_file, 265 file, 266 self.outFiles[out_file]) 267 268 self.outFiles[out_file] = "(Single: %s)" % (file) 269 BM = BamMaker(self.database, 270 self.alignmentAlgorithm, 271 self.indexAlgorithm, 272 out_file, 273 file, 274 interleaved=False, 275 singleEnded=True, 276 keptFiles=True, 277 keepFiles=True, 278 outputTam=self.outputTam, 279 numThreads=self.numThreads, 280 maxMemory=self.maxMemory, 281 forceOverwriting=self.forceOverwriting, 282 extraArguments=self.extraArguments, 283 quiet=self.quiet, 284 silent=self.silent, 285 showCommands=self.showCommands 286 ) 287 self.BMs.append(BM) 288 289 # we've made it this far. Lets tell the user what we intend to do 290 if self.showCommands and not self.silent: 291 for BM in self.BMs: 292 print BM 293 sys.stdout.flush() 294 print "-------------------------------------------" 295 sys.stdout.flush()
296
297 - def makeBams(self):
298 '''sequentially make the bam files 299 300 Inputs: 301 None 302 303 Outputs: 304 None 305 ''' 306 for BM in self.BMs: 307 BM.makeBam()
308
309 - def stripFaCrud(self, fileName):
310 # strip off the ".fa, .fa.gz etc from the end of the reads file 311 out_file_name = fileName.replace(".fasta.gz","") \ 312 .replace(".fa.gz","") \ 313 .replace(".fq.gz","") \ 314 .replace(".fastq.gz","") \ 315 .replace(".fna.gz","") 316 317 out_file_name = out_file_name.replace(".fasta","") \ 318 .replace(".fa","") \ 319 .replace(".fq","") \ 320 .replace(".fastq","") \ 321 .replace(".fna","") 322 return out_file_name
323
324 - def makeOutFileName(self, readsFile):
325 '''Consistent way to make output file name prefixes 326 327 Inputs: 328 readsFile - path-free read file name 329 330 Outputs: 331 pretty prefix 332 ''' 333 self.makeSurePathExists(self.outFolder) 334 return os.path.join(self.outFolder, 335 self.prefix + self.dbBaseName + '.' + \ 336 os.path.basename(self.stripFaCrud(readsFile)))
337
338 - def makeSurePathExists(self, path):
339 '''Make sure that a path exists, make it if necessary 340 341 Inputs: 342 path - string, full or relative path to create 343 344 Outputs: 345 None 346 ''' 347 try: 348 os.makedirs(path) 349 except OSError as exception: 350 import errno 351 if exception.errno != errno.EEXIST: 352 raise
353 354 ############################################################################### 355 ############################################################################### 356 ############################################################################### 357 ############################################################################### 358
359 -class BamMaker:
360 '''Class that wraps bwa and samtools. Can call shell 361 commands and make BAM and TAM files 362 363 Not threaded, use the BamSceduler to start multiple BAM runs in parallel 364 '''
365 - def __init__(self, 366 database, 367 alignmentAlgorithm, 368 indexAlgorithm, 369 outFileName, 370 readFile1, 371 readFile2=None, 372 interleaved=False, 373 singleEnded=False, 374 keptFiles=False, 375 keepFiles=False, 376 outputTam=False, 377 numThreads=1, 378 maxMemory=None, 379 forceOverwriting=False, 380 extraArguments='', 381 showCommands=False, 382 quiet=False, 383 silent=False 384 ):
385 '''Default constructor. 386 387 Initializes a BamMaker instance with the provided set of properties. 388 389 Inputs: 390 database - full path to fasta file of contigs (may be gzipped), 391 alignmentAlgorithm - one of BWA's alignment algorithms, 392 indexAlgorithm - one of BWA's index algorithms, 393 outFileName - string, what to name the BAM 394 readFile1 - string, full path to 1st read file 395 readFile2 - string, full path to 2nd read file or None 396 interleaved - == True -> readFile1 is interleaved pairs 397 singleEnded - == True -> readFile1 is single ended reads 398 keptFiles - == True -> indexes for the db already exist, 399 keepFiles - == True -> don't delete indexes at the end, 400 outputTam - == True -> you love text files to bits, 401 numThreads - int, the maximum number of threads to use 402 maxMemory - string, maximum memory program will use (samtools style) 403 forceOverwriting - == True -> force overwriting index files, 404 extraArguments - string, extra args to pass to BWA 405 showCommands - == True -> show all commands being run 406 quiet - == True -> suppress output from the mapper 407 silent - == True -> suppress all output 408 409 Outputs: 410 None 411 ''' 412 self.database = database 413 self.readFile1 = readFile1 414 self.readFile2 = readFile2 415 self.outFileName = outFileName 416 self.outputTam = outputTam 417 self.forceOverwriting = forceOverwriting 418 self.quiet = quiet 419 self.showCommands = showCommands 420 self.silent = silent 421 422 self.errorOutput = '' 423 if self.quiet or self.silent: 424 self.errorOutput = '2> /dev/null' 425 426 if self.database is None or \ 427 self.readFile1 is None or \ 428 self.outFileName is None: 429 raise InvalidParameterSetException( \ 430 'You need to specify a multiple fasta file (database), ' \ 431 'at least one read file and an output file') 432 433 self.isInterleaved = interleaved 434 self.isSingleEnded = singleEnded 435 436 self.alignmentAlgorithm = alignmentAlgorithm 437 self.indexAlgorithm = indexAlgorithm 438 439 self.keptFiles = keptFiles 440 self.keepFiles = keepFiles 441 442 self.numThreads = int(numThreads) 443 self.maxMemory = maxMemory 444 if self.maxMemory is None: 445 # default to 2GBs per number of threads 446 self.maxMemory = str(self.numThreads*2)+'G' 447 448 # handle extra arguments 449 self.extraArguments = {} 450 if extraArguments != '': 451 # we have some! 452 for e_arg in extraArguments.split(','): 453 fields = e_arg.split(":") 454 self.extraArguments[fields[0]] = fields[1] 455 456 # now extra arguments looks like: 457 # {} 458 # or 459 # {mode : args, mode : args ... } 460 461 # intermediate files used during BAM production 462 self.sai1 = None 463 self.sai2 = None 464 465 # make sure the file names are purdy 466 if self.outputTam: 467 if self.alignmentAlgorithm == 'mem': 468 raise InvalidParameterSetException( \ 469 'Sorry, tam output file format for bwa-mem is not ' \ 470 'supported at this time\n(though it relatively easy ' \ 471 'to implement)') 472 473 if not (self.outFileName.endswith('.sam') or \ 474 self.outFileName.endswith('.tam')): 475 self.outFileName += '.tam' 476 else: 477 # bam output 478 if self.outFileName.endswith('.bam'): 479 # samtools renames the output file with .bam already 480 self.outFileName = self.outFileName[:-4] 481 482 483 # make sure the parameters make sense 484 485 # we know we have a database and one read file 486 if self.isSingleEnded: 487 # single ended! 488 if self.isInterleaved: 489 raise InvalidParameterSetException( \ 490 'The interleaved option is incompatible ' \ 491 'with single ended alignment') 492 493 elif self.readFile2 is not None: 494 raise InvalidParameterSetException( \ 495 'Two read files specified for a single ended alignment') 496 else: 497 # paired reads 498 if self.readFile2 is None: 499 # only OK if interleaved is set 500 if self.isInterleaved is False: 501 raise InvalidParameterSetException( \ 502 'You must specify two read files and a database ' \ 503 'or one read file with the interleaved flag for a ' \ 504 'paired alignment or explicitly use the single ' \ 505 'ended flag') 506 507 elif self.alignmentAlgorithm=='aln': 508 raise InvalidParameterSetException( \ 509 'You cannot use the "aln" algorithm '\ 510 'with interleaved reads') 511 512 # else we have -d, -1 and -2 --> OK 513 514 # last sanity check 515 if (self.keptFiles is False and checkForDatabase(self.database)) and \ 516 not self.forceOverwriting: 517 raise InvalidParameterSetException(\ 518 "You didn't specify that index files have been kept but " \ 519 "there appears to be bwa index files present.\nI'm cowardly " \ 520 "refusing to run so as not to risk overwriting.\n" \ 521 "Force overwriting to create new indices")
522 523 # OK, we know what we're doing 524 525 #--------------------------------------------------------------- 526 # database management 527
528 - def makeDatabase(self):
529 ''' call BWA make index command 530 531 Inputs: 532 None 533 534 Outputs: 535 None 536 ''' 537 try: 538 e_args = self.extraArguments['index'] 539 except KeyError: 540 e_args = '' 541 542 if not self.silent: 543 sys.stderr.write('making database'+"\n") 544 sys.stderr.flush 545 if self.indexAlgorithm is None: 546 cmd = ' '.join(['bwa index', 547 e_args, 548 self.database, 549 self.errorOutput]) 550 else: 551 cmd = ' '.join(['bwa index -a', 552 self.indexAlgorithm, 553 e_args, 554 self.database, 555 self.errorOutput]) 556 557 if self.showCommands and not self.silent: 558 print cmd 559 sys.stdout.flush() 560 subprocess.check_call(cmd, shell=True)
561
562 - def removeDatabase(self):
563 ''''Remove any index files that are no longer needed 564 565 Inputs: 566 None 567 568 Outputs: 569 None 570 ''' 571 if not self.silent: 572 sys.stderr.write('deleting indices'+"\n") 573 sys.stderr.flush 574 self.safeRemove(self.database+'.amb') 575 self.safeRemove(self.database+'.ann') 576 self.safeRemove(self.database+'.bwt') 577 self.safeRemove(self.database+'.pac') 578 self.safeRemove(self.database+'.rbwt') 579 self.safeRemove(self.database+'.rpac') 580 self.safeRemove(self.database+'.rsa') 581 self.safeRemove(self.database+'.sa')
582 583 #--------------------------------------------------------------- 584 # main wrapper 585
586 - def makeBam(self):
587 '''Use BWA ans samtools to make a BAM/TAM file 588 589 Inputs: 590 None 591 592 Outputs: 593 None 594 ''' 595 # run the actual alignment 596 if self.alignmentAlgorithm == 'bwasw': 597 598 if self.outputTam: 599 self.bwasw() 600 else: 601 self.bwasw_to_sorted_indexed_bam() 602 603 elif self.alignmentAlgorithm == 'aln': 604 605 self.sai1 = tempfile.mkstemp(suffix='.sai')[1] 606 self.sai2 = tempfile.mkstemp(suffix='.sai')[1] 607 608 # always do the first read 609 self.aln(self.readFile1, self.sai1) 610 if self.isSingleEnded: 611 if self.outputTam: 612 self.samse() 613 else: 614 self.samse_to_sorted_indexed_bam() 615 else: 616 self.aln(self.readFile2, self.sai2) 617 if self.outputTam: 618 self.sampe() 619 else: 620 self.sampe_to_sorted_indexed_bam() 621 622 self.safeRemove(self.sai1) 623 self.safeRemove(self.sai2) 624 625 else: 626 # algorithm is mem 627 if self.isSingleEnded: 628 self.mem_single_to_sorted_indexed_bam() 629 else: 630 self.mem_to_sorted_indexed_bam()
631 632 #--------------------------------------------------------------- 633 # aln algorithm 634
635 - def aln(self, readFile, saiFile):
636 '''call bwa aln 637 638 Inputs: 639 readFile - full path to reads file 640 saiFile - full path to corresponding sai file 641 642 Outputs: 643 None 644 ''' 645 try: 646 e_args = self.extraArguments['aln'] 647 except KeyError: 648 e_args = '' 649 cmd = ' '.join(['bwa aln -t', 650 str(self.numThreads), 651 e_args, 652 self.database, 653 readFile, 654 '>', 655 saiFile, 656 self.errorOutput]) 657 658 if self.showCommands and not self.silent: 659 print cmd 660 sys.stdout.flush() 661 subprocess.check_call(cmd, shell=True)
662
663 - def sampe(self):
664 '''call bwa sampe 665 666 Inputs: 667 None 668 669 Outputs: 670 None 671 ''' 672 try: 673 e_args = self.extraArguments['sampe'] 674 except KeyError: 675 e_args = '' 676 cmd = ' '.join(['bwa sampe', 677 e_args, 678 self.database, 679 self.sai1, 680 self.sai2, 681 self.readFile1, 682 self.readFile2, 683 '>', 684 self.outFileName, 685 self.errorOutput]) 686 687 if self.showCommands and not self.silent: 688 print cmd 689 sys.stdout.flush() 690 subprocess.check_call(cmd, shell=True)
691
692 - def samse(self):
693 '''call bwa samse 694 695 Inputs: 696 None 697 698 Outputs: 699 None 700 ''' 701 try: 702 e_args = self.extraArguments['samse'] 703 except KeyError: 704 e_args = '' 705 cmd = ' '.join(['bwa samse', 706 e_args, 707 self.database, 708 self.sai1, 709 self.readFile1, 710 '>', 711 self.outFileName, 712 self.errorOutput]) 713 714 if self.showCommands and not self.silent: 715 print cmd 716 sys.stdout.flush() 717 subprocess.check_call(cmd, shell=True)
718
720 '''call bwa sampe and sort + index the result 721 722 Inputs: 723 None 724 725 Outputs: 726 None 727 ''' 728 try: 729 e_args = self.extraArguments['sampe'] 730 except KeyError: 731 e_args = '' 732 cmd = ' '.join(['bwa sampe', 733 e_args, 734 self.database, 735 self.sai1, 736 self.sai2, 737 self.readFile1, 738 self.readFile2]) 739 740 cmd += ' '.join([' | samtools view -SubhF 4 -', 741 self.errorOutput, 742 '| samtools sort -m', 743 self.maxMemory, 744 '-', 745 self.outFileName, 746 self.errorOutput]) 747 748 if self.showCommands and not self.silent: 749 print cmd 750 sys.stdout.flush() 751 subprocess.check_call(cmd, shell=True) 752 self.samtoolsIndex(self.outFileName)
753
755 '''call bwa samse and sort + index the result 756 757 Inputs: 758 None 759 760 Outputs: 761 None 762 ''' 763 try: 764 e_args = self.extraArguments['samse'] 765 except KeyError: 766 e_args = '' 767 cmd = ' '.join(['bwa samse', 768 e_args, 769 self.database, 770 self.sai1, 771 self.readFile1]) 772 773 cmd += ' '.join([' | samtools view -SubhF 4 -', 774 self.errorOutput, 775 '| samtools sort -m', 776 self.maxMemory, 777 '-', 778 self.outFileName, 779 self.errorOutput]) 780 781 if self.showCommands and not self.silent: 782 print cmd 783 sys.stdout.flush() 784 subprocess.check_call(cmd, shell=True) 785 786 self.samtoolsIndex(self.outFileName)
787 788 #--------------------------------------------------------------- 789 # mem algorithm 790
792 ''' call bwa mem mapping with single ended reads and sort + index 793 794 Inputs: 795 None 796 797 Outputs: 798 None 799 ''' 800 try: 801 e_args = self.extraArguments['mem'] 802 except KeyError: 803 e_args = '' 804 cmd = ' '.join(['bwa mem -t', 805 str(self.numThreads), 806 e_args, 807 self.database, 808 self.readFile1, 809 self.errorOutput]) 810 811 cmd += ' '.join([' | samtools view -SubhF 4 -', 812 self.errorOutput, 813 ' | samtools sort -m', 814 self.maxMemory, 815 '-', 816 self.outFileName, 817 self.errorOutput]) 818 819 if self.showCommands and not self.silent: 820 print cmd 821 sys.stdout.flush() 822 subprocess.check_call(cmd, shell=True) 823 824 self.samtoolsIndex(self.outFileName)
825
826 - def mem_to_sorted_indexed_bam(self):
827 ''' call bwa mem mapping with paired reads and sort + index the result 828 829 Assume -p for bwa if reads2 is None, otherwise specify reads1 and reads2 830 831 Inputs: 832 None 833 834 Outputs: 835 None 836 ''' 837 try: 838 e_args = self.extraArguments['mem'] 839 except KeyError: 840 e_args = '' 841 bwa_cmd = ' '.join(['bwa mem -t', 842 str(self.numThreads), 843 e_args, 844 self.database, 845 self.errorOutput, 846 '']) 847 if self.isInterleaved: 848 bwa_cmd += ' '.join(['-p',self.readFile1]) 849 else: 850 bwa_cmd += ' '.join([self.readFile1,self.readFile2]) 851 852 cmd = bwa_cmd + ' '.join([' | samtools view -SubhF 4 -', 853 self.errorOutput, 854 ' | samtools sort -m', 855 self.maxMemory, 856 '-', 857 self.outFileName, 858 self.errorOutput]) 859 860 if self.showCommands and not self.silent: 861 print cmd 862 sys.stdout.flush() 863 subprocess.check_call(cmd, shell=True) 864 self.samtoolsIndex(self.outFileName)
865 866 #--------------------------------------------------------------- 867 # bwasw algorithm 868
869 - def bwasw(self):
870 '''call bwasw 871 872 Inputs: 873 None 874 875 Outputs: 876 None 877 ''' 878 try: 879 e_args = self.extraArguments['bwasw'] 880 except KeyError: 881 e_args = '' 882 if self.isSingleEnded: 883 cmd = ' '.join(['bwa bwasw -t', 884 str(self.numThreads), 885 e_args, 886 self.database, 887 self.readFile1, 888 '>', 889 self.outFileName, 890 self.errorOutput]) 891 else: 892 cmd = ' '.join(['bwa bwasw -t', 893 str(self.numThreads), 894 e_args, 895 self.database, 896 self.readFile1, 897 self.readFile2, 898 '>', 899 self.outFileName, 900 self.errorOutput]) 901 902 if self.showCommands and not self.silent: 903 print cmd 904 sys.stdout.flush() 905 subprocess.check_call(cmd, shell=True)
906
908 '''call bwasw and sort and index the result 909 910 Inputs: 911 None 912 913 Outputs: 914 None 915 ''' 916 try: 917 e_args = self.extraArguments['bwasw'] 918 except KeyError: 919 e_args = '' 920 cmd = ' '.join(['bwa bwasw -t', 921 str(self.numThreads), 922 e_args, 923 self.database, 924 self.readFile1]) 925 926 if not self.isSingleEnded: 927 cmd += ' ' + self.readFile2 928 929 cmd += ' '.join([' | samtools view -SubhF 4 -', 930 self.errorOutput, 931 '| samtools sort -m', 932 self.maxMemory, 933 '-', 934 self.outFileName, 935 self.errorOutput]) 936 937 if self.showCommands and not self.silent: 938 print cmd 939 sys.stdout.flush() 940 subprocess.check_call(cmd, shell=True) 941 942 self.samtoolsIndex(self.outFileName)
943 944 #--------------------------------------------------------------- 945 # index a bam file 946
947 - def samtoolsIndex(self, sortedBamFile):
948 '''call samtools index on a sorted BAM file 949 950 Inputs: 951 sortedBamFile - full path to sorted bam file 952 953 Outputs: 954 None 955 ''' 956 # samtools index cannot be piped, so a tmpfile is required 957 cmd = ' '.join(['samtools index', 958 sortedBamFile+'.bam', 959 self.errorOutput]) 960 if self.showCommands and not self.silent: 961 print cmd 962 sys.stdout.flush() 963 subprocess.check_call(cmd, shell=True)
964 965 #--------------------------------------------------------------- 966 # utilities 967
968 - def safeRemove(self, fileName):
969 '''Delete a file without raising an exception 970 971 Inputs: 972 fileName - full path to file to be removed 973 974 Outputs: 975 None 976 ''' 977 if os.path.isfile(fileName): 978 if self.showCommands and not self.silent: 979 print 'rm ' + fileName 980 sys.stdout.flush() 981 os.system('rm ' + fileName)
982
983 - def __str__(self):
984 '''Print out the operations and outputs that will be made 985 986 Used when the scheduler is in showCommands mode 987 988 Inputs: 989 None 990 991 Outputs: 992 None 993 ''' 994 str = "-------------------------------------------\n" 995 if self.isSingleEnded: 996 str += " Input: Single ended\n" 997 str += " %s\n" % self.readFile1 998 elif self.isInterleaved: 999 str += " Input: Interleaved\n" 1000 str += " %s\n" % self.readFile1 1001 else: 1002 str += " Input: Paired\n" 1003 str += " %s, %s\n" % (self.readFile1, self.readFile2) 1004 1005 if self.outputTam: 1006 suffix = "" 1007 else: 1008 suffix = ".bam (sorted + indexed)" 1009 1010 str += " Database: %s\n Output: %s%s\n Threads: %s\n" % \ 1011 (self.database, self.outFileName, suffix, self.numThreads) 1012 str += " Alignment algorithm: %s" % self.alignmentAlgorithm 1013 return str
1014 1015 ############################################################################### 1016 ############################################################################### 1017 ############################################################################### 1018 ############################################################################### 1019