1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 __author__ = "Michael Imelfort"
28 __copyright__ = "Copyright 2014"
29 __credits__ = ["Michael Imelfort"]
30 __license__ = "LGPLv3"
31 __maintainer__ = "Michael Imelfort"
32 __email__ = "mike@mikeimelfort.com"
33
34
35
36 import os
37 import ctypes as c
38
39 from bamm.bammExceptions import *
40
41
42
43
44
45
46
47 -def enum(*sequential, **named):
48 enums = dict(zip(sequential, range(len(sequential))), **named)
49 return type('Enum', (), enums)
50
51
52
53
54
55
56
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 global RPI
75 RPI = enum('ERROR', 'FIR', 'SEC', 'SNGL_FIR', 'SNGL_SEC', 'SNGL')
76
78 '''Convert an RPI into a human readable string
79
80 Inputs:
81 rpi - RPI to convert
82
83 Outputs:
84 Human readable string
85 '''
86 if rpi == RPI.FIR:
87 return 'First'
88 elif rpi == RPI.SEC:
89 return 'Second'
90 elif rpi == RPI.SNGL_FIR:
91 return 'First_single'
92 elif rpi == RPI.SNGL_SEC:
93 return 'Second_single'
94 elif rpi == RPI.SNGL:
95 return 'Single'
96 return 'ERROR'
97
98
99 RPIConv = {RPI.ERROR:RPI.ERROR,
100 RPI.FIR:RPI.FIR,
101 RPI.SEC:RPI.SEC,
102 RPI.SNGL_FIR:RPI.SNGL,
103 RPI.SNGL_SEC:RPI.SNGL,
104 RPI.SNGL:RPI.SNGL}
105
106
107
108
109
110
111 global MI
112 MI = enum('ER_EM_EG', 'PR_PM_PG', 'PR_PM_UG', 'PR_UM_NG', 'UR_NM_NG')
113
115 '''Convert an MI into a human readable string
116
117 Inputs:
118 mi - MI to convert
119
120 Outputs:
121 Human readable string
122 '''
123 if mi == MI.PR_PG_PM:
124 return 'PR_PG_PM'
125 elif mi == MI.PR_PM_UG:
126 return 'PR_PM_UG'
127 elif mi == MI.PR_UM_NG:
128 return 'PR_UM_NG'
129 elif mi == MI.UR_NM_NG:
130 return 'UR_NM_NG'
131 return 'ER_EM_EG'
132
133
134
135
136
137
140
142 '''
143 typedef struct BM_mappedRead {
144 char * seqId,
145 char * seq,
146 char * qual,
147 uint16_t idLen,
148 uint16_t seqLen,
149 uint16_t qualLen,
150 uint8_t rpi,
151 uint8_t mi,
152 uint16_t group,
153 BM_mappedRead * nextRead,
154 BM_mappedRead * partnerRead,
155 BM_mappedRead * nextPrintingRead
156 } BM_mappedRead;
157 '''
158 _fields_ = [("seqId", c.POINTER(c.c_char)),
159 ("seq", c.POINTER(c.c_char)),
160 ("qual", c.POINTER(c.c_char)),
161 ("idLen", c.c_uint16),
162 ("seqLen", c.c_uint16),
163 ("qualLen", c.c_uint16),
164 ("rpi", c.c_uint8),
165 ("mi", c.c_uint8),
166 ("group", c.c_uint16),
167 ("nextRead",c.POINTER(BM_mappedRead_C)),
168 ("partnerRead",c.POINTER(BM_mappedRead_C)),
169 ("nextPrintingRead",c.POINTER(BM_mappedRead_C))
170 ]
171
172
173
174
175
176
179
181 '''
182 typedef struct {
183 uint16_t reversed1;
184 uint16_t reversed2;
185 uint16_t readLength1;
186 uint16_t readLength2;
187 uint32_t pos1;
188 uint32_t pos2;
189 uint32_t bam_ID;
190 struct BM_linkInfo * nextLink;
191 } BM_linkInfo;
192 '''
193 _fields_ = [("reversed1", c.c_uint16),
194 ("reversed2", c.c_uint16),
195 ("readLength1", c.c_uint16),
196 ("readLength2", c.c_uint16),
197 ("pos1", c.c_uint32),
198 ("pos2", c.c_uint32),
199 ("bam_ID", c.c_uint32),
200 ("nextLink",c.POINTER(BM_linkInfo_C))
201 ]
202
203
204
205
206
207
209 '''
210 typedef struct {
211 uint32_t cid1;
212 uint32_t cid2;
213 uint32_t numLinks;
214 BM_linkInfo * LI;
215 } BM_linkPair;
216 '''
217 _fields_ = [("cid1", c.c_uint32),
218 ("cid2", c.c_uint32),
219 ("numLinks", c.c_uint32),
220 ("LI",c.POINTER(BM_linkInfo_C))
221 ]
222
223
224
225
226
227
229 '''
230 typedef struct {
231 char ** keys;
232 size_t keyCount;
233 size_t numKeys;
234 cfuhash_table_t * linkHash;
235 BM_linkPair * pair;
236 BM_linkInfo * LI;
237 } BM_LinkWalker;
238 '''
239 _fields_= [("keys", c.POINTER(c.POINTER(c.c_char))),
240 ("keyCount", c.c_size_t),
241 ("numKeys", c.c_size_t),
242 ("links",c.POINTER(cfuhash_table_t)),
243 ("pair",c.POINTER(BM_linkPair_C)),
244 ("LI",c.POINTER(BM_linkInfo_C))
245 ]
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260 global OT
261 OT = enum('OUT', 'SAME', 'IN', 'NONE', 'ERROR')
262
264
265 '''Convert an orientation type into a human readable string
266
267 Inputs:
268 ot - OT to convert
269
270 Outputs:
271 Human readable string
272 '''
273 if ot == OT.OUT:
274 return 'OUT'
275 if ot == OT.SAME:
276 return 'SAME'
277 if ot == OT.IN:
278 return 'IN'
279 if ot == OT.NONE:
280 return 'NONE'
281 return 'ERROR'
282
283
284
285
286
287
289 '''
290 typedef struct BM_bamType {
291 int orientationType;
292 float insertSize;
293 float insertStdev;
294 int supporting;
295 } BM_bamType;
296 '''
297 _fields_ = [("orientationType", c.c_int),
298 ("insertSize", c.c_float),
299 ("insertStdev", c.c_float),
300 ("supporting", c.c_int)
301 ]
302
303
304
305
306
307
309 '''
310 typedef struct BM_bamFile {
311 char * fileName;
312 uint16_t fileNameLength;
313 BM_bamType ** types;
314 int numTypes;
315 } BM_bamFile;
316 '''
317 _fields_ = [("fileName", c.POINTER(c.c_char)),
318 ("fileNameLength", c.c_uint16),
319 ("types", c.POINTER(c.POINTER(BM_bamType_C))),
320 ("numTypes", c.c_int)
321 ]
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339 global CT
340 CT = enum('NONE',
341 'COUNT',
342 'C_MEAN',
343 'P_MEAN',
344 'P_MEDIAN',
345 'P_MEAN_TRIMMED',
346 'P_MEAN_OUTLIER')
347
349 '''Convert a CT into a human readable string
350
351 Inputs:
352 ct - CT to convert
353
354 Outputs:
355 Human readable string
356 '''
357 if ct == CT.NONE:
358 return 'None'
359 elif ct == CT.COUNT:
360 return 'Read_counts'
361 elif ct == CT.C_MEAN:
362 return 'Mean_read_counts'
363 elif ct == CT.P_MEAN:
364 return 'Mean_pileup_depth'
365 elif ct == CT.P_MEDIAN:
366 return 'Median_pileup_depth'
367 elif ct == CT.P_MEAN_TRIMMED:
368 return 'Mean_trimmed_pileup_depth'
369 elif ct == CT.P_MEAN_OUTLIER:
370 return 'Mean_outlier_pileup_depth'
371
372
373
374
375
376
378 '''
379 typedef struct BM_coverageType {
380 CT type;
381 float upperCut;
382 float lowerCut;
383 } BM_coverageType;
384 '''
385 _fields_ = [("type", c.c_int),
386 ("upperCut", c.c_float),
387 ("lowerCut", c.c_float)
388 ]
389
390
391
392
393
394
396 '''
397 typedef struct BM_fileInfo {
398 float ** coverages;
399 uint32_t * contigLengths;
400 uint32_t numBams;
401 uint32_t numContigs;
402 BM_bamFile ** bamFiles;
403 char ** contigNames;
404 uint16_t * contigNameLengths;
405 int isLinks;
406 BM_coverageType * coverageType;
407 int isIgnoreSupps;
408 cfuhash_table_t * links;
409 } BM_fileInfo;
410 '''
411 _fields_ = [("coverages", c.POINTER(c.POINTER(c.c_float))),
412 ("contigLengths",c.POINTER(c.c_uint32)),
413 ("numBams",c.c_uint32),
414 ("numContigs",c.c_uint32),
415 ("bamFiles",c.POINTER(c.POINTER(BM_bamFile_C))),
416 ("contigNames",c.POINTER(c.POINTER(c.c_char))),
417 ("contigNameLengths",c.POINTER(c.c_uint16)),
418 ("isLinks",c.c_int),
419 ("coverageType",c.POINTER(BM_coverageType_C)),
420 ("isIgnoreSupps",c.c_int),
421 ("links",c.POINTER(cfuhash_table_t))
422 ]
423
424
425
426
427
428
430 ''' Multiprocessing can't pickle cTypes pointers and functions.
431 This class is a hack which quarantines the cTypes functions.
432 '''
434 '''Default constructor.
435
436 Loads libBamM.a and instantiates wrappers to the functions we wish
437 to export
438
439 Inputs:
440 None
441
442 Outputs;
443 None
444 '''
445
446
447
448 package_dir, filename = os.path.split(__file__)
449 package_dir = os.path.abspath(package_dir)
450 if unitTests:
451 c_lib = os.path.join(package_dir, '..', 'c', 'libBamM.a')
452 else:
453 c_lib = os.path.join(package_dir, 'c', 'libBamM.a')
454
455 try:
456 self.libPMBam = c.cdll.LoadLibrary(c_lib)
457 except OSError:
458 printError("Problem importing the BamM c library. This typically " \
459 "means that BamM is not installed correctly.\nPlease check " \
460 "the installation logs for more details.\nIf you don't have "\
461 "the installation logs then please try to reinstall BamM " \
462 "and look at the output.")
463 raise InvalidInstallationException
464
465
466
467
468
469 self._mergeBFI = self.libPMBam.mergeBFIs
470
471
472 self._destroyBFI = self.libPMBam.destroyBFI
473
474
475 self._extractReads = self.libPMBam.extractReads
476 self._extractReads.argtypes = [c.POINTER(c.c_char),
477 c.POINTER(c.c_char_p),
478 c.c_int,
479 c.POINTER(c.c_uint16),
480 c.POINTER(c.c_char),
481 c.c_int,
482 c.c_int,
483 c.c_int,
484 c.c_int,
485 c.c_int]
486 self._extractReads.restype = c.POINTER(BM_mappedRead_C)
487
488
489 self._setMICode = self.libPMBam.setMICode
490 self._setMICode.argTypes = [c.POINTER(BM_mappedRead_C), c.c_int]
491 self._setMICode.restype = None
492
493
494 self._getNextMappedRead = self.libPMBam.getNextMappedRead
495 self._getNextMappedRead.argtypes = [c.POINTER(BM_mappedRead_C)]
496 self._getNextMappedRead.restype = c.POINTER(BM_mappedRead_C)
497
498
499 self._setNextPrintRead = self.libPMBam.setNextPrintRead
500 self._setNextPrintRead.argtypes = [c.POINTER(BM_mappedRead_C),
501 c.POINTER(BM_mappedRead_C)]
502 self._setNextPrintRead.restype = None
503
504
505 self._getNextPrintRead = self.libPMBam.getNextPrintRead
506 self._getNextPrintRead.argtypes = [c.POINTER(BM_mappedRead_C)]
507 self._getNextPrintRead.restype = c.POINTER(BM_mappedRead_C)
508
509
510 self._getPartner = self.libPMBam.getPartner
511 self._getPartner.argtypes = [c.POINTER(BM_mappedRead_C)]
512 self._getPartner.restype = c.POINTER(BM_mappedRead_C)
513
514
515 self._partnerInSameGroup = self.libPMBam.partnerInSameGroup
516
517
518 self._destroyMappedReads = self.libPMBam.destroyMappedReads
519 self._destroyMappedReads.argtypes = [c.POINTER(BM_mappedRead_C)]
520 self._destroyMappedReads.restype = None
521
522
523 self._destroyPrintChain = self.libPMBam.destroyPrintChain
524 self._destroyPrintChain.argtypes = [c.POINTER(BM_mappedRead_C)]
525 self._destroyPrintChain.restype = None
526
527
528 self._printMappedRead = self.libPMBam.printMappedRead
529 self._sprintMappedRead = self.libPMBam.sprintMappedRead
530 self._printMappedReads = self.libPMBam.printMappedReads
531
532
533 self._parseCoverageAndLinks = self.libPMBam.parseCoverageAndLinks
534
535
536 self._initLW = self.libPMBam.initLW
537
538
539 self._stepLW = self.libPMBam.stepLW
540
541
542 self._destroyLW = self.libPMBam.destroyLW
543
544
545 self._printBFI = self.libPMBam.printBFI
546
547
548 self._estimate_COUNT_Coverage = self.libPMBam.estimate_COUNT_Coverage
549 self._estimate_COUNT_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
550 self._estimate_COUNT_Coverage.restype = c.c_float
551
552 self._estimate_C_MEAN_Coverage = self.libPMBam.estimate_C_MEAN_Coverage
553 self._estimate_C_MEAN_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
554 self._estimate_C_MEAN_Coverage.restype = c.c_float
555
556 self._estimate_P_MEAN_Coverage = self.libPMBam.estimate_P_MEAN_Coverage
557 self._estimate_P_MEAN_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
558 self._estimate_P_MEAN_Coverage.restype = c.c_float
559
560 self._estimate_P_MEDIAN_Coverage = self.libPMBam.estimate_P_MEDIAN_Coverage
561 self._estimate_P_MEDIAN_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
562 self._estimate_P_MEDIAN_Coverage.restype = c.c_float
563
564 self._estimate_P_MEAN_TRIMMED_Coverage = self.libPMBam.estimate_P_MEAN_TRIMMED_Coverage
565 self._estimate_P_MEAN_TRIMMED_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
566 self._estimate_P_MEAN_TRIMMED_Coverage.restype = c.c_float
567
568 self._estimate_P_MEAN_OUTLIER_Coverage = self.libPMBam.estimate_P_MEAN_OUTLIER_Coverage
569 self._estimate_P_MEAN_OUTLIER_Coverage.argtypes = [c.POINTER(c.c_uint32), c.POINTER(BM_coverageType_C), c.c_uint32]
570 self._estimate_P_MEAN_OUTLIER_Coverage.restype = c.c_float
571
572 self._BM_median = self.libPMBam.BM_median
573 self._BM_median.argtypes = [c.POINTER(c.c_uint32), c.c_uint32]
574 self._BM_median.restype = c.c_float
575
576 self._BM_mean = self.libPMBam.BM_mean
577 self._BM_mean.argtypes = [c.POINTER(c.c_uint32), c.c_uint32]
578 self._BM_mean.restype = c.c_float
579
580 self._BM_stdDev = self.libPMBam.BM_stdDev
581 self._BM_stdDev.argtypes = [c.POINTER(c.c_uint32), c.c_uint32, c.c_float]
582 self._BM_stdDev.restype = c.c_float
583
584
585
586
587
588
589
590