1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 __author__ = "Michael Imelfort"
28 __copyright__ = "Copyright 2014"
29 __credits__ = ["Michael Imelfort"]
30 __license__ = "LGPLv3"
31 __maintainer__ = "Michael Imelfort"
32 __email__ = "mike@mikeimelfort.com"
33
34
35
36
37 import ctypes as c
38 import numpy as np
39 import sys
40
41
42 from bamLink import *
43 from cWrapper import *
44
45
46
47
48
49
50
52 '''Container class for storing the type of coverage to calculate'''
53
54 - def __init__(self,
55 cType,
56 cUpper,
57 cLower):
58 '''
59 Default constructor.
60
61 Inputs:
62 cType - enum, from the CT enum (see cWrapper.py)
63 cUpper - float, percent of coverage values or number of stdevs. used
64 to determine upper cutoff when calculating coverage values
65 cLower - float, percent of coverage values or number of stdevs. used
66 to determine lower cutoff when calculating coverage values
67
68 Outputs:
69 None
70
71 '''
72 self.cType = cType
73 self.cUpper = cUpper
74 self.cLower = cLower
75
76
77
78
79
80
81
83 ''' Container class for Storing information about the inserts
84 and orientation types of a BAM file'''
85
86 - def __init__(self,
87 orientationType = OT.NONE,
88 insertSize = 0.,
89 insertStdev = 0.,
90 supporting = 0):
91 '''
92 Default constructor.
93
94 Inputs:
95 orientationType - enum, from the OT enum (see cWrapper.py)
96 insertSize - float, estimated mean template size for the library
97 insertStdev - float, estimated stdev of the template size
98 supporting - int, number or reads used to define stats
99
100 Outputs:
101 None
102
103 '''
104 self.orientationType = orientationType
105 self.insertSize = insertSize
106 self.insertStdev = insertStdev
107 self.supporting = supporting
108
115
116
117
118
119
120
122 '''Container class for storing information about a BAM file'''
123 - def __init__(self,
124 bid,
125 fileName):
126 '''
127 Default constructor.
128
129 Inputs:
130 bid - unique identifier for the bameFile
131 fileName - long fileName
132
133 Outputs:
134 None
135 '''
136 self.bid = bid
137 self.fileName = fileName
138 self.types = []
139
141 '''override basic string function'''
142 return "\n".join(["%s\t%s" % (self.fileName, type)
143 for type in self.types])
144
145
146
147
148
149
151 '''High level management of BamFiles, contigs and associated
152 mapping statistics'''
153
154 - def __init__(self,
155 coverages,
156 contigLengths,
157 numBams,
158 numContigs,
159 contigNames,
160 bamFiles,
161 links):
162 '''
163 Default constructor.
164
165 Inputs:
166 coverages - (C x B) array of floats; C = num contigs and B = num bams
167 contigLengths -(C) array of ints storing contig lengths
168 numBams - The number of BAMs the instance is managing ( == B )
169 numContigs - The number of unique contigs across all BAMs ( == C )
170 contigNames - (C) array of strings storing long contig names
171 bamFiles - (B) array of BM_BamFile instances
172 links - { uid, BM_linkPair }, all links joining the contigs, uid made
173 by BM_BM_linkPair.makeKey()
174
175 Outputs:
176 None
177 '''
178 self.coverages = np.array(coverages)
179 self.contigLengths = np.array(contigLengths)
180 self.numBams = numBams
181 self.numContigs = numContigs
182 self.contigNames = contigNames
183 self.bamFiles = bamFiles
184 self.links = links
185
187 '''Merge another BFI's internals with this one
188
189 You should destory the merged BFI after merging to avoid
190 duplication of results
191
192 Inputs:
193 BFIb, BM_fileInfo, that will be merged
194
195 Outputs:
196 None
197 '''
198 if len(self.coverages) > 0:
199 tmp = np.zeros((self.numContigs, (self.numBams+1)))
200 tmp[:,:-1] = self.coverages
201 try:
202 tmp[:,-1] = np.reshape(BFIb.coverages, (1, self.numContigs))
203 except ValueError:
204 print "Error combining results from different BAMs. Are you " \
205 "sure they're from the same mapping?"
206 raise
207 self.coverages = tmp
208
209 self.numBams += BFIb.numBams
210 for BF in BFIb.bamFiles:
211 self.bamFiles.append(BF)
212
213 for key in BFIb.links.keys():
214 try:
215 (self.links[key]).links += (BFIb.links[key]).links
216 except KeyError:
217 self.links[key] = BFIb.links[key]
218
220 ''' Write bam type information to the given filehandle or stdout
221
222 used for printing output to types file
223
224 Inputs:
225 fileHandle - open FILE, == None implies writing to stdout
226
227 Outputs:
228 None
229 '''
230 if fileHandle is None:
231 fileHandle = sys.stdout
232
233 fileHandle.write("%s\n" % "\t".join(["#file",
234 "insert",
235 "stdev",
236 "orientation",
237 "supporting"])
238 )
239
240 fileHandle.write("%s\n" % "\n".join([str(BF) for BF in self.bamFiles]))
241
243 '''Write coverage profile information to the given filehandle or stdout
244
245 used for printing output to types file
246
247 Inputs:
248 fileHandle - open FILE, == None implies writing to stdout
249
250 Outputs:
251 None
252 '''
253 if fileHandle is None:
254 fileHandle = sys.stdout
255
256 fileHandle.write("#contig\tLength\t%s\n" % \
257 "\t".join([self.bamFiles[i].fileName for i in range(self.numBams)]))
258
259 fileHandle.write("%s\n" % "\n".join(["%s\t%d\t" % \
260 (self.contigNames[i],
261 self.contigLengths[i]) +
262 "\t".join(["%0.4f" % self.coverages[i,j]
263 for j in range(self.numBams)])
264 for i in range(self.numContigs)]))
265
266 - def printLinks(self, bamFileNames, fileHandle=None):
267 '''Write contig linking information to the given filehandle or stdout
268
269 used for printing output to types file
270
271 Inputs:
272 bamFileNames - { bamId : string }, storage for long bam file names
273 fileHandle - open FILE, == None implies writing to stdout
274
275 Outputs:
276 None
277 '''
278 if fileHandle is None:
279 fileHandle = sys.stdout
280 if len(self.links) != 0:
281
282 fileHandle.write("%s\n" % "\t".join(["#cid_1",
283 "cid_2",
284 "len_1",
285 "pos_1",
286 "rev_1",
287 "len_2",
288 "pos_2",
289 "rev_2",
290 "file"]))
291
292 fileHandle.write("%s\n" % \
293 "\n".join([self.links[key].printMore(self.contigNames,
294 self.contigLengths,
295 bamFileNames)
296 for key in self.links.keys()]))
297
299 '''Override basic string function
300
301 calls printBamTypes, printCoverages and printLinks
302 '''
303 retstr = self.printBamTypes(returnStr = True) + "\n"
304 retstr += self.printCoverages(returnStr = True)
305 if len(self.links) != 0:
306 retstr += self.printLinks(returnStr = True)
307 return retstr
308
309
310
311
312
313