#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""Analyze ext4 trace with custom open trace"""
18import collections
19import math
20import os
21import re
22import string
23import sys
24
DBG = False
DBG_ISSUE = False

# hard coded maps to detect partition for given device or the other way around
# this can be different per each device. This works for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
# Reverse lookup: partition mount prefix -> "major,minor" device string.
PARTITION_TO_DEVICE = {}
# items() instead of iteritems(): identical iteration behavior on Python 2,
# and it does not break importing this module under Python 3 (iteritems()
# was removed there).
for key, value in DEVICE_TO_PARTITION.items():
  PARTITION_TO_DEVICE[value] = key

# init-1 [003] .... 2.703964: do_sys_open: init: open("/sys/fs/selinux/null", 131074, 0) fd = 0, inode = 22
RE_DO_SYS_OPEN = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# init-1 [003] ...1 2.703991: ext4_ext_map_blocks_enter: dev 253,0 ino 2719 lblk 154 len 30 flags
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
# init-1 [002] ...1 2.687205: ext4_ext_map_blocks_exit: dev 253,0 ino 8 flags lblk 0 pblk 196608 len 1 mflags M ret 1
RE_EXT4_MA_BLOCKS_EXIT = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_exit:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\sflags.*\slblk\s+([0-9]+)\spblk\s([0-9]+)\slen\s+([0-9]+).*mflags\s(\S*)\sret\s([0-9]+)"""
# init-1 [002] ...1 2.887119: block_bio_remap: 8,0 R 10010384 + 8 <- (259,18) 3998944
RE_BLOCK_BIO_REMAP = r""".+block_bio_remap:\s\d+,\d+\s\S+\s(\d+)\s\+\s\d+\s<-\s\([^\)]+\)\s(\d+)"""
# kworker/u9:1-83 [003] d..2 2.682991: block_rq_issue: 8,0 RA 0 () 10140208 + 32 [kworker/u9:1]
RE_BLOCK_RQ_ISSUE = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_rq_issue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s[0-9]+\s\([^\)]*\)\s([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)\]"""

EXT4_SIZE_TO_BLOCK_SIZE = 8 # ext4: 4KB, block device block size: 512B
47
48class FileAccess:
49 def __init__(self, file):
50 self.file = file
51 self.accesses = []
52 self.total_access = 0
53 self.ext4_access_size_histogram = {} #key: read size, value: occurrence
54 self.block_access_size_histogram = {}
55 self.ext4_single_block_accesses = {} # process name, occurrence
56 self.block_single_block_accesses = {} # process name, occurrence
57 self.blocks_histogram = {} # K: offset, V: read counter
58
59 def add_if_single_block(self, container, size, offset, process_name):
60 if size != 1:
61 return
62 offsets = container.get(process_name)
63 if not offsets:
64 offsets = []
65 container[process_name] = offsets
66 offsets.append(offset)
67
68 def add_access(self, time, offset, size, process_name, read_sizes):
69 self.accesses.append((time, offset, size, process_name))
70 self.total_access += size
71 self.ext4_access_size_histogram[size] = self.ext4_access_size_histogram.get(size, 0) + 1
72 read_offset = offset
73 for s in read_sizes:
74 self.block_access_size_histogram[s] = self.block_access_size_histogram.get(s, 0) + 1
75 self.add_if_single_block(self.block_single_block_accesses, s, read_offset, process_name)
76 read_offset += s
77 for i in range(size):
78 self.blocks_histogram[offset + i] = self.blocks_histogram.get(offset + i, 0) + 1
79 self.add_if_single_block(self.ext4_single_block_accesses, size, offset, process_name)
80
81 def add_merged_access(self, time, offsets, lens, process_names):
82 total_read_offsets = set() # each read can overwrap. So count only once for block level counting
83 for i in range(len(offsets)):
84 self.accesses.append((time, offsets[i], lens[i], process_names[i]))
85 self.ext4_access_size_histogram[lens[i]] = self.ext4_access_size_histogram.get(lens[i], 0) + 1
86 self.add_if_single_block(self.ext4_single_block_accesses, lens[i], offsets[i], process_names[i])
87 for j in range(len(lens)):
88 total_read_offsets.add(offsets[i] + j)
89 total_lens = len(total_read_offsets)
90 start_offset = min(total_read_offsets)
91 self.total_access += total_lens
92 self.block_access_size_histogram[total_lens] = self.block_access_size_histogram.get(total_lens, 0) \
93 + 1
94 self.add_if_single_block(self.block_single_block_accesses, total_lens, start_offset, \
95 process_names[0])
96 for s in range(total_lens):
97 self.blocks_histogram[start_offset + s] = self.blocks_histogram.get(start_offset + s, 0) + 1
98
99
100 def dump(self):
101 if len(self.ext4_access_size_histogram) > 1:
102 print " Ext4 access size histograms:", collections.OrderedDict( \
103 sorted(self.ext4_access_size_histogram.items(), key = lambda item: item[0]))
104 if len(self.ext4_single_block_accesses) > 0 and self.total_access > 1:
105 print " Ext4 single block accesses:", collections.OrderedDict( \
106 sorted(self.ext4_single_block_accesses.items(), key = lambda item: item[1], reverse = True))
107 if len(self.block_access_size_histogram) > 1:
108 print " Block access size histograms:", collections.OrderedDict( \
109 sorted(self.block_access_size_histogram.items(), key = lambda item: item[0]))
110 if len(self.block_single_block_accesses) > 0 and self.total_access > 1:
111 print " Block single block accesses:", collections.OrderedDict( \
112 sorted(self.block_single_block_accesses.items(), key = lambda item: item[1], reverse = True))
113 if self.total_access > 1:
114 sorted_blocks_histogram = sorted(self.blocks_histogram.items(), key = lambda item: item[1], \
115 reverse = True)
116 prints = []
117 repeating_reads_counter = 0
118 for entry in sorted_blocks_histogram:
119 offset = entry[0]
120 counter = entry[1]
121 if counter == 1:
122 break
123 prints.append(str(offset) + ":" + str(counter))
124 repeating_reads_counter += (counter - 1)
125 if len(prints) > 0:
126 print " repeating accesses", repeating_reads_counter, " offset:count ->", ','.join(prints)
127
class FileEvent:
  """Per-inode record: who opened the file, plus read and write statistics."""

  def __init__(self, open_time, file_name, process_name, inode, flags):
    self.file_name = file_name
    self.inode = inode
    # Number of times this inode was opened (first open counted here).
    self.total_open = 1
    # List of (open_time, process_name, flags) per open.
    self.processes = []
    self.processes.append((open_time, process_name, flags))
    self.read = FileAccess(self)
    self.write = FileAccess(self)


  def add_open(self, open_time, process_name, flags):
    """Record an additional open of the same inode."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_access(self, is_read, time, offset, size, process_name, read_sizes):
    """Forward a completed access to the read or write FileAccess."""
    if is_read:
      self.read.add_access(time, offset, size, process_name, read_sizes)
    else:
      self.write.add_access(time, offset, size, process_name, read_sizes)

  def add_merged_access(self, is_read, time, offsets, lens, process_names):
    """Forward merged accesses (one block request, many ext4 reads) to the
    read or write FileAccess."""
    if is_read:
      self.read.add_merged_access(time, offsets, lens, process_names)
    else:
      self.write.add_merged_access(time, offsets, lens, process_names)

  def dump(self, name_to_pid_map):
    """Print full statistics; name_to_pid_map maps process name -> pid str."""
    print "  ***filename %s, total reads %d, total writes %d, total open %d inode %s" \
      % (self.file_name, self.read.total_access, self.write.total_access, self.total_open,\
         self.inode)
    process_names = []
    for opener in self.processes:
      process_names.append(opener[1] + "-" + name_to_pid_map.get(opener[1], '?') + " t:" + \
        str(opener[0]) + " flags:" + opener[2])
    print "   Processes opened this file:", ','.join(process_names)
    if self.read.total_access > 0:
      print "   ****Reads:"
      self.read.dump()
    if self.write.total_access > 0:
      print "   ****Writes:"
      self.write.dump()

  def dump_short(self):
    """Print a one-line read/write summary for top-N listings."""
    print "  filename %s, total reads %d, total writes %d" % (self.file_name,
      self.read.total_access, self.write.total_access)
174
class PendingAccess:
  """One ext4 map_blocks request waiting to be matched against block-level
  I/O. Tracks which of its ext4 blocks have been covered so far."""

  def __init__(self, process_name, pid, time, dev, inode, lblk, pblk, len, fevent):
    self.process_name = process_name
    self.pid = pid
    self.time = time
    self.dev = dev
    self.inode = inode
    self.lblk = lblk  # logical block number, ext4 block units
    self.pblk = pblk  # physical address, already scaled to 512B sectors
    self.blk_len = len * EXT4_SIZE_TO_BLOCK_SIZE  # length in 512B sectors
    self.len = len  # length in ext4 blocks
    self.fevent = fevent  # owning FileEvent, notified on completion
    self.pending_accesses = set()  # ext4 block indexes not yet seen
    for i in range(len):
      self.pending_accesses.add(i)
    self.access_sizes = [] # valid read for this file in block dev level.
    self.block_access_counter = 0

  def get_valid_access(self, block_offset, block_len):
    """Clip a block-level access (512B units, relative to pblk) to this
    request; returns (ext4_offset, ext4_len), (0, 0) if out of range."""
    # '//' makes the integer division explicit (identical to Python 2 '/'
    # on ints, and still correct if the file is ever run under Python 3).
    ext4_offset = block_offset // EXT4_SIZE_TO_BLOCK_SIZE
    if ext4_offset > self.len:
      return 0, 0
    ext4_len = block_len // EXT4_SIZE_TO_BLOCK_SIZE
    if (ext4_offset + ext4_len) > self.len:
      ext4_len = self.len - ext4_offset
    return ext4_offset, ext4_len

  def queue_block_access(self, ext4_offset, ext4_len):
    """Mark [ext4_offset, ext4_offset + ext4_len) as serviced; remember how
    many previously-pending blocks this block request actually covered."""
    if ext4_len <= 0:
      return
    self.block_access_counter += 1
    ext4_blocks_accessed = 0
    for i in range(ext4_len):
      ext4_block_i = i + ext4_offset
      if ext4_block_i in self.pending_accesses:
        ext4_blocks_accessed += 1
        self.pending_accesses.remove(ext4_block_i)
    if ext4_blocks_accessed > 0:
      self.access_sizes.append(ext4_blocks_accessed)

  def handle_req_complete(self, time, is_read):
    """Report this single completed request to the owning FileEvent."""
    self.fevent.add_access(is_read, self.time, self.lblk, self.len, self.process_name,\
                           self.access_sizes)

  def handle_merged_req(self, time, offsets, lens, names, is_read):
    """Report several requests completed by one block I/O to the FileEvent."""
    self.fevent.add_merged_access(is_read, time, offsets, lens, names)

  def is_req_complete(self):
    """True when every ext4 block of this request has been serviced."""
    return len(self.pending_accesses) == 0

  def is_req_started(self):
    """True when at least one (but not necessarily all) block was serviced."""
    # BUGFIX: was 'is not', an identity comparison between ints. That only
    # happens to work inside CPython's small-int cache (<= 256) and could
    # wrongly report a fresh large request as already started.
    return self.len != len(self.pending_accesses)
227
228class Trace:
229 def __init__(self):
230 self.files_per_device = {} # key: device, value: { key: inode, value; FileEvent }
231 self.re_open = re.compile(RE_DO_SYS_OPEN)
232 self.re_ext4_access = re.compile(RE_EXT4_MA_BLOCKS_EXIT)
233 self.re_bio_remap = re.compile(RE_BLOCK_BIO_REMAP)
234 self.re_block_issue = re.compile(RE_BLOCK_RQ_ISSUE)
235 # req from ext4 that has not gone down to block level yet, K:block address,
236 # V: list of PendingRead
237 self.pending_accesses = {}
238 self.remap = {}
239 self.process_names = {} # K: PID, V : name
240
241 def handle_line(self, line):
242 match = self.re_open.match(line)
243 if match:
244 self.handle_open(match)
245 return
246 match = self.re_ext4_access.match(line)
247 if match:
248 self.handle_ext4_block_exit(match)
249 return
250 match = self.re_bio_remap.match(line)
251 if match:
252 self.handle_bio_remap(match)
253 return
254 match = self.re_block_issue.match(line)
255 if match:
256 self.handle_block_issue(match)
257 return
258
259 def handle_open(self, match):
260 pid = int(match.group(1))
261 time = match.group(2)
262 process_name = match.group(3)
263 file_name = match.group(4)
264 flag = match.group(5)
265 inode = int(match.group(6))
266 dev_name = None
267 self.process_names[pid] = process_name
268 #print "open", pid, process_name, file_name, inode
269 for p in PARTITION_TO_DEVICE:
270 if file_name.startswith(p):
271 dev_name = PARTITION_TO_DEVICE[p]
272 if not dev_name:
273 if DBG:
274 print "Ignore open for file", file_name
275 return
276 files = self.files_per_device[dev_name]
277 fevent = files.get(inode)
278 if not fevent:
279 fevent = FileEvent(time, file_name, process_name, inode, flag)
280 files[inode] = fevent
281 else:
282 fevent.add_open(time, process_name, flag)
283
284 def handle_ext4_block_exit(self, match):
285 process_name = match.group(1)
286 pid = int(match.group(2))
287 time = float(match.group(3))
288 dev = match.group(4)
289 inode = int(match.group(5))
290 lblk = int(match.group(6))
291 pblk = int(match.group(7)) * EXT4_SIZE_TO_BLOCK_SIZE # address in ext4 blocks, ...
292 l = int(match.group(8))
293 mflags = match.group(9)
294 ret = int(match.group(10))
295 if ret <= 0: # no block access
296 return
297 process_name = self.process_names.get(pid, process_name)
298 if process_name == '<...>':
299 process_name = "pid-" + str(pid)
300 if DBG_ISSUE:
301 print "ext4", pblk, l, inode, process_name
302 files = self.files_per_device.get(dev)
303 if not files:
304 if DEVICE_TO_PARTITION.get(dev):
305 files = {}
306 self.files_per_device[dev] = files
307 else:
308 if DBG:
309 print "access ignored for device", dev
310 return
311 fevent = files.get(inode)
312 if not fevent:
313 if DBG:
314 print 'no open for device %s with inode %s' % (dev, inode)
315 fevent = FileEvent(time, "unknown", process_name, inode, "-")
316 files[inode] = fevent
317 pending_access = PendingAccess(process_name, pid, time, dev, inode, lblk, pblk, l,\
318 fevent)
319 access_list = self.pending_accesses.get(pblk, [])
320 access_list.append(pending_access)
321 self.pending_accesses[pblk] = access_list
322
323 def handle_bio_remap(self, match):
324 new_addr = int(match.group(1))
325 old_addr = int(match.group(2))
326 self.remap[new_addr] = old_addr
327 if DBG_ISSUE:
328 print "remap", new_addr, old_addr
329
330 def handle_block_issue(self, match):
331 pid = int(match.group(1))
332 time = float(match.group(2))
333 dev_major = match.group(3)
334 dev_minor = match.group(4)
335 access = match.group(5)
336 new_address = int(match.group(6))
337 l = int(match.group(7))
338 name = match.group(8)
339 name = self.process_names.get(pid, name)
340 if name == '<...>':
341 name = "pid-" + str(pid)
342 is_read = not 'W' in access
343 accesses_per_inodes = {} # K:inodes, V: list of two entries, 1st: offsets, 2nd: length
344 addrs_to_remove = []
345 completed_reqs = []
346 address = self.remap.get(new_address, new_address)
347 if DBG_ISSUE:
348 print "issue", address, l, is_read, access
349 for access_addr, access_list in self.pending_accesses.iteritems():
350 if (address >= access_addr) and (address + l) > access_addr:
351 reqs_to_remove = []
352 for pending in access_list:
353 offset, valid_access_size = pending.get_valid_access(address - access_addr, l)
354 if valid_access_size > 0:
355 if pending.is_req_started(): # spread across multiple reads. complete alone
356 pending.queue_block_access(offset, valid_access_size)
357 if pending.is_req_complete():
358 pending.handle_req_complete(time, is_read)
359 reqs_to_remove.append(pending)
360 else: # possible multiple reads completed in this read. complete them together
361 pending.queue_block_access(offset, valid_access_size)
362 if pending.is_req_complete():
363 reads = accesses_per_inodes.get(pending.inode, [[], [], []])
364 reads[0].append(offset + pending.lblk)
365 reads[1].append(valid_access_size)
366 reads[2].append(pending.process_name)
367 accesses_per_inodes[pending.inode] = reads
368 completed_reqs.append(pending)
369 reqs_to_remove.append(pending)
370 for to_remove in reqs_to_remove:
371 access_list.remove(to_remove)
372 if len(access_list) == 0:
373 addrs_to_remove.append(access_addr)
374 for addr in addrs_to_remove:
375 del self.pending_accesses[addr]
376 for pending in completed_reqs: # these will be reported as batch
377 accesses = accesses_per_inodes.get(pending.inode)
378 if not accesses: # merged one already dispatched
379 continue
380 if len(accesses[0]) == 1:
381 pending.handle_req_complete(time, is_read)
382 else: #merged
383 pending.handle_merged_req(time, accesses[0], accesses[1], accesses[2], is_read)
384 del accesses_per_inodes[pending.inode]
385
386 def dump_partition(self, partition_name, files):
387 name_to_pid_map = {}
388 for pid, name in self.process_names.iteritems():
389 name_to_pid_map[name] = str(pid)
390 print "**Dump partition:", partition_name, "total number of files:", len(files)
391 total_reads = 0
392 total_writes = 0
393 files_sorted_by_read = files.values()
394 files_sorted_by_read.sort(key=lambda f : f.read.total_access, reverse = True)
395 files_sorted_by_write = files.values()
396 files_sorted_by_write.sort(key=lambda f : f.write.total_access, reverse = True)
397 print " Top 10 readers:"
398 for i in range(min(10, len(files_sorted_by_read))):
399 files_sorted_by_read[i].dump_short()
400 print " Top 10 writers:"
401 for i in range(min(10, len(files_sorted_by_write))):
402 files_sorted_by_write[i].dump_short()
403 for f in files_sorted_by_read:
404 f.dump(name_to_pid_map)
405 total_reads += f.read.total_access
406 total_writes += f.write.total_access
407 print " Total reads:", total_reads, " total writes:", total_writes
408 return total_reads, total_writes, len(files)
409
410
411 def dump(self):
412 print "*Dump R/W per each partition"
413 total_reads = 0
414 total_writes = 0
415 summaries = []
416 for d in self.files_per_device:
417 reads, writes, num_files = self.dump_partition(DEVICE_TO_PARTITION[d], \
418 self.files_per_device[d])
419 total_reads += reads
420 total_writes += writes
421 summaries.append((DEVICE_TO_PARTITION[d], reads, writes, num_files))
422 print "*Summary*"
423 print "Total blocks read", total_reads
424 print "Total blocks wrote", total_writes
425 print "Partition total_reads total_writes num_files"
426 for s in summaries:
427 print s[0], s[1], s[2], s[3]
428
429def main(argv):
430 if (len(argv) < 2):
Wei Wang1c8fd562018-03-29 14:58:12 -0700431 print "check_file_read.py filename"
Wei Wangdd6a06c2018-03-29 14:54:18 -0700432 return
433 filename = argv[1]
434 trace = Trace()
435 with open(filename) as f:
436 for l in f:
437 trace.handle_line(l)
438 trace.dump()
439
440if __name__ == '__main__':
441 main(sys.argv)