Wei Wang | dd6a06c | 2018-03-29 14:54:18 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # Copyright (C) 2016 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | # |
| 17 | """Analyze ext4 trace with custom open trace""" |
| 18 | import collections |
| 19 | import math |
| 20 | import os |
| 21 | import re |
| 22 | import string |
| 23 | import sys |
| 24 | |
DBG = False        # verbose debug of ignored/unmatched events
DBG_ISSUE = False  # verbose debug of block-layer remap/issue handling

# Hard-coded maps to detect the partition for a given device and the other
# way around. These can be different per device; these values work for marlin.
DEVICE_TO_PARTITION = { "253,0": "/system/", "253,1": "/vendor/", "259,19": "/data/" }
# Reverse lookup built once at import time. Uses items() instead of the
# original iteritems(), which does not exist in Python 3.
PARTITION_TO_DEVICE = {}
for key, value in DEVICE_TO_PARTITION.items():
  PARTITION_TO_DEVICE[value] = key

# init-1 [003] .... 2.703964: do_sys_open: init: open("/sys/fs/selinux/null", 131074, 0) fd = 0, inode = 22
RE_DO_SYS_OPEN = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+do_sys_open:\s+(\S+):\sopen..(\S+).,\s([0-9]+).\s+.+inode\s=\s([0-9]+)"""
# init-1 [003] ...1 2.703991: ext4_ext_map_blocks_enter: dev 253,0 ino 2719 lblk 154 len 30 flags
RE_EXT4_MA_BLOCKS_ENTER = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_enter:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\s+lblk\s+([0-9]+)\s+len\s+([0-9]+)"""
# init-1 [002] ...1 2.687205: ext4_ext_map_blocks_exit: dev 253,0 ino 8 flags lblk 0 pblk 196608 len 1 mflags M ret 1
RE_EXT4_MA_BLOCKS_EXIT = r"""\s+(\S+)-([0-9]+).+\s+([0-9]+\.[0-9]+):\s+ext4_ext_map_blocks_exit:\s+dev\s+(\S+)\s+ino\s+([0-9]+)\sflags.*\slblk\s+([0-9]+)\spblk\s([0-9]+)\slen\s+([0-9]+).*mflags\s(\S*)\sret\s([0-9]+)"""
# init-1 [002] ...1 2.887119: block_bio_remap: 8,0 R 10010384 + 8 <- (259,18) 3998944
RE_BLOCK_BIO_REMAP = r""".+block_bio_remap:\s\d+,\d+\s\S+\s(\d+)\s\+\s\d+\s<-\s\([^\)]+\)\s(\d+)"""
# kworker/u9:1-83 [003] d..2 2.682991: block_rq_issue: 8,0 RA 0 () 10140208 + 32 [kworker/u9:1]
RE_BLOCK_RQ_ISSUE = r"""\s+\S+-([0-9]+).*\s+([0-9]+\.[0-9]+):\s+block_rq_issue:\s+([0-9]+)\,([0-9]+)\s+([RW]\S*)\s[0-9]+\s\([^\)]*\)\s([0-9]+)\s+\+\s+([0-9]+)\s+\[([^\]]+)\]"""

EXT4_SIZE_TO_BLOCK_SIZE = 8  # ext4: 4KB, block device block size: 512B
| 47 | |
class FileAccess:
  """Accumulates all reads (or all writes) made to a single file.

  Keeps raw access records plus histograms at two layers: the ext4 layer
  (requests in 4KB ext4 blocks) and the block-device layer (the per-request
  sizes that actually serviced each ext4 request).
  """

  def __init__(self, file):
    self.file = file                        # owning FileEvent
    self.accesses = []                      # (time, offset, size, process_name)
    self.total_access = 0                   # total ext4 blocks accessed
    self.ext4_access_size_histogram = {}    # K: access size, V: occurrence
    self.block_access_size_histogram = {}   # K: access size, V: occurrence
    self.ext4_single_block_accesses = {}    # K: process name, V: list of offsets
    self.block_single_block_accesses = {}   # K: process name, V: list of offsets
    self.blocks_histogram = {}              # K: block offset, V: access counter

  def add_if_single_block(self, container, size, offset, process_name):
    """Records offset under process_name when the access is one block long."""
    if size != 1:
      return
    offsets = container.get(process_name)
    if not offsets:
      offsets = []
      container[process_name] = offsets
    offsets.append(offset)

  def add_access(self, time, offset, size, process_name, read_sizes):
    """Adds one ext4-level access and its block-level sub-accesses.

    read_sizes lists, in order from offset, the sizes of the block-level
    requests that serviced this ext4 request.
    """
    self.accesses.append((time, offset, size, process_name))
    self.total_access += size
    self.ext4_access_size_histogram[size] = self.ext4_access_size_histogram.get(size, 0) + 1
    read_offset = offset
    for s in read_sizes:
      self.block_access_size_histogram[s] = self.block_access_size_histogram.get(s, 0) + 1
      self.add_if_single_block(self.block_single_block_accesses, s, read_offset, process_name)
      read_offset += s
    for i in range(size):
      self.blocks_histogram[offset + i] = self.blocks_histogram.get(offset + i, 0) + 1
    self.add_if_single_block(self.ext4_single_block_accesses, size, offset, process_name)

  def add_merged_access(self, time, offsets, lens, process_names):
    """Adds several ext4 accesses that one block request completed together.

    Overlapping ranges are counted only once at the block level.
    """
    total_read_offsets = set()  # reads can overlap, so count each block once
    for i in range(len(offsets)):
      self.accesses.append((time, offsets[i], lens[i], process_names[i]))
      self.ext4_access_size_histogram[lens[i]] = \
          self.ext4_access_size_histogram.get(lens[i], 0) + 1
      self.add_if_single_block(self.ext4_single_block_accesses, lens[i], offsets[i],
                               process_names[i])
      # BUGFIX: iterate over this access' length (lens[i]); the original used
      # range(len(lens)) -- the number of accesses -- undercounting or
      # overcounting the covered blocks.
      for j in range(lens[i]):
        total_read_offsets.add(offsets[i] + j)
    total_lens = len(total_read_offsets)
    start_offset = min(total_read_offsets)
    self.total_access += total_lens
    self.block_access_size_histogram[total_lens] = \
        self.block_access_size_histogram.get(total_lens, 0) + 1
    self.add_if_single_block(self.block_single_block_accesses, total_lens, start_offset,
                             process_names[0])
    for s in range(total_lens):
      self.blocks_histogram[start_offset + s] = self.blocks_histogram.get(start_offset + s, 0) + 1

  def dump(self):
    """Prints histograms; data with only trivial access is skipped.

    print(...) with a single %-formatted string keeps the output identical
    under Python 2 and makes the code valid Python 3.
    """
    if len(self.ext4_access_size_histogram) > 1:
      print(" Ext4 access size histograms: %s" % collections.OrderedDict(
          sorted(self.ext4_access_size_histogram.items(), key = lambda item: item[0])))
    if len(self.ext4_single_block_accesses) > 0 and self.total_access > 1:
      print(" Ext4 single block accesses: %s" % collections.OrderedDict(
          sorted(self.ext4_single_block_accesses.items(), key = lambda item: item[1],
                 reverse = True)))
    if len(self.block_access_size_histogram) > 1:
      print(" Block access size histograms: %s" % collections.OrderedDict(
          sorted(self.block_access_size_histogram.items(), key = lambda item: item[0])))
    if len(self.block_single_block_accesses) > 0 and self.total_access > 1:
      print(" Block single block accesses: %s" % collections.OrderedDict(
          sorted(self.block_single_block_accesses.items(), key = lambda item: item[1],
                 reverse = True)))
    if self.total_access > 1:
      sorted_blocks_histogram = sorted(self.blocks_histogram.items(),
                                       key = lambda item: item[1], reverse = True)
      prints = []
      repeating_reads_counter = 0
      for offset, counter in sorted_blocks_histogram:
        if counter == 1:
          break  # sorted descending, so every remaining counter is 1
        prints.append(str(offset) + ":" + str(counter))
        repeating_reads_counter += (counter - 1)
      if len(prints) > 0:
        print(" repeating accesses %s  offset:count -> %s"
              % (repeating_reads_counter, ','.join(prints)))
| 127 | |
class FileEvent:
  """All activity for one inode: every open plus read and write accesses."""

  def __init__(self, open_time, file_name, process_name, inode, flags):
    self.file_name = file_name
    self.inode = inode
    self.total_open = 1
    self.processes = [(open_time, process_name, flags)]
    self.read = FileAccess(self)   # read-side accumulator
    self.write = FileAccess(self)  # write-side accumulator

  def add_open(self, open_time, process_name, flags):
    """Records an additional open of this file."""
    self.processes.append((open_time, process_name, flags))
    self.total_open += 1

  def add_access(self, is_read, time, offset, size, process_name, read_sizes):
    """Routes one access to the read- or write-side FileAccess."""
    target = self.read if is_read else self.write
    target.add_access(time, offset, size, process_name, read_sizes)

  def add_merged_access(self, is_read, time, offsets, lens, process_names):
    """Routes one merged access to the read- or write-side FileAccess."""
    target = self.read if is_read else self.write
    target.add_merged_access(time, offsets, lens, process_names)

  def dump(self, name_to_pid_map):
    """Prints full per-file statistics.

    name_to_pid_map: K: process name, V: pid string, used to annotate openers.
    Prints use single %-formatted strings so output matches the original
    Python 2 print statements while remaining valid Python 3.
    """
    print(" ***filename %s, total reads %d, total writes %d, total open %d inode %s"
          % (self.file_name, self.read.total_access, self.write.total_access,
             self.total_open, self.inode))
    process_names = []
    for open_time, name, flags in self.processes:
      process_names.append(name + "-" + name_to_pid_map.get(name, '?') + " t:" +
                           str(open_time) + " flags:" + flags)
    print(" Processes opened this file: %s" % ','.join(process_names))
    if self.read.total_access > 0:
      print(" ****Reads:")
      self.read.dump()
    if self.write.total_access > 0:
      print(" ****Writes:")
      self.write.dump()

  def dump_short(self):
    """Prints a one-line read/write summary for this file."""
    print(" filename %s, total reads %d, total writes %d"
          % (self.file_name, self.read.total_access, self.write.total_access))
| 174 | |
class PendingAccess:
  """An ext4 map_blocks request waiting for matching block-layer issues.

  Tracks which of the request's ext4 blocks have been covered by
  block_rq_issue events so the request can be reported once complete.
  """

  # NOTE: the parameter name 'len' shadows the builtin; it is kept for
  # interface compatibility and immediately copied into self.len.
  def __init__(self, process_name, pid, time, dev, inode, lblk, pblk, len, fevent):
    self.process_name = process_name
    self.pid = pid
    self.time = time
    self.dev = dev
    self.inode = inode
    self.lblk = lblk  # logical offset in ext4 (4KB) blocks
    self.pblk = pblk  # physical address, already in 512B sector units
    self.blk_len = len * EXT4_SIZE_TO_BLOCK_SIZE  # length in 512B sectors
    self.len = len    # length in ext4 blocks
    self.fevent = fevent
    self.pending_accesses = set(range(len))  # ext4 block indexes not yet seen
    self.access_sizes = []  # valid reads for this file at the block-dev level
    self.block_access_counter = 0

  def get_valid_access(self, block_offset, block_len):
    """Maps a sector-level (offset, len) into this request's ext4 blocks.

    Returns (ext4_offset, ext4_len); ext4_len is 0 when out of range.
    Uses // so the integer semantics of Python 2's int / int are kept
    under Python 3 as well.
    """
    ext4_offset = block_offset // EXT4_SIZE_TO_BLOCK_SIZE
    if ext4_offset > self.len:
      return 0, 0
    ext4_len = block_len // EXT4_SIZE_TO_BLOCK_SIZE
    if (ext4_offset + ext4_len) > self.len:
      ext4_len = self.len - ext4_offset
    return ext4_offset, ext4_len

  def queue_block_access(self, ext4_offset, ext4_len):
    """Marks [ext4_offset, ext4_offset + ext4_len) as accessed."""
    if ext4_len <= 0:
      return
    self.block_access_counter += 1
    ext4_blocks_accessed = 0
    for i in range(ext4_len):
      ext4_block_i = i + ext4_offset
      if ext4_block_i in self.pending_accesses:
        ext4_blocks_accessed += 1
        self.pending_accesses.remove(ext4_block_i)
    if ext4_blocks_accessed > 0:
      self.access_sizes.append(ext4_blocks_accessed)

  def handle_req_complete(self, time, is_read):
    """Reports this single request to its FileEvent."""
    self.fevent.add_access(is_read, self.time, self.lblk, self.len,
                           self.process_name, self.access_sizes)

  def handle_merged_req(self, time, offsets, lens, names, is_read):
    """Reports a batch of merged requests to this request's FileEvent."""
    self.fevent.add_merged_access(is_read, time, offsets, lens, names)

  def is_req_complete(self):
    """True when every ext4 block of the request has been accessed."""
    return len(self.pending_accesses) == 0

  def is_req_started(self):
    """True when at least one block has been accessed.

    BUGFIX: uses != instead of the original 'is not', which compared
    integer identity and only worked via CPython's small-int caching.
    """
    return self.len != len(self.pending_accesses)
| 227 | |
class Trace:
  """Parses ftrace lines and correlates open/ext4/block events per file."""

  def __init__(self):
    self.files_per_device = {}  # K: device, V: { K: inode, V: FileEvent }
    self.re_open = re.compile(RE_DO_SYS_OPEN)
    self.re_ext4_access = re.compile(RE_EXT4_MA_BLOCKS_EXIT)
    self.re_bio_remap = re.compile(RE_BLOCK_BIO_REMAP)
    self.re_block_issue = re.compile(RE_BLOCK_RQ_ISSUE)
    # Requests from ext4 that have not gone down to the block level yet.
    # K: physical block address, V: list of PendingAccess.
    self.pending_accesses = {}
    self.remap = {}  # K: remapped address, V: original address
    self.process_names = {}  # K: pid, V: name

  def handle_line(self, line):
    """Dispatches one raw trace line to the first matching handler."""
    match = self.re_open.match(line)
    if match:
      self.handle_open(match)
      return
    match = self.re_ext4_access.match(line)
    if match:
      self.handle_ext4_block_exit(match)
      return
    match = self.re_bio_remap.match(line)
    if match:
      self.handle_bio_remap(match)
      return
    match = self.re_block_issue.match(line)
    if match:
      self.handle_block_issue(match)
      return

  def handle_open(self, match):
    """Registers a do_sys_open event for files on known partitions."""
    pid = int(match.group(1))
    time = match.group(2)
    process_name = match.group(3)
    file_name = match.group(4)
    flag = match.group(5)
    inode = int(match.group(6))
    dev_name = None
    self.process_names[pid] = process_name
    for p in PARTITION_TO_DEVICE:
      if file_name.startswith(p):
        dev_name = PARTITION_TO_DEVICE[p]
    if not dev_name:
      if DBG:
        print("Ignore open for file %s" % file_name)
      return
    # BUGFIX: create the per-device map on demand. The original indexed
    # files_per_device directly and raised KeyError when an open event
    # arrived before any ext4 access on that device.
    files = self.files_per_device.setdefault(dev_name, {})
    fevent = files.get(inode)
    if not fevent:
      files[inode] = FileEvent(time, file_name, process_name, inode, flag)
    else:
      fevent.add_open(time, process_name, flag)

  def handle_ext4_block_exit(self, match):
    """Queues an ext4 map_blocks result as a pending block-level access."""
    process_name = match.group(1)
    pid = int(match.group(2))
    time = float(match.group(3))
    dev = match.group(4)
    inode = int(match.group(5))
    lblk = int(match.group(6))
    # ext4 block address converted into 512B sector units.
    pblk = int(match.group(7)) * EXT4_SIZE_TO_BLOCK_SIZE
    l = int(match.group(8))
    ret = int(match.group(10))
    if ret <= 0:  # no block access
      return
    process_name = self.process_names.get(pid, process_name)
    if process_name == '<...>':  # truncated name in trace; fall back to pid
      process_name = "pid-" + str(pid)
    if DBG_ISSUE:
      print("ext4 %s %s %s %s" % (pblk, l, inode, process_name))
    files = self.files_per_device.get(dev)
    if not files:
      if DEVICE_TO_PARTITION.get(dev):
        files = {}
        self.files_per_device[dev] = files
      else:
        if DBG:
          print("access ignored for device %s" % dev)
        return
    fevent = files.get(inode)
    if not fevent:
      if DBG:
        print('no open for device %s with inode %s' % (dev, inode))
      fevent = FileEvent(time, "unknown", process_name, inode, "-")
      files[inode] = fevent
    pending_access = PendingAccess(process_name, pid, time, dev, inode, lblk,
                                   pblk, l, fevent)
    self.pending_accesses.setdefault(pblk, []).append(pending_access)

  def handle_bio_remap(self, match):
    """Records an address remap so later issues map back to ext4 pblks."""
    new_addr = int(match.group(1))
    old_addr = int(match.group(2))
    self.remap[new_addr] = old_addr
    if DBG_ISSUE:
      print("remap %s %s" % (new_addr, old_addr))

  def handle_block_issue(self, match):
    """Completes pending ext4 accesses covered by a block_rq_issue event."""
    pid = int(match.group(1))
    time = float(match.group(2))
    access = match.group(5)
    new_address = int(match.group(6))
    l = int(match.group(7))
    name = match.group(8)
    name = self.process_names.get(pid, name)
    if name == '<...>':
      name = "pid-" + str(pid)
    is_read = 'W' not in access
    # K: inode, V: list of three lists: offsets, lengths, process names.
    accesses_per_inodes = {}
    addrs_to_remove = []
    completed_reqs = []
    address = self.remap.get(new_address, new_address)
    if DBG_ISSUE:
      print("issue %s %s %s %s" % (address, l, is_read, access))
    for access_addr, access_list in self.pending_accesses.items():
      if (address >= access_addr) and (address + l) > access_addr:
        reqs_to_remove = []
        for pending in access_list:
          offset, valid_access_size = pending.get_valid_access(address - access_addr, l)
          if valid_access_size > 0:
            if pending.is_req_started():
              # Spread across multiple reads; complete alone.
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                pending.handle_req_complete(time, is_read)
                reqs_to_remove.append(pending)
            else:
              # Possibly multiple reads completed by this one; report together.
              pending.queue_block_access(offset, valid_access_size)
              if pending.is_req_complete():
                reads = accesses_per_inodes.get(pending.inode, [[], [], []])
                reads[0].append(offset + pending.lblk)
                reads[1].append(valid_access_size)
                reads[2].append(pending.process_name)
                accesses_per_inodes[pending.inode] = reads
                completed_reqs.append(pending)
                reqs_to_remove.append(pending)
        for to_remove in reqs_to_remove:
          access_list.remove(to_remove)
        if len(access_list) == 0:
          addrs_to_remove.append(access_addr)
    for addr in addrs_to_remove:
      del self.pending_accesses[addr]
    for pending in completed_reqs:  # these will be reported as batch
      accesses = accesses_per_inodes.get(pending.inode)
      if not accesses:  # merged one already dispatched
        continue
      if len(accesses[0]) == 1:
        pending.handle_req_complete(time, is_read)
      else:  # merged
        pending.handle_merged_req(time, accesses[0], accesses[1], accesses[2], is_read)
      del accesses_per_inodes[pending.inode]

  def dump_partition(self, partition_name, files):
    """Prints per-file stats for one partition.

    Returns (total_reads, total_writes, number of files).
    """
    name_to_pid_map = {}
    for pid, name in self.process_names.items():
      name_to_pid_map[name] = str(pid)
    print("**Dump partition: %s total number of files: %s"
          % (partition_name, len(files)))
    total_reads = 0
    total_writes = 0
    # sorted() over the values view works under both Python 2 and 3; the
    # original files.values().sort() fails on Python 3 where values() is
    # a non-sortable view.
    files_sorted_by_read = sorted(files.values(),
                                  key = lambda f: f.read.total_access, reverse = True)
    files_sorted_by_write = sorted(files.values(),
                                   key = lambda f: f.write.total_access, reverse = True)
    print(" Top 10 readers:")
    for f in files_sorted_by_read[:10]:
      f.dump_short()
    print(" Top 10 writers:")
    for f in files_sorted_by_write[:10]:
      f.dump_short()
    for f in files_sorted_by_read:
      f.dump(name_to_pid_map)
      total_reads += f.read.total_access
      total_writes += f.write.total_access
    print(" Total reads: %s  total writes: %s" % (total_reads, total_writes))
    return total_reads, total_writes, len(files)

  def dump(self):
    """Prints every partition's details plus a final summary."""
    print("*Dump R/W per each partition")
    total_reads = 0
    total_writes = 0
    summaries = []
    for d in self.files_per_device:
      reads, writes, num_files = self.dump_partition(DEVICE_TO_PARTITION[d],
                                                     self.files_per_device[d])
      total_reads += reads
      total_writes += writes
      summaries.append((DEVICE_TO_PARTITION[d], reads, writes, num_files))
    print("*Summary*")
    print("Total blocks read %s" % total_reads)
    print("Total blocks wrote %s" % total_writes)
    print("Partition total_reads total_writes num_files")
    for s in summaries:
      print("%s %s %s %s" % s)
| 428 | |
def main(argv):
  """Entry point: parses the trace file named in argv[1] and dumps stats.

  Prints a usage line and returns when no filename is given.
  """
  if len(argv) < 2:
    print("check_file_read.py filename")
    return
  filename = argv[1]
  trace = Trace()
  with open(filename) as f:
    for l in f:
      trace.handle_line(l)
  trace.dump()

if __name__ == '__main__':
  main(sys.argv)