blob: 045cb1dd71ce3afd5867463978ab0f35f8b8ef35 [file] [log] [blame]
Logan Chien0e53d882018-11-06 17:32:40 +08001#!/usr/bin/env python
2#
3# Copyright (C) 2019 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""ELF file checker.
18
19This command ensures all undefined symbols in an ELF file can be resolved to
20global (or weak) symbols defined in shared objects specified in DT_NEEDED
21entries.
22"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# Magic bytes expected at the very beginning of an ELF file.
_ELF_MAGIC = b'\x7fELF'


# `e_machine` values of the machines known to this checker.
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64}


# Leading fields of the ELF header as (field name, `struct` format) pairs.
_ELF_HEADER_STRUCT = (
    ('ei_magic', '4s'),
    ('ei_class', 'B'),
    ('ei_data', 'B'),
    ('ei_version', 'B'),
    ('ei_osabi', 'B'),
    ('ei_pad', '8s'),
    ('e_type', 'H'),
    ('e_machine', 'H'),
    ('e_version', 'I'),
)

# `struct` format string that unpacks all the fields above in one call.
_ELF_HEADER_STRUCT_FMT = ''.join(
    _field[1] for _field in _ELF_HEADER_STRUCT)


# Unpacked ELF header; the attribute names follow `_ELF_HEADER_STRUCT`.
ELFHeader = collections.namedtuple(
    'ELFHeader', tuple(_field[0] for _field in _ELF_HEADER_STRUCT))


# Information collected from one ELF file.
ELF = collections.namedtuple(
    'ELF',
    ['dt_soname', 'dt_needed', 'imported', 'exported', 'header'])
71
72
def _get_os_name():
    """Get the host OS name.

    Returns:
      'linux' or 'darwin'.

    Raises:
      ValueError: If the host platform is neither of the above.
    """
    # Python 2 reports 'linux2' whereas Python 3.3+ reports 'linux'.  Match
    # by prefix so that both spellings are accepted.
    if sys.platform.startswith('linux'):
        return 'linux'
    if sys.platform == 'darwin':
        return 'darwin'
    raise ValueError(sys.platform + ' is not supported')
80
81
def _get_build_top():
    """Find the build top of the source tree ($ANDROID_BUILD_TOP).

    The search starts from the current working directory and walks up the
    parent directories.  The first directory that contains a `.repo` entry
    is returned.  None is returned if the file system root is reached
    without finding one.
    """
    candidate = os.path.abspath(os.getcwd())
    while True:
        if os.path.exists(os.path.join(candidate, '.repo')):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # The root directory is its own parent; nothing left to visit.
            return None
        candidate = parent
92
93
def _select_latest_llvm_version(versions):
    """Select the latest LLVM prebuilts version from a set of versions.

    Only the names that start with `clang-r<revision>` are considered.  The
    name with the greatest revision number wins; names with the same
    revision number are ordered by string comparison.  None is returned if
    no name qualifies.
    """
    version_re = re.compile('clang-r([0-9]+)([a-z]?)')

    candidates = []
    for name in versions:
        matched = version_re.match(name)
        if matched:
            candidates.append((int(matched.group(1)), name))

    if not candidates:
        return None
    # Tuples are compared by revision number first and then by name.
    return max(candidates)[1]
109
110
def _get_latest_llvm_version(llvm_dir):
    """Find the latest LLVM prebuilts version among the entries of `llvm_dir`.

    Returns None if no entry of `llvm_dir` looks like an LLVM prebuilts
    version.
    """
    installed_versions = os.listdir(llvm_dir)
    return _select_latest_llvm_version(installed_versions)
114
115
def _get_llvm_dir():
    """Find the path to LLVM prebuilts.

    The directory is
    `<build top>/<LLVM_PREBUILTS_BASE>/<os>-x86/<LLVM_PREBUILTS_VERSION>`.
    `LLVM_PREBUILTS_BASE` defaults to `prebuilts/clang/host`, and
    `LLVM_PREBUILTS_VERSION` defaults to the latest version found in the
    prebuilts directory.

    Returns:
      The path to the LLVM prebuilts directory, or None if it cannot be
      located.

    Raises:
      ValueError: If the host OS is not supported (see `_get_os_name`).
    """
    build_top = _get_build_top()
    if build_top is None:
        # Not inside a source tree, thus there is no base directory that the
        # prebuilts path can be resolved against.
        return None

    llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
    if not llvm_prebuilts_base:
        llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

    llvm_dir = os.path.join(
        build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

    if not os.path.exists(llvm_dir):
        return None

    llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
    if not llvm_prebuilts_version:
        llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
    if not llvm_prebuilts_version:
        # The prebuilts directory does not contain any recognizable version.
        return None

    llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

    if not os.path.exists(llvm_dir):
        return None

    return llvm_dir
140
141
def _get_llvm_readobj():
    """Find the path to llvm-readobj executable.

    Returns:
      The path to `bin/llvm-readobj` in the LLVM prebuilts directory if that
      file exists.  Otherwise, the bare command name 'llvm-readobj'.
    """
    llvm_dir = _get_llvm_dir()
    # `_get_llvm_dir()` returns None when the prebuilts cannot be located.
    if llvm_dir:
        llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
        if os.path.exists(llvm_readobj):
            return llvm_readobj
    return 'llvm-readobj'
147
148
class ELFError(ValueError):
    """Base class of the errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
    """Error raised when a file does not have a valid ELF magic word."""

    def __init__(self):
        # The error always carries the same fixed message.
        ELFError.__init__(self, 'bad ELF magic')
158
159
class ELFParser(object):
    """ELF file parser.

    The ELF header is read directly from the file.  The dynamic table and the
    dynamic symbols are obtained by parsing the output of `llvm-readobj`.
    """

    @classmethod
    def _read_elf_header(cls, elf_file_path):
        """Read the leading fields of the ELF header from the file.

        Returns:
          An `ELFHeader`, or None if the file is too short to contain the
          header fields.
        """
        with open(elf_file_path, 'rb') as elf_file:
            buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
        try:
            return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
        except struct.error:
            return None

    @classmethod
    def open(cls, elf_file_path, llvm_readobj):
        """Open and parse the ELF file.

        Args:
          elf_file_path: Path to the file to be parsed.
          llvm_readobj: Path to (or command name of) llvm-readobj.

        Returns:
          An `ELF` tuple.

        Raises:
          ELFInvalidMagicError: If the file does not start with the ELF magic
            word.
          subprocess.CalledProcessError: If llvm-readobj exits with a non-zero
            status.
        """
        # Parse the ELF header to check the magic word.
        header = cls._read_elf_header(elf_file_path)
        if not header or header.ei_magic != _ELF_MAGIC:
            raise ELFInvalidMagicError()

        # Run llvm-readobj and parse the output.
        return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)

    @classmethod
    def _find_prefix(cls, pattern, lines_it):
        """Iterate `lines_it` until finding a string that starts with
        `pattern`.

        Returns True if such a line is found (the iterator is left right after
        that line).  Returns False if the iterator is exhausted first.
        """
        for line in lines_it:
            if line.startswith(pattern):
                return True
        return False

    @classmethod
    def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
        """Run llvm-readobj and parse the output.

        Raises:
          subprocess.CalledProcessError: If llvm-readobj exits with a non-zero
            status.
        """
        cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols', elf_file_path]
        # `universal_newlines=True` makes the output a text string on
        # Python 3.  Without it the output is `bytes`, which cannot be matched
        # against the text patterns below.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, _ = proc.communicate()
        rc = proc.returncode
        if rc != 0:
            raise subprocess.CalledProcessError(rc, cmd, out)
        lines = out.splitlines()
        return cls._parse_llvm_readobj(elf_file_path, header, lines)

    @classmethod
    def _parse_llvm_readobj(cls, elf_file_path, header, lines):
        """Parse the output of llvm-readobj.

        Args:
          elf_file_path: Path to the ELF file (its base name is the default
            DT_SONAME).
          header: The `ELFHeader` of the file.
          lines: The lines printed by llvm-readobj.

        Returns:
          An `ELF` tuple.
        """
        # Each section is searched with its own iterator.  With a shared
        # iterator, a missing dynamic table would consume all the lines and
        # the dynamic symbols would never be found.
        lines = list(lines)
        dt_soname, dt_needed = cls._parse_dynamic_table(
            elf_file_path, iter(lines))
        imported, exported = cls._parse_dynamic_symbols(iter(lines))
        return ELF(dt_soname, dt_needed, imported, exported, header)

    # NOTE(review): The patterns of indented lines accept any indentation
    # width (`\s+`) instead of a fixed number of spaces because the exact
    # width printed by llvm-readobj could not be confirmed from this file.
    # The section end markers must still start at the first column.

    _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

    _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

    _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

    _DYNAMIC_SECTION_END_PATTERN = ']'

    @classmethod
    def _parse_dynamic_table(cls, elf_file_path, lines_it):
        """Parse the dynamic table section.

        Returns:
          A `(dt_soname, dt_needed)` tuple.  `dt_soname` falls back to the
          base name of `elf_file_path` if there is no SONAME entry.
          `dt_needed` is the list of NEEDED entries in the order of
          appearance.
        """
        dt_soname = os.path.basename(elf_file_path)
        dt_needed = []

        dynamic = cls._find_prefix(
            cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
        if not dynamic:
            return (dt_soname, dt_needed)

        for line in lines_it:
            if line == cls._DYNAMIC_SECTION_END_PATTERN:
                break

            match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
            if match:
                dt_needed.append(match.group(1))
                continue

            match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
            if match:
                dt_soname = match.group(1)
                continue

        return (dt_soname, dt_needed)

    _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
    _DYNAMIC_SYMBOLS_END_PATTERN = ']'

    _SYMBOL_ENTRY_START_PATTERN = re.compile(r'^\s+Symbol \{$')
    _SYMBOL_ENTRY_PATTERN = re.compile(r'^\s+([A-Za-z0-9_]+): (.*)$')
    _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
        r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
    _SYMBOL_ENTRY_END_PATTERN = re.compile(r'^\s+\}$')

    @staticmethod
    def _parse_symbol_name(name_with_version):
        """Split `name_with_version` into name and version.

        The string is split at the last occurrence of `@@` or `@`.  The
        version is an empty string if there is no `@`.
        """
        name, separator, version = name_with_version.rpartition('@')
        if not separator:
            return (name_with_version, '')
        if name.endswith('@'):
            # The separator was `@@`; drop its first character as well.
            name = name[:-1]
        return (name, version)

    @classmethod
    def _parse_dynamic_symbols(cls, lines_it):
        """Parse dynamic symbol table and collect imported and exported
        symbols.

        Returns:
          An `(imported, exported)` tuple.  Both are dicts that map a symbol
          name to the set of its versions.  `imported` has the undefined
          symbols except the weak ones.  `exported` has the defined symbols
          except the local ones.  Symbols without a name are ignored.
        """
        imported = collections.defaultdict(set)
        exported = collections.defaultdict(set)

        for symbol in cls._parse_dynamic_symbols_internal(lines_it):
            name, version = cls._parse_symbol_name(symbol['Name'])
            if not name:
                continue
            if symbol['Section'] == 'Undefined':
                if symbol['Binding'] != 'Weak':
                    imported[name].add(version)
            elif symbol['Binding'] != 'Local':
                exported[name].add(version)

        # Freeze the returned imported/exported dict.
        return (dict(imported), dict(exported))

    @classmethod
    def _parse_dynamic_symbols_internal(cls, lines_it):
        """Parse symbol entries and yield each symbol.

        Each symbol is a dict that maps a field name to its value.  The
        trailing parenthesized number of a value (e.g. ` (0x1)`) is removed.
        """
        if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
            return

        symbol = None
        for line in lines_it:
            if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
                return

            if cls._SYMBOL_ENTRY_START_PATTERN.match(line):
                symbol = {}
                continue

            if symbol is None:
                # Ignore the lines that do not belong to any symbol entry.
                continue

            if cls._SYMBOL_ENTRY_END_PATTERN.match(line):
                yield symbol
                symbol = None
                continue

            match = cls._SYMBOL_ENTRY_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub(
                    '', match.group(2))
                symbol[key] = value
328
329
class Checker(object):
    """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

    Each check prints its diagnostics to stderr and terminates the process
    with exit status 2 on failure.
    """

    if sys.stderr.isatty():
        _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
        _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
    else:
        # No colors if stderr is not a terminal.
        _ERROR_TAG = 'error:'
        _NOTE_TAG = 'note:'

    def __init__(self, llvm_readobj):
        """Create a checker that runs `llvm_readobj` to parse ELF files."""
        self._file_path = ''
        self._file_under_test = None
        self._shared_libs = []

        self._llvm_readobj = llvm_readobj

    @staticmethod
    def _to_text(value):
        """Return `value` as text, decoding it as UTF-8 if it is `bytes`.

        Depending on the Python version and the parser, names may be either
        byte strings or text strings.  Both are accepted.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _error(self, *args):
        """Emit an error to stderr."""
        print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)

    def _note(self, *args):
        """Emit a note to stderr."""
        print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)

    def _load_elf_file(self, path, skip_bad_elf_magic):
        """Load an ELF file from the `path`.

        Exits with status 2 if the file cannot be opened.  If the file does
        not have a valid ELF magic word, exits with status 0 when
        `skip_bad_elf_magic` is true, and with status 2 otherwise.  Any other
        error is reported and re-raised.
        """
        try:
            return ELFParser.open(path, self._llvm_readobj)
        except (IOError, OSError):
            self._error('Failed to open "{}".'.format(path))
            sys.exit(2)
        except ELFInvalidMagicError:
            if skip_bad_elf_magic:
                sys.exit(0)
            else:
                self._error(
                    'File "{}" must have a valid ELF magic word.'.format(path))
                sys.exit(2)
        except Exception:
            self._error(
                'An unknown error occurred while opening "{}".'.format(path))
            raise

    def load_file_under_test(self, path, skip_bad_elf_magic,
                             skip_unknown_elf_machine):
        """Load file-under-test (either an executable or a shared lib).

        Exits with status 0 if `skip_unknown_elf_machine` is true and the
        machine of the file is not one of the known machines.
        """
        self._file_path = path
        self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

        if skip_unknown_elf_machine and \
                self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
            sys.exit(0)

    def load_shared_libs(self, shared_lib_paths):
        """Load shared libraries."""
        for path in shared_lib_paths:
            self._shared_libs.append(self._load_elf_file(path, False))

    def check_dt_soname(self, soname):
        """Check whether DT_SONAME matches installation file name."""
        if self._file_under_test.dt_soname != soname:
            self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                        .format(self._file_under_test.dt_soname, soname))
            sys.exit(2)

    def check_dt_needed(self, system_shared_lib_names):
        """Check whether all DT_NEEDED entries are specified in the build
        system.

        Args:
          system_shared_lib_names: Module names to be left out from the fix
            suggestions.
        """
        missing_shared_libs = False

        # Collect the DT_SONAMEs from shared libs specified in the build
        # system.
        specified_sonames = {lib.dt_soname for lib in self._shared_libs}

        # Check whether all DT_NEEDED entries are specified.
        for lib in self._file_under_test.dt_needed:
            if lib not in specified_sonames:
                self._error('DT_NEEDED "{}" is not specified in shared_libs.'
                            .format(self._to_text(lib)))
                missing_shared_libs = True

        if missing_shared_libs:
            dt_needed = sorted(set(self._file_under_test.dt_needed))
            modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

            # Remove system shared libraries from the suggestion since they
            # are added by default.
            modules = [name for name in modules
                       if name not in system_shared_lib_names]

            self._note()
            self._note('Fix suggestions:')
            self._note(
                '  Android.bp: shared_libs: [' +
                ', '.join('"' + module + '"' for module in modules) + '],')
            self._note(
                '  Android.mk: LOCAL_SHARED_LIBRARIES := ' +
                ' '.join(modules))

            self._note()
            self._note(
                'If the fix above doesn\'t work, bypass this check with:')
            self._note('  Android.bp: check_elf_files: false,')
            self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')

            sys.exit(2)

    @staticmethod
    def _find_symbol(lib, name, version):
        """Check whether the symbol name and version matches a definition in
        lib."""
        try:
            lib_sym_vers = lib.exported[name]
        except KeyError:
            return False
        if version == '':  # Symbol version is not requested
            return True
        return version in lib_sym_vers

    @classmethod
    def _find_symbol_from_libs(cls, libs, name, version):
        """Check whether the symbol name and version is defined in one of the
        shared libraries in libs.

        Returns the first library that defines the symbol, or None.
        """
        for lib in libs:
            if cls._find_symbol(lib, name, version):
                return lib
        return None

    def check_symbols(self):
        """Check whether all undefined symbols are resolved to a
        definition."""
        all_elf_files = [self._file_under_test] + self._shared_libs
        missing_symbols = []
        # `items()` is available on both Python 2 and Python 3, whereas
        # `iteritems()` only exists on Python 2.
        for sym, imported_vers in self._file_under_test.imported.items():
            for imported_ver in imported_vers:
                lib = self._find_symbol_from_libs(
                    all_elf_files, sym, imported_ver)
                if not lib:
                    missing_symbols.append((sym, imported_ver))

        if missing_symbols:
            for sym, ver in sorted(missing_symbols):
                sym = self._to_text(sym)
                if ver:
                    sym += '@' + self._to_text(ver)
                self._error('Unresolved symbol: {}'.format(sym))

            self._note()
            self._note('Some dependencies might be changed, thus the '
                       'symbol(s) above cannot be resolved.')
            self._note('Please re-build the prebuilt file: "{}".'
                       .format(self._file_path))

            self._note()
            self._note('If this is a new prebuilt file and it is designed to '
                       'have unresolved symbols, add one of the following '
                       'properties:')
            self._note('  Android.bp: allow_undefined_symbols: true,')
            self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

            sys.exit(2)
496
497
def _parse_args():
    """Parse command line options.

    Returns the namespace produced by `argparse` from `sys.argv`.
    """
    parser = argparse.ArgumentParser()

    # Input file
    parser.add_argument('file', help='Path to the input file to be checked')
    parser.add_argument('--soname',
                        help='Shared object name of the input file')

    # Options that may be repeated: shared library dependencies and system
    # shared library names.
    repeatable_options = (
        ('--shared-lib', 'Path to shared library dependencies'),
        ('--system-shared-lib',
         'System shared libraries to be hidden from fix suggestions'),
    )
    for flag, description in repeatable_options:
        parser.add_argument(flag, action='append', default=[],
                            help=description)

    # Check options
    check_switches = (
        ('--skip-bad-elf-magic',
         'Ignore the input file without the ELF magic word'),
        ('--skip-unknown-elf-machine',
         'Ignore the input file with unknown machine ID'),
        ('--allow-undefined-symbols',
         'Ignore unresolved undefined symbols'),
    )
    for flag, description in check_switches:
        parser.add_argument(flag, action='store_true', help=description)

    # Other options
    parser.add_argument('--llvm-readobj',
                        help='Path to the llvm-readobj executable')

    return parser.parse_args()
530
531
def main():
    """Parse the command line options, load the ELF files, and run the
    checks."""
    args = _parse_args()

    # Use the executable specified on the command line if there is one.
    # Otherwise, look for it.
    llvm_readobj = args.llvm_readobj or _get_llvm_readobj()

    # Load ELF files
    checker = Checker(llvm_readobj)
    checker.load_file_under_test(
        args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
    checker.load_shared_libs(args.shared_lib)

    # Run checks
    if args.soname:
        checker.check_dt_soname(args.soname)
    checker.check_dt_needed(args.system_shared_lib)
    if not args.allow_undefined_symbols:
        checker.check_symbols()


if __name__ == '__main__':
    main()