blob: 1207f523f2ee41e7a38687568033e06454552799 [file] [log] [blame]
//===- InputChunks.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "InputChunks.h"
10#include "Config.h"
11#include "OutputSegment.h"
12#include "WriterUtils.h"
13#include "lld/Common/ErrorHandler.h"
14#include "lld/Common/LLVM.h"
15#include "llvm/Support/LEB128.h"
Chris Wailesbcf972c2021-10-21 11:03:28 -070016#include "llvm/Support/xxhash.h"
Inna Palantff3f07a2019-07-11 16:15:26 -070017
18#define DEBUG_TYPE "lld"
19
20using namespace llvm;
21using namespace llvm::wasm;
22using namespace llvm::support::endian;
Inna Palantff3f07a2019-07-11 16:15:26 -070023
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +020024namespace lld {
25StringRef relocTypeToString(uint8_t relocType) {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080026 switch (relocType) {
Inna Palantff3f07a2019-07-11 16:15:26 -070027#define WASM_RELOC(NAME, REL) \
28 case REL: \
29 return #NAME;
30#include "llvm/BinaryFormat/WasmRelocs.def"
31#undef WASM_RELOC
32 }
33 llvm_unreachable("unknown reloc type");
34}
35
Thiébaud Weksteene40e7362020-10-28 15:03:00 +010036bool relocIs64(uint8_t relocType) {
37 switch (relocType) {
38 case R_WASM_MEMORY_ADDR_LEB64:
39 case R_WASM_MEMORY_ADDR_SLEB64:
40 case R_WASM_MEMORY_ADDR_REL_SLEB64:
41 case R_WASM_MEMORY_ADDR_I64:
Chris Wailesbcf972c2021-10-21 11:03:28 -070042 case R_WASM_TABLE_INDEX_SLEB64:
43 case R_WASM_TABLE_INDEX_I64:
44 case R_WASM_FUNCTION_OFFSET_I64:
45 case R_WASM_TABLE_INDEX_REL_SLEB64:
46 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +010047 return true;
48 default:
49 return false;
50 }
51}
52
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +020053std::string toString(const wasm::InputChunk *c) {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080054 return (toString(c->file) + ":(" + c->getName() + ")").str();
Inna Palantff3f07a2019-07-11 16:15:26 -070055}
56
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +020057namespace wasm {
Inna Palantff3f07a2019-07-11 16:15:26 -070058StringRef InputChunk::getComdatName() const {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080059 uint32_t index = getComdat();
60 if (index == UINT32_MAX)
Inna Palantff3f07a2019-07-11 16:15:26 -070061 return StringRef();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080062 return file->getWasmObj()->linkingData().Comdats[index];
Inna Palantff3f07a2019-07-11 16:15:26 -070063}
64
Chris Wailesbcf972c2021-10-21 11:03:28 -070065uint32_t InputChunk::getSize() const {
66 if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this))
67 return ms->builder.getSize();
Inna Palantff3f07a2019-07-11 16:15:26 -070068
Chris Wailesbcf972c2021-10-21 11:03:28 -070069 if (const auto *f = dyn_cast<InputFunction>(this)) {
70 if (config->compressRelocations && f->file) {
71 return f->getCompressedSize();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080072 }
Inna Palantff3f07a2019-07-11 16:15:26 -070073 }
Chris Wailesbcf972c2021-10-21 11:03:28 -070074
75 return data().size();
76}
77
78uint32_t InputChunk::getInputSize() const {
79 if (const auto *f = dyn_cast<InputFunction>(this))
80 return f->function->Size;
81 return getSize();
Inna Palantff3f07a2019-07-11 16:15:26 -070082}
83
84// Copy this input chunk to an mmap'ed output file and apply relocations.
Chih-Hung Hsieh08600532019-12-19 15:55:38 -080085void InputChunk::writeTo(uint8_t *buf) const {
Chris Wailesbcf972c2021-10-21 11:03:28 -070086 if (const auto *f = dyn_cast<InputFunction>(this)) {
87 if (file && config->compressRelocations)
88 return f->writeCompressed(buf);
89 } else if (const auto *ms = dyn_cast<SyntheticMergedChunk>(this)) {
90 ms->builder.write(buf + outSecOff);
91 // Apply relocations
92 ms->relocate(buf + outSecOff);
93 return;
94 }
95
Inna Palantff3f07a2019-07-11 16:15:26 -070096 // Copy contents
Chris Wailesbcf972c2021-10-21 11:03:28 -070097 memcpy(buf + outSecOff, data().data(), data().size());
Inna Palantff3f07a2019-07-11 16:15:26 -070098
99 // Apply relocations
Chris Wailesbcf972c2021-10-21 11:03:28 -0700100 relocate(buf + outSecOff);
101}
102
103void InputChunk::relocate(uint8_t *buf) const {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800104 if (relocations.empty())
Inna Palantff3f07a2019-07-11 16:15:26 -0700105 return;
106
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +0200107 LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800108 << " count=" << relocations.size() << "\n");
Chris Wailesbcf972c2021-10-21 11:03:28 -0700109 int32_t inputSectionOffset = getInputSectionOffset();
110 uint64_t tombstone = getTombstone();
Inna Palantff3f07a2019-07-11 16:15:26 -0700111
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800112 for (const WasmRelocation &rel : relocations) {
Chris Wailesbcf972c2021-10-21 11:03:28 -0700113 uint8_t *loc = buf + rel.Offset - inputSectionOffset;
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800114 LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
115 if (rel.Type != R_WASM_TYPE_INDEX_LEB)
116 LLVM_DEBUG(dbgs() << " sym=" << file->getSymbols()[rel.Index]->getName());
117 LLVM_DEBUG(dbgs() << " addend=" << rel.Addend << " index=" << rel.Index
Chris Wailesbcf972c2021-10-21 11:03:28 -0700118 << " offset=" << rel.Offset << "\n");
119 auto value = file->calcNewValue(rel, tombstone, this);
Inna Palantff3f07a2019-07-11 16:15:26 -0700120
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800121 switch (rel.Type) {
122 case R_WASM_TYPE_INDEX_LEB:
123 case R_WASM_FUNCTION_INDEX_LEB:
124 case R_WASM_GLOBAL_INDEX_LEB:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700125 case R_WASM_TAG_INDEX_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800126 case R_WASM_MEMORY_ADDR_LEB:
Chris Wailese3116c42021-07-13 14:40:48 -0700127 case R_WASM_TABLE_NUMBER_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800128 encodeULEB128(value, loc, 5);
Inna Palantff3f07a2019-07-11 16:15:26 -0700129 break;
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100130 case R_WASM_MEMORY_ADDR_LEB64:
131 encodeULEB128(value, loc, 10);
132 break;
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800133 case R_WASM_TABLE_INDEX_SLEB:
134 case R_WASM_TABLE_INDEX_REL_SLEB:
135 case R_WASM_MEMORY_ADDR_SLEB:
136 case R_WASM_MEMORY_ADDR_REL_SLEB:
Chris Wailese3116c42021-07-13 14:40:48 -0700137 case R_WASM_MEMORY_ADDR_TLS_SLEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800138 encodeSLEB128(static_cast<int32_t>(value), loc, 5);
Inna Palantff3f07a2019-07-11 16:15:26 -0700139 break;
Chris Wailese3116c42021-07-13 14:40:48 -0700140 case R_WASM_TABLE_INDEX_SLEB64:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700141 case R_WASM_TABLE_INDEX_REL_SLEB64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100142 case R_WASM_MEMORY_ADDR_SLEB64:
143 case R_WASM_MEMORY_ADDR_REL_SLEB64:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700144 case R_WASM_MEMORY_ADDR_TLS_SLEB64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100145 encodeSLEB128(static_cast<int64_t>(value), loc, 10);
146 break;
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800147 case R_WASM_TABLE_INDEX_I32:
148 case R_WASM_MEMORY_ADDR_I32:
149 case R_WASM_FUNCTION_OFFSET_I32:
150 case R_WASM_SECTION_OFFSET_I32:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100151 case R_WASM_GLOBAL_INDEX_I32:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700152 case R_WASM_MEMORY_ADDR_LOCREL_I32:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800153 write32le(loc, value);
Inna Palantff3f07a2019-07-11 16:15:26 -0700154 break;
Chris Wailese3116c42021-07-13 14:40:48 -0700155 case R_WASM_TABLE_INDEX_I64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100156 case R_WASM_MEMORY_ADDR_I64:
Chris Wailese3116c42021-07-13 14:40:48 -0700157 case R_WASM_FUNCTION_OFFSET_I64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100158 write64le(loc, value);
159 break;
Inna Palantff3f07a2019-07-11 16:15:26 -0700160 default:
161 llvm_unreachable("unknown relocation type");
162 }
163 }
164}
165
166// Copy relocation entries to a given output stream.
167// This function is used only when a user passes "-r". For a regular link,
168// we consume relocations instead of copying them to an output file.
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800169void InputChunk::writeRelocations(raw_ostream &os) const {
170 if (relocations.empty())
Inna Palantff3f07a2019-07-11 16:15:26 -0700171 return;
172
Chris Wailesbcf972c2021-10-21 11:03:28 -0700173 int32_t off = outSecOff - getInputSectionOffset();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800174 LLVM_DEBUG(dbgs() << "writeRelocations: " << file->getName()
175 << " offset=" << Twine(off) << "\n");
Inna Palantff3f07a2019-07-11 16:15:26 -0700176
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800177 for (const WasmRelocation &rel : relocations) {
178 writeUleb128(os, rel.Type, "reloc type");
179 writeUleb128(os, rel.Offset + off, "reloc offset");
180 writeUleb128(os, file->calcNewIndex(rel), "reloc index");
Inna Palantff3f07a2019-07-11 16:15:26 -0700181
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800182 if (relocTypeHasAddend(rel.Type))
183 writeSleb128(os, file->calcNewAddend(rel), "reloc addend");
Inna Palantff3f07a2019-07-11 16:15:26 -0700184 }
185}
186
Chris Wailesbcf972c2021-10-21 11:03:28 -0700187uint64_t InputChunk::getTombstone() const {
188 if (const auto *s = dyn_cast<InputSection>(this)) {
189 return s->tombstoneValue;
190 }
191
192 return 0;
193}
194
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800195void InputFunction::setFunctionIndex(uint32_t index) {
Inna Palantff3f07a2019-07-11 16:15:26 -0700196 LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << getName()
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800197 << " -> " << index << "\n");
Inna Palantff3f07a2019-07-11 16:15:26 -0700198 assert(!hasFunctionIndex());
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800199 functionIndex = index;
Inna Palantff3f07a2019-07-11 16:15:26 -0700200}
201
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800202void InputFunction::setTableIndex(uint32_t index) {
Inna Palantff3f07a2019-07-11 16:15:26 -0700203 LLVM_DEBUG(dbgs() << "InputFunction::setTableIndex: " << getName() << " -> "
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800204 << index << "\n");
Inna Palantff3f07a2019-07-11 16:15:26 -0700205 assert(!hasTableIndex());
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800206 tableIndex = index;
Inna Palantff3f07a2019-07-11 16:15:26 -0700207}
208
209// Write a relocation value without padding and return the number of bytes
210// witten.
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800211static unsigned writeCompressedReloc(uint8_t *buf, const WasmRelocation &rel,
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100212 uint64_t value) {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800213 switch (rel.Type) {
214 case R_WASM_TYPE_INDEX_LEB:
215 case R_WASM_FUNCTION_INDEX_LEB:
216 case R_WASM_GLOBAL_INDEX_LEB:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700217 case R_WASM_TAG_INDEX_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800218 case R_WASM_MEMORY_ADDR_LEB:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100219 case R_WASM_MEMORY_ADDR_LEB64:
Chris Wailese3116c42021-07-13 14:40:48 -0700220 case R_WASM_TABLE_NUMBER_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800221 return encodeULEB128(value, buf);
222 case R_WASM_TABLE_INDEX_SLEB:
Chris Wailese3116c42021-07-13 14:40:48 -0700223 case R_WASM_TABLE_INDEX_SLEB64:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800224 case R_WASM_MEMORY_ADDR_SLEB:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100225 case R_WASM_MEMORY_ADDR_SLEB64:
226 return encodeSLEB128(static_cast<int64_t>(value), buf);
Inna Palantff3f07a2019-07-11 16:15:26 -0700227 default:
228 llvm_unreachable("unexpected relocation type");
229 }
230}
231
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800232static unsigned getRelocWidthPadded(const WasmRelocation &rel) {
233 switch (rel.Type) {
234 case R_WASM_TYPE_INDEX_LEB:
235 case R_WASM_FUNCTION_INDEX_LEB:
236 case R_WASM_GLOBAL_INDEX_LEB:
Chris Wailesbcf972c2021-10-21 11:03:28 -0700237 case R_WASM_TAG_INDEX_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800238 case R_WASM_MEMORY_ADDR_LEB:
Chris Wailese3116c42021-07-13 14:40:48 -0700239 case R_WASM_TABLE_NUMBER_LEB:
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800240 case R_WASM_TABLE_INDEX_SLEB:
241 case R_WASM_MEMORY_ADDR_SLEB:
Inna Palantff3f07a2019-07-11 16:15:26 -0700242 return 5;
Chris Wailese3116c42021-07-13 14:40:48 -0700243 case R_WASM_TABLE_INDEX_SLEB64:
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100244 case R_WASM_MEMORY_ADDR_LEB64:
245 case R_WASM_MEMORY_ADDR_SLEB64:
246 return 10;
Inna Palantff3f07a2019-07-11 16:15:26 -0700247 default:
248 llvm_unreachable("unexpected relocation type");
249 }
250}
251
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100252static unsigned getRelocWidth(const WasmRelocation &rel, uint64_t value) {
253 uint8_t buf[10];
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800254 return writeCompressedReloc(buf, rel, value);
Inna Palantff3f07a2019-07-11 16:15:26 -0700255}
256
257// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
258// so that a fast linker can blindly overwrite them without needing to worry
259// about the number of bytes needed to encode the values.
260// However, for optimal output the code section can be compressed to remove
261// the padding then outputting non-relocatable files.
262// In this case we need to perform a size calculation based on the value at each
263// relocation. At best we end up saving 4 bytes for each relocation entry.
264//
265// This function only computes the final output size. It must be called
266// before getSize() is used to calculate of layout of the code section.
267void InputFunction::calculateSize() {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800268 if (!file || !config->compressRelocations)
Inna Palantff3f07a2019-07-11 16:15:26 -0700269 return;
270
271 LLVM_DEBUG(dbgs() << "calculateSize: " << getName() << "\n");
272
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800273 const uint8_t *secStart = file->codeSection->Content.data();
274 const uint8_t *funcStart = secStart + getInputSectionOffset();
275 uint32_t functionSizeLength;
276 decodeULEB128(funcStart, &functionSizeLength);
Inna Palantff3f07a2019-07-11 16:15:26 -0700277
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800278 uint32_t start = getInputSectionOffset();
279 uint32_t end = start + function->Size;
Inna Palantff3f07a2019-07-11 16:15:26 -0700280
Chris Wailesbcf972c2021-10-21 11:03:28 -0700281 uint64_t tombstone = getTombstone();
Chris Wailese3116c42021-07-13 14:40:48 -0700282
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800283 uint32_t lastRelocEnd = start + functionSizeLength;
284 for (const WasmRelocation &rel : relocations) {
285 LLVM_DEBUG(dbgs() << " region: " << (rel.Offset - lastRelocEnd) << "\n");
286 compressedFuncSize += rel.Offset - lastRelocEnd;
Chris Wailesbcf972c2021-10-21 11:03:28 -0700287 compressedFuncSize +=
288 getRelocWidth(rel, file->calcNewValue(rel, tombstone, this));
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800289 lastRelocEnd = rel.Offset + getRelocWidthPadded(rel);
Inna Palantff3f07a2019-07-11 16:15:26 -0700290 }
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800291 LLVM_DEBUG(dbgs() << " final region: " << (end - lastRelocEnd) << "\n");
292 compressedFuncSize += end - lastRelocEnd;
Inna Palantff3f07a2019-07-11 16:15:26 -0700293
294 // Now we know how long the resulting function is we can add the encoding
295 // of its length
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800296 uint8_t buf[5];
297 compressedSize = compressedFuncSize + encodeULEB128(compressedFuncSize, buf);
Inna Palantff3f07a2019-07-11 16:15:26 -0700298
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800299 LLVM_DEBUG(dbgs() << " calculateSize orig: " << function->Size << "\n");
300 LLVM_DEBUG(dbgs() << " calculateSize new: " << compressedSize << "\n");
Inna Palantff3f07a2019-07-11 16:15:26 -0700301}
302
303// Override the default writeTo method so that we can (optionally) write the
304// compressed version of the function.
Chris Wailesbcf972c2021-10-21 11:03:28 -0700305void InputFunction::writeCompressed(uint8_t *buf) const {
306 buf += outSecOff;
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800307 uint8_t *orig = buf;
308 (void)orig;
Inna Palantff3f07a2019-07-11 16:15:26 -0700309
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800310 const uint8_t *secStart = file->codeSection->Content.data();
311 const uint8_t *funcStart = secStart + getInputSectionOffset();
312 const uint8_t *end = funcStart + function->Size;
Chris Wailesbcf972c2021-10-21 11:03:28 -0700313 uint64_t tombstone = getTombstone();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800314 uint32_t count;
315 decodeULEB128(funcStart, &count);
316 funcStart += count;
Inna Palantff3f07a2019-07-11 16:15:26 -0700317
318 LLVM_DEBUG(dbgs() << "write func: " << getName() << "\n");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800319 buf += encodeULEB128(compressedFuncSize, buf);
320 const uint8_t *lastRelocEnd = funcStart;
321 for (const WasmRelocation &rel : relocations) {
322 unsigned chunkSize = (secStart + rel.Offset) - lastRelocEnd;
323 LLVM_DEBUG(dbgs() << " write chunk: " << chunkSize << "\n");
324 memcpy(buf, lastRelocEnd, chunkSize);
325 buf += chunkSize;
Chris Wailesbcf972c2021-10-21 11:03:28 -0700326 buf += writeCompressedReloc(buf, rel,
327 file->calcNewValue(rel, tombstone, this));
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800328 lastRelocEnd = secStart + rel.Offset + getRelocWidthPadded(rel);
Inna Palantff3f07a2019-07-11 16:15:26 -0700329 }
330
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800331 unsigned chunkSize = end - lastRelocEnd;
332 LLVM_DEBUG(dbgs() << " write final chunk: " << chunkSize << "\n");
333 memcpy(buf, lastRelocEnd, chunkSize);
334 LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n");
335}
336
Chris Wailesbcf972c2021-10-21 11:03:28 -0700337uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
338 if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
339 LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << getName() << "\n");
340 LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
341 LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
342 << "\n");
343 assert(ms->parent);
344 return ms->parent->getChunkOffset(ms->getParentOffset(offset));
345 }
346 return outputSegmentOffset + offset;
347}
348
349uint64_t InputChunk::getOffset(uint64_t offset) const {
350 return outSecOff + getChunkOffset(offset);
351}
352
353uint64_t InputChunk::getVA(uint64_t offset) const {
354 return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
355}
356
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800357// Generate code to apply relocations to the data section at runtime.
358// This is only called when generating shared libaries (PIC) where address are
359// not known at static link time.
Chris Wailesbcf972c2021-10-21 11:03:28 -0700360void InputChunk::generateRelocationCode(raw_ostream &os) const {
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800361 LLVM_DEBUG(dbgs() << "generating runtime relocations: " << getName()
362 << " count=" << relocations.size() << "\n");
363
Chris Wailesbcf972c2021-10-21 11:03:28 -0700364 bool is64 = config->is64.getValueOr(false);
365 unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
366 : WASM_OPCODE_I32_CONST;
367 unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
368 : WASM_OPCODE_I32_ADD;
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100369
Chris Wailesbcf972c2021-10-21 11:03:28 -0700370 uint64_t tombstone = getTombstone();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800371 // TODO(sbc): Encode the relocations in the data section and write a loop
372 // here to apply them.
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800373 for (const WasmRelocation &rel : relocations) {
Chris Wailesbcf972c2021-10-21 11:03:28 -0700374 uint64_t offset = getVA(rel.Offset) - getInputSectionOffset();
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800375
376 LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
377 << " addend=" << rel.Addend << " index=" << rel.Index
Chris Wailesbcf972c2021-10-21 11:03:28 -0700378 << " output offset=" << offset << "\n");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800379
380 // Get __memory_base
381 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
382 writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
383
384 // Add the offset of the relocation
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100385 writeU8(os, opcode_ptr_const, "CONST");
Chris Wailesbcf972c2021-10-21 11:03:28 -0700386 writeSleb128(os, offset, "offset");
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100387 writeU8(os, opcode_ptr_add, "ADD");
388
389 bool is64 = relocIs64(rel.Type);
390 unsigned opcode_reloc_const =
391 is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
392 unsigned opcode_reloc_add =
393 is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD;
394 unsigned opcode_reloc_store =
395 is64 ? WASM_OPCODE_I64_STORE : WASM_OPCODE_I32_STORE;
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800396
397 Symbol *sym = file->getSymbol(rel);
398 // Now figure out what we want to store
399 if (sym->hasGOTIndex()) {
400 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
401 writeUleb128(os, sym->getGOTIndex(), "global index");
402 if (rel.Addend) {
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100403 writeU8(os, opcode_reloc_const, "CONST");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800404 writeSleb128(os, rel.Addend, "addend");
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100405 writeU8(os, opcode_reloc_add, "ADD");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800406 }
407 } else {
408 const GlobalSymbol* baseSymbol = WasmSym::memoryBase;
Chris Wailese3116c42021-07-13 14:40:48 -0700409 if (rel.Type == R_WASM_TABLE_INDEX_I32 ||
410 rel.Type == R_WASM_TABLE_INDEX_I64)
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800411 baseSymbol = WasmSym::tableBase;
412 writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
413 writeUleb128(os, baseSymbol->getGlobalIndex(), "base");
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100414 writeU8(os, opcode_reloc_const, "CONST");
Chris Wailesbcf972c2021-10-21 11:03:28 -0700415 writeSleb128(os, file->calcNewValue(rel, tombstone, this), "offset");
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100416 writeU8(os, opcode_reloc_add, "ADD");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800417 }
418
419 // Store that value at the virtual address
Thiébaud Weksteene40e7362020-10-28 15:03:00 +0100420 writeU8(os, opcode_reloc_store, "I32_STORE");
Chih-Hung Hsieh08600532019-12-19 15:55:38 -0800421 writeUleb128(os, 2, "align");
422 writeUleb128(os, 0, "offset");
423 }
Inna Palantff3f07a2019-07-11 16:15:26 -0700424}
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +0200425
Chris Wailesbcf972c2021-10-21 11:03:28 -0700426// Split WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
427// null-terminated strings.
428void MergeInputChunk::splitStrings(ArrayRef<uint8_t> data) {
429 LLVM_DEBUG(llvm::dbgs() << "splitStrings\n");
430 size_t off = 0;
431 StringRef s = toStringRef(data);
432
433 while (!s.empty()) {
434 size_t end = s.find(0);
435 if (end == StringRef::npos)
436 fatal(toString(this) + ": string is not null terminated");
437 size_t size = end + 1;
438
439 pieces.emplace_back(off, xxHash64(s.substr(0, size)), true);
440 s = s.substr(size);
441 off += size;
442 }
443}
444
445// This function is called after we obtain a complete list of input sections
446// that need to be linked. This is responsible to split section contents
447// into small chunks for further processing.
448//
449// Note that this function is called from parallelForEach. This must be
450// thread-safe (i.e. no memory allocation from the pools).
451void MergeInputChunk::splitIntoPieces() {
452 assert(pieces.empty());
453 // As of now we only support WASM_SEG_FLAG_STRINGS but in the future we
454 // could add other types of splitting (see ELF's splitIntoPieces).
455 assert(flags & WASM_SEG_FLAG_STRINGS);
456 splitStrings(data());
457}
458
459SectionPiece *MergeInputChunk::getSectionPiece(uint64_t offset) {
460 if (this->data().size() <= offset)
461 fatal(toString(this) + ": offset is outside the section");
462
463 // If Offset is not at beginning of a section piece, it is not in the map.
464 // In that case we need to do a binary search of the original section piece
465 // vector.
466 auto it = partition_point(
467 pieces, [=](SectionPiece p) { return p.inputOff <= offset; });
468 return &it[-1];
469}
470
471// Returns the offset in an output section for a given input offset.
472// Because contents of a mergeable section is not contiguous in output,
473// it is not just an addition to a base output offset.
474uint64_t MergeInputChunk::getParentOffset(uint64_t offset) const {
475 // If Offset is not at beginning of a section piece, it is not in the map.
476 // In that case we need to search from the original section piece vector.
477 const SectionPiece *piece = getSectionPiece(offset);
478 uint64_t addend = offset - piece->inputOff;
479 return piece->outputOff + addend;
480}
481
482void SyntheticMergedChunk::finalizeContents() {
483 // Add all string pieces to the string table builder to create section
484 // contents.
485 for (MergeInputChunk *sec : chunks)
486 for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
487 if (sec->pieces[i].live)
488 builder.add(sec->getData(i));
489
490 // Fix the string table content. After this, the contents will never change.
491 builder.finalize();
492
493 // finalize() fixed tail-optimized strings, so we can now get
494 // offsets of strings. Get an offset for each string and save it
495 // to a corresponding SectionPiece for easy access.
496 for (MergeInputChunk *sec : chunks)
497 for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
498 if (sec->pieces[i].live)
499 sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
500}
501
Chris Wailese3116c42021-07-13 14:40:48 -0700502uint64_t InputSection::getTombstoneForSection(StringRef name) {
503 // When a function is not live we need to update relocations referring to it.
504 // If they occur in DWARF debug symbols, we want to change the pc of the
505 // function to -1 to avoid overlapping with a valid range. However for the
506 // debug_ranges and debug_loc sections that would conflict with the existing
507 // meaning of -1 so we use -2.
508 // Returning 0 means there is no tombstone value for this section, and relocation
509 // will just use the addend.
510 if (!name.startswith(".debug_"))
511 return 0;
512 if (name.equals(".debug_ranges") || name.equals(".debug_loc"))
513 return UINT64_C(-2);
514 return UINT64_C(-1);
515}
516
Jeff Vander Stoep247d86b2020-08-11 14:27:44 +0200517} // namespace wasm
518} // namespace lld