Merge "Create a HandleInput class to handle user input" into main
diff --git a/ext4_utils/Android.bp b/ext4_utils/Android.bp
index b28e84f..4db055b 100644
--- a/ext4_utils/Android.bp
+++ b/ext4_utils/Android.bp
@@ -87,7 +87,12 @@
 
 prebuilt_etc {
     name: "mke2fs.conf",
-    recovery_available: true,
+    src: "mke2fs.conf",
+}
+
+prebuilt_etc {
+    name: "mke2fs.conf.recovery",
+    recovery: true,
     src: "mke2fs.conf",
 }
 
diff --git a/libatrace_rust/Android.bp b/libatrace_rust/Android.bp
index 01dd1a1..e08ca1e 100644
--- a/libatrace_rust/Android.bp
+++ b/libatrace_rust/Android.bp
@@ -10,6 +10,7 @@
         "libtracing",
         "libtracing_subscriber",
     ],
+    min_sdk_version: "35",
 }
 
 rust_library {
@@ -43,6 +44,7 @@
         "libstatic_assertions",
         "libbitflags",
     ],
+    min_sdk_version: "35",
 }
 
 rust_library {
@@ -88,6 +90,7 @@
         "//apex_available:platform",
         "//apex_available:anyapex",
     ],
+    min_sdk_version: "35",
 }
 
 // TODO: b/291544011 - Replace with autogenerated wrappers once they are supported.
@@ -104,4 +107,5 @@
         "//apex_available:platform",
         "//apex_available:anyapex",
     ],
+    min_sdk_version: "35",
 }
diff --git a/memory_replay/VerifyTrace.cpp b/memory_replay/VerifyTrace.cpp
index daac05d..0094940 100644
--- a/memory_replay/VerifyTrace.cpp
+++ b/memory_replay/VerifyTrace.cpp
@@ -18,6 +18,7 @@
 #include <getopt.h>
 #include <inttypes.h>
 #include <stdio.h>
+#include <unistd.h>
 
 #include <string>
 #include <unordered_map>
@@ -30,36 +31,37 @@
 #include "File.h"
 
 static void Usage() {
-  fprintf(stderr, "Usage: %s [--attempt_recovery] TRACE_FILE1 TRACE_FILE2 ...\n",
+  fprintf(stderr, "Usage: %s [--attempt_repair] TRACE_FILE1 TRACE_FILE2 ...\n",
           android::base::Basename(android::base::GetExecutablePath()).c_str());
-  fprintf(stderr, "  --attempt_recovery\n");
-  fprintf(stderr, "    If a trace file has some errors, try to fix it. The new\n");
+  fprintf(stderr, "  --attempt_repair\n");
+  fprintf(stderr, "    If a trace file has some errors, try to fix them. The new\n");
   fprintf(stderr, "    file will be named TRACE_FILE.repair\n");
   fprintf(stderr, "  TRACE_FILE1 TRACE_FILE2 ...\n");
   fprintf(stderr, "      The trace files to verify\n");
-  fprintf(stderr, "\n  Print a trace to stdout.\n");
+  fprintf(stderr, "\n  Verify trace are valid.\n");
   exit(1);
 }
 
-static bool WriteRepairEntries(const char* trace_file, memory_trace::Entry* entries,
+static bool WriteRepairEntries(const std::string& repair_file, memory_trace::Entry* entries,
                                size_t num_entries) {
-  printf("Attempting to reapir trace_file %s\n", trace_file);
-  std::string repair_file(std::string(trace_file) + ".repair");
-  int fd = open(repair_file.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0644);
+  int fd = open(repair_file.c_str(), O_WRONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0644);
   if (fd == -1) {
-    printf("Failed to create repair file %s: %s\n", repair_file.c_str(), strerror(errno));
+    printf("  Failed to create repair file %s: %s\n", repair_file.c_str(), strerror(errno));
     return false;
   }
+  bool valid = true;
   for (size_t i = 0; i < num_entries; i++) {
     if (!memory_trace::WriteEntryToFd(fd, entries[i])) {
-      printf("Failed to write entry to file:\n");
-      close(fd);
-      return false;
+      printf("  Failed to write entry to file:\n");
+      valid = false;
+      break;
     }
   }
   close(fd);
-  printf("Attempt to repair trace has succeeded, new trace %s\n", repair_file.c_str());
-  return true;
+  if (!valid) {
+    unlink(repair_file.c_str());
+  }
+  return valid;
 }
 
 static void VerifyTrace(const char* trace_file, bool attempt_repair) {
@@ -69,9 +71,10 @@
   size_t num_entries;
   GetUnwindInfo(trace_file, &entries, &num_entries);
 
-  bool found_error = false;
-  bool error_repaired = false;
+  size_t errors_found = 0;
+  size_t errors_repaired = 0;
   std::unordered_map<uint64_t, std::pair<memory_trace::Entry*, size_t>> live_ptrs;
+  std::pair<memory_trace::Entry*, size_t> erased(nullptr, 0);
   for (size_t i = 0; i < num_entries; i++) {
     memory_trace::Entry* entry = &entries[i];
 
@@ -90,32 +93,38 @@
         }
         if (entry->u.old_ptr != 0) {
           // Verify old pointer
-          auto old_entry = live_ptrs.find(entry->u.old_ptr);
-          if (old_entry == live_ptrs.end()) {
-            printf("  Line %zu: freeing of unknown ptr 0x%" PRIx64 "\n", i + 1, entry->u.old_ptr);
-            printf("    %s\n", memory_trace::CreateStringFromEntry(*entry).c_str());
-            found_error = true;
-            if (attempt_repair) {
-              printf("  Unable to repair this failure.\n");
+          auto entry_iter = live_ptrs.find(entry->u.old_ptr);
+          if (entry_iter == live_ptrs.end()) {
+            // Verify the pointer didn't get realloc'd to itself.
+            if (entry->u.old_ptr != entry->ptr) {
+              printf("  Line %zu: freeing of unknown ptr 0x%" PRIx64 "\n", i + 1, entry->u.old_ptr);
+              printf("    %s\n", memory_trace::CreateStringFromEntry(*entry).c_str());
+              errors_found++;
+              if (attempt_repair) {
+                printf("  Unable to repair this failure.\n");
+              }
             }
           } else {
-            live_ptrs.erase(old_entry);
+            if (attempt_repair) {
+              erased = entry_iter->second;
+            }
+            live_ptrs.erase(entry_iter);
           }
         }
         break;
       case memory_trace::FREE:
         if (entry->ptr != 0) {
           // Verify pointer is present.
-          auto old_entry = live_ptrs.find(entry->ptr);
-          if (old_entry == live_ptrs.end()) {
+          auto entry_iter = live_ptrs.find(entry->ptr);
+          if (entry_iter == live_ptrs.end()) {
             printf("  Line %zu: freeing of unknown ptr 0x%" PRIx64 "\n", i + 1, entry->ptr);
             printf("    %s\n", memory_trace::CreateStringFromEntry(*entry).c_str());
-            found_error = true;
+            errors_found++;
             if (attempt_repair) {
               printf("  Unable to repair this failure.\n");
             }
           } else {
-            live_ptrs.erase(old_entry);
+            live_ptrs.erase(entry_iter);
           }
         }
         break;
@@ -126,30 +135,44 @@
     if (ptr != 0) {
       auto old_entry = live_ptrs.find(ptr);
       if (old_entry != live_ptrs.end()) {
-        printf("  Line %zu: duplicate ptr 0x%" PRIx64 " previously found at line %" PRId64 "\n",
-               i + 1, ptr, old_entry->second.second);
-        printf("    Original entry:\n");
-        printf("      %s\n", memory_trace::CreateStringFromEntry(*entry).c_str());
-        printf("    Duplicate pointer entry:\n");
+        printf("  Line %zu: duplicate ptr 0x%" PRIx64 "\n", i + 1, ptr);
+        printf("    Original entry at line %zu:\n", old_entry->second.second);
         printf("      %s\n", memory_trace::CreateStringFromEntry(*old_entry->second.first).c_str());
-        found_error = true;
+        printf("    Duplicate entry at line %zu:\n", i + 1);
+        printf("      %s\n", memory_trace::CreateStringFromEntry(*entry).c_str());
+        errors_found++;
         if (attempt_repair) {
           // There is a small chance of a race where the same pointer is returned
           // in two different threads before the free is recorded. If this occurs,
           // the way to repair is to search forward for the free of the pointer and
           // swap the two entries.
-          error_repaired = false;
+          bool fixed = false;
           for (size_t j = i + 1; j < num_entries; j++) {
-            if (entries[j].type == memory_trace::FREE && entries[j].ptr == ptr) {
-              memory_trace::Entry alloc_entry = *entry;
+            if ((entries[j].type == memory_trace::FREE && entries[j].ptr == ptr) ||
+                (entries[j].type == memory_trace::REALLOC && entries[j].u.old_ptr == ptr)) {
+              memory_trace::Entry tmp_entry = *entry;
               *entry = entries[j];
-              entries[j] = alloc_entry;
-              error_repaired = true;
+              entries[j] = tmp_entry;
+              errors_repaired++;
 
               live_ptrs.erase(old_entry);
+              if (entry->type == memory_trace::REALLOC) {
+                if (entry->ptr != 0) {
+                  // Need to add the newly allocated pointer.
+                  live_ptrs[entry->ptr] = std::make_pair(entry, i + 1);
+                }
+                if (erased.first != nullptr) {
+                  // Need to put the erased old ptr back.
+                  live_ptrs[tmp_entry.u.old_ptr] = erased;
+                }
+              }
+              fixed = true;
               break;
             }
           }
+          if (!fixed) {
+            printf("  Unable to fix error.\n");
+          }
         }
       } else {
         live_ptrs[ptr] = std::make_pair(entry, i + 1);
@@ -157,18 +180,22 @@
     }
   }
 
-  if (found_error) {
+  if (errors_found != 0) {
     printf("Trace %s is not valid.\n", trace_file);
     if (attempt_repair) {
-      if (error_repaired) {
-        // Save the repaired data out to a file.
-        if (!WriteRepairEntries(trace_file, entries, num_entries)) {
-          printf("Failed to write repaired entries to a file.\n");
-        }
+      // Save the repaired data out to a file.
+      std::string repair_file(std::string(trace_file) + ".repair");
+      printf("Creating repaired trace file %s...\n", repair_file.c_str());
+      if (!WriteRepairEntries(repair_file, entries, num_entries)) {
+        printf("Failed trying to write repaired entries to file.\n");
+      } else if (errors_repaired == errors_found) {
+        printf("Repaired file is complete, no more errors.\n");
       } else {
-        printf("Attempt to repair trace has failed.\n");
+        printf("Repaired file is still not valid.\n");
       }
     }
+  } else if (attempt_repair) {
+    printf("Trace %s is valid, no repair needed.\n", trace_file);
   } else {
     printf("Trace %s is valid.\n", trace_file);
   }
@@ -183,17 +210,22 @@
   };
   int option_index = 0;
   int opt = getopt_long(argc, argv, "", options, &option_index);
+  if (argc == 1 || (argc == 2 && opt != -1)) {
+    fprintf(stderr, "Requires at least one TRACE_FILE\n");
+    Usage();
+  }
+
   bool attempt_repair = false;
   if (opt == 'a') {
     attempt_repair = true;
   } else if (opt != -1) {
     Usage();
-  } else if (optind == argc) {
-    fprintf(stderr, "Requires at least one TRACE_FILE\n");
-    Usage();
   }
 
-  for (int i = optind; i < argc; i++) {
+  for (int i = 1; i < argc; i++) {
+    if (i + 1 == optind) {
+      continue;
+    }
     VerifyTrace(argv[i], attempt_repair);
   }
 
diff --git a/simpleperf/BranchListFile.cpp b/simpleperf/BranchListFile.cpp
index 5e2e4d1..13ab72e 100644
--- a/simpleperf/BranchListFile.cpp
+++ b/simpleperf/BranchListFile.cpp
@@ -615,6 +615,14 @@
   return true;
 }
 
+void BranchListProtoReader::Rewind() {
+  if (input_fp_) {
+    rewind(input_fp_.get());
+  } else {
+    input_str_pos_ = 0;
+  }
+}
+
 bool BranchListProtoReader::ReadData(void* data, size_t size) {
   if (input_fp_) {
     if (fread(data, size, 1, input_fp_.get()) != 1) {
@@ -642,6 +650,7 @@
   } else {
     size = input_str_.size();
   }
+  Rewind();
   proto::BranchList proto_branch_list;
   if (!ReadProtoBranchList(size, proto_branch_list)) {
     return false;
diff --git a/simpleperf/BranchListFile.h b/simpleperf/BranchListFile.h
index 2b9c07c..64a00ab 100644
--- a/simpleperf/BranchListFile.h
+++ b/simpleperf/BranchListFile.h
@@ -225,6 +225,7 @@
   bool ReadProtoBranchList(uint32_t size, proto::BranchList& proto_branch_list);
   bool AddETMBinary(const proto::ETMBinary& proto_binary, ETMBinaryMap& etm_data);
   void AddLBRData(const proto::LBRData& proto_lbr_data, LBRData& lbr_data);
+  void Rewind();
   bool ReadData(void* data, size_t size);
   bool ReadOldFileFormat(ETMBinaryMap& etm_data, LBRData& lbr_data);
 
diff --git a/simpleperf/BranchListFile_test.cpp b/simpleperf/BranchListFile_test.cpp
index af278a0..b5c7ec6 100644
--- a/simpleperf/BranchListFile_test.cpp
+++ b/simpleperf/BranchListFile_test.cpp
@@ -17,6 +17,7 @@
 #include <gtest/gtest.h>
 
 #include "BranchListFile.h"
+#include "get_test_data.h"
 
 using namespace simpleperf;
 
@@ -149,3 +150,14 @@
     }
   }
 }
+
+// @CddTest = 6.1/C-0-2
+TEST(BranchListProtoReaderWriter, read_old_branch_list_file) {
+  std::string path = GetTestData("etm/old_branch_list.data");
+  auto reader = BranchListProtoReader::CreateForFile(path);
+  ASSERT_TRUE(reader);
+  ETMBinaryMap etm_data;
+  LBRData lbr_data;
+  ASSERT_TRUE(reader->Read(etm_data, lbr_data));
+  ASSERT_EQ(etm_data.size(), 1u);
+}
diff --git a/simpleperf/doc/collect_etm_data_for_autofdo.md b/simpleperf/doc/collect_etm_data_for_autofdo.md
index abf5a46..53cf72f 100644
--- a/simpleperf/doc/collect_etm_data_for_autofdo.md
+++ b/simpleperf/doc/collect_etm_data_for_autofdo.md
@@ -4,16 +4,24 @@
 
 ## Introduction
 
-ETM is a hardware feature available on arm64 devices. It collects the instruction stream running on
-each cpu. ARM uses ETM as an alternative for LBR (last branch record) on x86.
-Simpleperf supports collecting ETM data, and converting it to input files for AutoFDO, which can
-then be used for PGO (profile-guided optimization) during compilation.
+The ARM Embedded Trace Macrocell (ETM) is an instruction tracing unit available on ARM SoCs. ETM
+traces the instruction stream executed on each core and sends the stream to system memory via other
+Coresight components. ETM data contains branch records, similar to Last Branch Records (LBRs) on
+x86 architectures.
 
-On ARMv8, ETM is considered as an external debug interface (unless ARMv8.4 Self-hosted Trace
-extension is impelemented). So it needs to be enabled explicitly in the bootloader, and isn't
-available on user devices. For Pixel devices, it's available on EVT and DVT devices on Pixel 4,
-Pixel 4a (5G) and Pixel 5. To test if it's available on other devices, you can follow commands in
-this doc and see if you can record any ETM data.
+Simpleperf supports collecting ETM data and converting it to input files for AutoFDO, which can
+then be used for Profile-Guided Optimization (PGO) during compilation.
+
+On ARMv8, the ETM and other Coresight components are considered part of the external debug
+interface. Therefore, they are typically only used internally and are disabled on production
+devices. ARMv9 introduces the Embedded Trace Extension (ETE) and Trace Buffer Extension (TRBE)
+to enhance self-hosted ETM data collection. This new hardware is not bound to the external debug
+interface and can be used more widely to collect AutoFDO profiles.
+
+For Pixel devices, ETM data collection is supported on EVT and DVT devices starting with Pixel 4.
+For other devices, you can try the commands in this document to see if ETM data recording is
+possible. To enable ETM data collection on a device, refer to the documentation in
+[Enable ETM data collection](#enable-etm-data-collection).
 
 ## Examples
 
@@ -305,71 +313,240 @@
 generation rate. For this reason, profcollectd currently uses `-z` for compression instead of
 `--decode-etm`.
 
-## Support ETM in the kernel
+## Enable ETM data collection
 
-To let simpleperf use ETM function, we need to enable Coresight driver in the kernel, which lives in
-`<linux_kernel>/drivers/hwtracing/coresight`.
+To enable ETM data collection on a device, you must first verify that the required hardware is
+present. Then, you need to enable ETM in both the bootloader and the kernel.
 
-The Coresight driver can be enabled by below kernel configs:
+### Check hardware support
 
-```config
-	CONFIG_CORESIGHT=y
-	CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
-	CONFIG_CORESIGHT_SOURCE_ETM4X=y
+In ARMv8, instruction tracing relies on two Coresight components:
+
+**Coresight ETM**: Generates the ETM data, recording the instruction stream.
+
+**Coresight ETR**: Transfers the ETM data to system memory for analysis.
+
+ARMv9 offers more flexibility with the introduction of new components:
+
+**Embedded Trace Extension (ETE)**: Replaces the Coresight ETM as the instruction trace source.
+
+**Trace Buffer Extension (TRBE)**: Provides an alternative to Coresight ETR for transferring trace
+data to memory. For example:
+
+Pixel 7: Uses Coresight ETM and Coresight ETR (ARMv8).
+
+Pixel 8: Uses ETE and Coresight ETR (ARMv9). While the Pixel 8 has TRBE, known errata with TRBE on
+         its Cortex cores make it unsuitable for use.
+
+Finding Device Support Information:
+
+**ETE and TRBE support**: Refer to the relevant core's technical reference manual (e.g.,
+                          [Arm® Cortex-X4 Core Technical Reference Manual](https://developer.arm.com/documentation/102484/0002)).
+
+**TRBE errata**: Consult the core's errata notice (e.g.,
+                 [Arm® Cortex-X4 (MP161) Software Developer Errata Notice](https://developer.arm.com/documentation/SDEN-2432808/0800/?lang=en)).
+
+**Coresight ETR support**: Typically detailed in the SoC's manual.
+
+### Enable ETM in the bootloader
+
+To enable Coresight ETM and Coresight ETR on ARMv8 devices (or only Coresight ETR on ARMv9 devices),
+you need to allow non-secure, non-invasive debug access on the CPU. The specific method for doing
+this varies depending on the SoC. After enabling ETM in the bootloader and kernel, you can verify
+that Coresight ETM and ETR are operational by checking their respective `TRCAUTHSTATUS` registers.
+The following is an example from a Pixel 6 with ETM enabled:
+
+```sh
+oriole:/ # cat /sys/bus/coresight/devices/etm0/mgmt/trcauthstatus
+0xcc
+oriole:/ # cat /sys/bus/coresight/devices/tmc_etr0/mgmt/authstatus
+0x33
 ```
 
-On Kernel 5.10+, we recommend building Coresight driver as kernel modules. Because it works with
-GKI kernel.
+To enable ETE on ARMv9 devices, you need to allow the kernel to access trace system registers. This
+is done by setting the `ENABLE_SYS_REG_TRACE_FOR_NS` build option in Trusted Firmware-A (see
+[documentation](https://trustedfirmware-a.readthedocs.io/en/v2.11/getting_started/build-options.html)).
 
+To enable TRBE on ARMv9 devices, you need to allow the kernel to access trace buffer control
+registers. This is done by setting the `ENABLE_TRBE_FOR_NS` build option in Trusted Firmware-A (see
+[documentation](https://trustedfirmware-a.readthedocs.io/en/v2.11/getting_started/build-options.html)).
+
+
+### Enable ETM in the kernel
+
+Android kernels from version 6.x onwards generally include the necessary patches for ETM data
+collection. To enable ETM in the kernel, you need to build the required kernel modules and add the
+appropriate device tree entries.
+
+Enable the following kernel configuration options to include the ETM kernel modules:
 ```config
 	CONFIG_CORESIGHT=m
 	CONFIG_CORESIGHT_LINK_AND_SINK_TMC=m
 	CONFIG_CORESIGHT_SOURCE_ETM4X=m
+	CONFIG_CORESIGHT_TRBE=m
 ```
 
-Android common kernel 5.10+ should have all the Coresight patches needed to collect ETM data.
-Android common kernel 5.4 misses two patches. But by adding patches in
-https://android-review.googlesource.com/q/topic:test_etm_on_hikey960_5.4, we can collect ETM data
-on hikey960 with 5.4 kernel.
-For Android common kernel 4.14 and 4.19, we have backported all necessary Coresight patches.
+These options will build the following kernel modules:
+```
+coresight.ko
+coresight-etm4x.ko
+coresight-funnel.ko
+coresight-replicator.ko
+coresight-tmc.ko
+coresight-trbe.ko
+```
 
-Besides Coresight driver, we also need to add Coresight devices in device tree. An example is in
-https://github.com/torvalds/linux/blob/master/arch/arm64/boot/dts/arm/juno-base.dtsi. There should
-be a path flowing ETM data from ETM device through funnels, ETF and replicators, all the way to
-ETR, which writes ETM data to system memory.
+Different SoCs have varying Coresight device connections, address assignments, and interrupt
+configurations. Therefore, providing a universal device tree example is not feasible. However, the
+following examples from Pixel devices illustrate how device tree entries for ETM components might
+look.
 
-One optional flag in ETM device tree is "arm,coresight-loses-context-with-cpu". It saves ETM
-registers when a CPU enters low power state. It may be needed to avoid
-"coresight_disclaim_device_unlocked" warning when doing system wide collection.
+**Example 1: Coresight ETM and Coresight ETR (Pixel 6)**
 
-One optional flag in ETR device tree is "arm,scatter-gather". Simpleperf requests 4M system memory
-for ETR to store ETM data. Without IOMMU, the memory needs to be contiguous. If the kernel can't
-fulfill the request, simpleperf will report out of memory error. Fortunately, we can use
-"arm,scatter-gather" flag to let ETR run in scatter gather mode, which uses non-contiguous memory.
+This example shows the device tree entries for Coresight ETM and ETR on Pixel 6
+(source: [gs101-debug.dtsi](https://android.googlesource.com/kernel/devices/google/gs101/+/refs/heads/android-gs-tangorpro-6.1-android16-dp/dts/gs101-debug.dtsi#287)).
+
+```device-tree
+etm0: etm@25840000 {
+    compatible = "arm,primecell";
+    arm,primecell-periphid = <0x000bb95d>;
+    reg = <0 0x25840000 0x1000>;
+    cpu = <&cpu0>;
+    coresight-name = "coresight-etm0";
+    clocks = <&clock ATCLK>;
+    clock-names = "apb_pclk";
+    arm,coresight-loses-context-with-cpu;
+    out-ports {
+        port {
+            etm0_out_port: endpoint {
+                remote-endpoint = <&funnel0_in_port0>;
+            };
+        };
+    };
+};
+
+// ... etm1 to etm7, funnel0 to funnel2, etf0, etf1 ...
+
+etr: etr@2500a000 {
+    compatible = "arm,coresight-tmc", "arm,primecell";
+    arm,primecell-periphid = <0x001bb961>;
+    reg = <0 0x2500a000 0x1000>;
+    coresight-name = "coresight-etr";
+    arm,scatter-gather;
+    clocks = <&clock ATCLK>;
+    clock-names = "apb_pclk";
+    in-ports {
+        port {
+            etr_in_port: endpoint {
+                remote-endpoint = <&funnel2_out_port>;
+            };
+        };
+    };
+};
+```
+**Example 2: ETE and Coresight ETR (Pixel 8)**
+
+This example shows the device tree entries for ETE and Coresight ETR on Pixel 8
+(source: [zuma-debug.dtsi](https://android.googlesource.com/kernel/devices/google/zuma/+/refs/heads/android-gs-shusky-6.1-android16-dp/dts/zuma-debug.dtsi#428)).
+
+```device-tree
+ete0 {
+    compatible = "arm,embedded-trace-extension";
+    cpu = <&cpu0>;
+    arm,coresight-loses-context-with-cpu;
+    out-ports {
+        port {
+            ete0_out_port: endpoint {
+                remote-endpoint = <&funnel0_in_port0>;
+            };
+        };
+    };
+};
+
+// ... ete1 to ete8, funnel0 to funnel2, etf0 ...
+
+etr: etr@2a00a000 {
+    compatible = "arm,coresight-tmc", "arm,primecell";
+    arm,primecell-periphid = <0x001bb961>;
+    reg = <0 0x2a00a000 0x1000>;
+    coresight-name = "coresight-etr";
+    arm,scatter-gather;
+    clocks = <&clock ATCLK>;
+    clock-names = "apb_pclk";
+    in-ports {
+        port {
+            etr_in_port: endpoint {
+                remote-endpoint = <&funnel2_out_port>;
+            };
+        };
+    };
+};
+```
+
+**Example 3: TRBE**
+
+This example shows a basic device tree entry for TRBE.
+
+```device-tree
+trbe {
+    compatible = "arm,trace-buffer-extension";
+    interrupts = <GIC_PPI 0 IRQ_TYPE_LEVEL_HIGH 0>;
+};
+```
+
+One optional flag in the ETM/ETE device tree is `arm,coresight-loses-context-with-cpu`. This flag
+ensures that ETM registers are saved when a CPU enters a low-power state. It is necessary if the
+CPU powers down the ETM/ETE during low-power states. Without this flag, the kernel cannot properly
+resume ETM data collection after the CPU wakes up, and you will likely see a
+`coresight_disclaim_device_unlocked` warning during system-wide data collection.
+
+Another optional flag in the ETR device tree is `arm,scatter-gather`. Simpleperf requires 4MB of
+contiguous system memory for the ETR to store ETM data (unless an IOMMU is present). If the kernel
+cannot provide this contiguous memory, simpleperf will report an out-of-memory error. Using the
+`arm,scatter-gather` flag allows the ETR to operate in scatter-gather mode, enabling it to utilize
+non-contiguous memory.
+
+Each CPU has an ETM device with a unique trace_id assigned by the kernel. The standard formula for
+determining the trace_id is: `trace_id = 0x10 + cpu * 2` (as defined in
+[coresight-pmu.h](https://github.com/torvalds/linux/blob/master/include/linux/coresight-pmu.h#L22)).
+If your kernel uses a different formula due to local patches, the simpleperf inject command may
+fail to parse the ETM data correctly, potentially resulting in empty output.
 
 
-### A possible problem: trace_id mismatch
+### Check ETM enable status in /sys
 
-Each CPU has an ETM device, which has a unique trace_id assigned from the kernel.
-The formula is: `trace_id = 0x10 + cpu * 2`, as in https://github.com/torvalds/linux/blob/master/include/linux/coresight-pmu.h#L37.
-If the formula is modified by local patches, then simpleperf inject command can't parse ETM data
-properly and is likely to give empty output.
+The status of ETM devices is reflected in /sys. The following is an example from a Pixel 9.
 
+```sh
+# List available Coresight devices, including ETE and TRBE.
+comet:/sys/bus/coresight/devices $ ls
+ete0  ete1  ete2  ete3  ete4  ete5  ete6  ete7  funnel0  funnel1  funnel2  tmc_etf0  tmc_etr0
 
-## Enable ETM in the bootloader
+# Check if Coresight ETR is enabled.
+comet:/sys/bus/coresight/devices $ cat tmc_etr0/mgmt/authstatus
+0x33
 
-Unless ARMv8.4 Self-hosted Trace extension is implemented, ETM is considered as an external debug
-interface. It may be disabled by fuse (like JTAG). So we need to check if ETM is disabled, and
-if bootloader provides a way to reenable it.
+# Check if we have Coresight ETM/ETE devices as perf event sources.
+comet:/sys/bus/event_source/devices/cs_etm $ ls -l
+total 0
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu0 -> ../platform/ete0/ete0
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu1 -> ../platform/ete1/ete1
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu2 -> ../platform/ete2/ete2
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu3 -> ../platform/ete3/ete3
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu4 -> ../platform/ete4/ete4
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu5 -> ../platform/ete5/ete5
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu6 -> ../platform/ete6/ete6
+lrwxrwxrwx 1 root root    0 2024-12-03 17:37 cpu7 -> ../platform/ete7/ete7
 
-We can tell if ETM is disable by checking its TRCAUTHSTATUS register, which is exposed in sysfs,
-like /sys/bus/coresight/devices/coresight-etm0/mgmt/trcauthstatus. To reenable ETM, we need to
-enable non-Secure non-invasive debug on ARM CPU. The method depends on chip vendors(SOCs).
+# Check if we have Coresight ETR/TRBE to move ETM data to system memory.
+comet:/sys/bus/event_source/devices/cs_etm/sinks $ ls
+tmc_etf0  tmc_etr0
+```
 
 
 ## Related docs
 
-* [Arm Architecture Reference Manual Armv8, D3 AArch64 Self-hosted Trace](https://developer.arm.com/documentation/ddi0487/latest)
+* [Arm Architecture Reference Manual for A-profile architecture, D3-D6](https://developer.arm.com/documentation/ddi0487/latest/)
 * [ARM ETM Architecture Specification](https://developer.arm.com/documentation/ihi0064/latest/)
 * [ARM CoreSight Architecture Specification](https://developer.arm.com/documentation/ihi0029/latest)
 * [CoreSight Components Technical Reference Manual](https://developer.arm.com/documentation/ddi0314/h/)
diff --git a/simpleperf/event_table_generator.py b/simpleperf/event_table_generator.py
index 03e630b..d44cbb6 100755
--- a/simpleperf/event_table_generator.py
+++ b/simpleperf/event_table_generator.py
@@ -278,12 +278,26 @@
         #include <unordered_map>
         #include <unordered_set>
         #include <map>
+        #include <string_view>
 
         #include "event_type.h"
 
         namespace simpleperf {
 
-        std::set<EventType> builtin_event_types = {
+        // A constexpr-constructible version of EventType for the built-in table.
+        struct BuiltinEventType {
+          std::string_view name;
+          uint32_t type;
+          uint64_t config;
+          std::string_view description;
+          std::string_view limited_arch;
+
+          explicit operator EventType() const {
+            return {std::string(name), type, config, std::string(description), std::string(limited_arch)};
+          }
+        };
+
+        static constexpr BuiltinEventType kBuiltinEventTypes[] = {
     """
     generated_str += gen_hardware_events() + '\n'
     generated_str += gen_software_events() + '\n'
@@ -293,6 +307,12 @@
     generated_str += """
         };
 
+        void LoadBuiltinEventTypes(std::set<EventType>& set) {
+          for (const auto& event_type : kBuiltinEventTypes) {
+            set.insert(static_cast<EventType>(event_type));
+          }
+        }
+
 
     """
     generated_str += raw_event_generator.generate_cpu_support_events()
diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp
index d837914..fef1a9f 100644
--- a/simpleperf/event_type.cpp
+++ b/simpleperf/event_type.cpp
@@ -44,7 +44,7 @@
   int shift;
 };
 
-extern std::set<EventType> builtin_event_types;
+void LoadBuiltinEventTypes(std::set<EventType>&);
 
 enum class EventFinderType {
   BUILTIN,
@@ -93,7 +93,7 @@
   BuiltinTypeFinder() : EventTypeFinder(EventFinderType::BUILTIN) {}
 
  protected:
-  void LoadTypes() override { types_ = std::move(builtin_event_types); }
+  void LoadTypes() override { LoadBuiltinEventTypes(types_); }
 };
 
 class TracepointStringFinder : public EventTypeFinder {
diff --git a/simpleperf/testdata/etm/old_branch_list.data b/simpleperf/testdata/etm/old_branch_list.data
new file mode 100644
index 0000000..434f417
--- /dev/null
+++ b/simpleperf/testdata/etm/old_branch_list.data
Binary files differ