Seigo Nonaka | 246cc29 | 2022-12-16 00:56:26 +0900 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
import argparse
import contextlib
import html
import json
import os
import re
import subprocess
import sys
import tempfile
from enum import Enum
from pathlib import Path
from typing import Sequence
from typing import Tuple

from fontTools import ttLib
| 16 | |
# Files that are skipped entirely when collecting notices. Entries are paths
# relative to the --target directory; do_check() aborts if an entry listed
# here is not found while walking the target.
IGNORE_FILE_NAME = [
    # This script itself.
    "generate_notice.py",

    # License texts and packaging metadata.
    "LICENSE",
    "LICENSE_APACHE2.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_GPLv2.TXT",
    "LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_HPND_SELL_VARIANT.TXT",
    "LICENSE_ISC.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "LICENSE_OFL.TXT",
    "METADATA",
    "MODULE_LICENSE_MIT",
    "NOTICE",

    # Word list that merely contains the word "Copyright".
    "perf/texts/en-words.txt",

    # Intentionally broken, unreadable font used as a fuzzing target.
    "test/fuzzing/fonts/sbix-extents.ttf",
]
| 43 | |
# Top-level directories in which a file without a copyright notice is
# tolerated rather than reported as a fatal error.
IGNORE_DIR_IF_NO_COPYRIGHT = [
    "test",
    "perf",
]
| 48 | |
# Files that are known to contain no "Copyright" text. Entries are paths
# relative to the --target directory. do_file() removes an entry when it meets
# the file and finds no "Copyright" in it; do_check() aborts if any entry is
# left over at the end of the walk.
NO_COPYRIGHT_FILES = [
    ".ci/build-win32.sh",
    ".ci/build-win64.sh",
    ".ci/deploy-docs.sh",
    ".ci/publish_release_artifact.sh",
    ".ci/requirements-fonttools.in",
    ".ci/requirements-fonttools.txt",
    ".ci/requirements.in",
    ".ci/requirements.txt",
    ".ci/win32-cross-file.txt",
    ".ci/win64-cross-file.txt",
    ".circleci/config.yml",
    ".clang-format",
    ".codecov.yml",
    ".editorconfig",
    ".github/dependabot.yml",
    ".github/workflows/arm-ci.yml",
    ".github/workflows/cifuzz.yml",
    ".github/workflows/configs-build.yml",
    ".github/workflows/coverity-scan.yml",
    ".github/workflows/linux-ci.yml",
    ".github/workflows/macos-ci.yml",
    ".github/workflows/msvc-ci.yml",
    ".github/workflows/msys2-ci.yml",
    ".github/workflows/scorecard.yml",
    "AUTHORS",
    "BUILD.md",
    "CMakeLists.txt",
    "CONFIG.md",
    "Makefile.am",
    "NEWS",
    "OWNERS",
    "README",
    "README.android",
    "README.md",
    "README.mingw.md",
    "README.python.md",
    "RELEASING.md",
    "SECURITY.md",
    "TESTING.md",
    "TEST_MAPPING",
    "THANKS",
    "autogen.sh",
    "configure.ac",
    "docs/HarfBuzz.png",
    "docs/HarfBuzz.svg",
    "docs/Makefile.am",
    "docs/features.dot",
    "docs/harfbuzz-docs.xml",
    "docs/harfbuzz-overrides.txt",
    "docs/harfbuzz-sections.txt",
    "docs/meson.build",
    "docs/repacker.md",
    "docs/serializer.md",
    "docs/subset-preprocessing.md",
    "docs/usermanual-buffers-language-script-and-direction.xml",
    "docs/usermanual-clusters.xml",
    "docs/usermanual-fonts-and-faces.xml",
    "docs/usermanual-getting-started.xml",
    "docs/usermanual-glyph-information.xml",
    "docs/usermanual-install-harfbuzz.xml",
    "docs/usermanual-integration.xml",
    "docs/usermanual-object-model.xml",
    "docs/usermanual-opentype-features.xml",
    "docs/usermanual-shaping-concepts.xml",
    "docs/usermanual-utilities.xml",
    "docs/usermanual-what-is-harfbuzz.xml",
    "docs/version.xml.in",
    "docs/wasm-shaper.md",
    "harfbuzz.doap",
    "meson.build",
    "meson_options.txt",
    "mingw-configure.sh",
    "replace-enum-strings.cmake",
    "src/ArabicPUASimplified.txt",
    "src/ArabicPUATraditional.txt",
    "src/Makefile.am",
    "src/Makefile.sources",
    "src/OT/Layout/GPOS/Anchor.hh",
    "src/OT/Layout/GPOS/AnchorFormat1.hh",
    "src/OT/Layout/GPOS/AnchorFormat2.hh",
    "src/OT/Layout/GPOS/AnchorFormat3.hh",
    "src/OT/Layout/GPOS/AnchorMatrix.hh",
    "src/OT/Layout/GPOS/ChainContextPos.hh",
    "src/OT/Layout/GPOS/Common.hh",
    "src/OT/Layout/GPOS/ContextPos.hh",
    "src/OT/Layout/GPOS/CursivePos.hh",
    "src/OT/Layout/GPOS/CursivePosFormat1.hh",
    "src/OT/Layout/GPOS/ExtensionPos.hh",
    "src/OT/Layout/GPOS/GPOS.hh",
    "src/OT/Layout/GPOS/LigatureArray.hh",
    "src/OT/Layout/GPOS/MarkArray.hh",
    "src/OT/Layout/GPOS/MarkBasePos.hh",
    "src/OT/Layout/GPOS/MarkBasePosFormat1.hh",
    "src/OT/Layout/GPOS/MarkLigPos.hh",
    "src/OT/Layout/GPOS/MarkLigPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkMarkPos.hh",
    "src/OT/Layout/GPOS/MarkMarkPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkRecord.hh",
    "src/OT/Layout/GPOS/PairPos.hh",
    "src/OT/Layout/GPOS/PairPosFormat1.hh",
    "src/OT/Layout/GPOS/PairPosFormat2.hh",
    "src/OT/Layout/GPOS/PairSet.hh",
    "src/OT/Layout/GPOS/PairValueRecord.hh",
    "src/OT/Layout/GPOS/PosLookup.hh",
    "src/OT/Layout/GPOS/PosLookupSubTable.hh",
    "src/OT/Layout/GPOS/SinglePos.hh",
    "src/OT/Layout/GPOS/SinglePosFormat1.hh",
    "src/OT/Layout/GPOS/SinglePosFormat2.hh",
    "src/OT/Layout/GPOS/ValueFormat.hh",
    "src/OT/Layout/GSUB/AlternateSet.hh",
    "src/OT/Layout/GSUB/AlternateSubst.hh",
    "src/OT/Layout/GSUB/AlternateSubstFormat1.hh",
    "src/OT/Layout/GSUB/ChainContextSubst.hh",
    "src/OT/Layout/GSUB/Common.hh",
    "src/OT/Layout/GSUB/ContextSubst.hh",
    "src/OT/Layout/GSUB/ExtensionSubst.hh",
    "src/OT/Layout/GSUB/GSUB.hh",
    "src/OT/Layout/GSUB/Ligature.hh",
    "src/OT/Layout/GSUB/LigatureSet.hh",
    "src/OT/Layout/GSUB/LigatureSubst.hh",
    "src/OT/Layout/GSUB/LigatureSubstFormat1.hh",
    "src/OT/Layout/GSUB/MultipleSubst.hh",
    "src/OT/Layout/GSUB/MultipleSubstFormat1.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubst.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/Sequence.hh",
    "src/OT/Layout/GSUB/SingleSubst.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat2.hh",
    "src/OT/Layout/GSUB/SubstLookup.hh",
    "src/OT/Layout/GSUB/SubstLookupSubTable.hh",
    "src/OT/glyf/CompositeGlyph.hh",
    "src/OT/glyf/Glyph.hh",
    "src/OT/glyf/GlyphHeader.hh",
    "src/OT/glyf/SimpleGlyph.hh",
    "src/OT/glyf/SubsetGlyph.hh",
    "src/OT/glyf/VarCompositeGlyph.hh",
    "src/OT/glyf/composite-iter.hh",
    "src/OT/glyf/coord-setter.hh",
    "src/OT/glyf/glyf-helpers.hh",
    "src/OT/glyf/glyf.hh",
    "src/OT/glyf/loca.hh",
    "src/OT/glyf/path-builder.hh",
    "src/addTable.py",
    "src/check-c-linkage-decls.py",
    "src/check-externs.py",
    "src/check-header-guards.py",
    "src/check-includes.py",
    "src/check-libstdc++.py",
    "src/check-static-inits.py",
    "src/check-symbols.py",
    "src/fix_get_types.py",
    "src/gen-arabic-joining-list.py",
    "src/gen-arabic-pua.py",
    "src/gen-arabic-table.py",
    "src/gen-def.py",
    "src/gen-emoji-table.py",
    "src/gen-harfbuzzcc.py",
    "src/gen-hb-version.py",
    "src/gen-indic-table.py",
    "src/gen-os2-unicode-ranges.py",
    "src/gen-ragel-artifacts.py",
    "src/gen-tag-table.py",
    "src/gen-ucd-table.py",
    "src/gen-use-table.py",
    "src/gen-vowel-constraints.py",
    "src/harfbuzz-cairo.pc.in",
    "src/harfbuzz-config.cmake.in",
    "src/harfbuzz-gobject.pc.in",
    "src/harfbuzz-icu.pc.in",
    "src/harfbuzz-subset.cc",
    "src/harfbuzz-subset.pc.in",
    "src/harfbuzz.cc",
    "src/harfbuzz.pc.in",
    "src/hb-ot-shaper-arabic-joining-list.hh",
    "src/hb-ot-shaper-arabic-pua.hh",
    "src/hb-ot-shaper-arabic-table.hh",
    "src/hb-ot-shaper-indic-table.cc",
    "src/hb-ot-shaper-use-table.hh",
    "src/hb-ot-shaper-vowel-constraints.cc",
    "src/hb-ot-tag-table.hh",
    "src/hb-ucd-table.hh",
    "src/hb-unicode-emoji-table.hh",
    "src/justify.py",
    "src/meson.build",
    "src/ms-use/IndicPositionalCategory-Additional.txt",
    "src/ms-use/IndicShapingInvalidCluster.txt",
    "src/ms-use/IndicSyllabicCategory-Additional.txt",
    "src/relative_to.py",
    "src/sample.py",
    "src/test-use-table.cc",
    "src/update-unicode-tables.make",
    "src/wasm/graphite/Makefile",
    "src/wasm/graphite/shape.cc",
    "src/wasm/rust/harfbuzz-wasm/Cargo.toml",
    "src/wasm/rust/harfbuzz-wasm/src/lib.rs",
    "src/wasm/sample/c/Makefile",
    "src/wasm/sample/c/shape-fallback.cc",
    "src/wasm/sample/c/shape-ot.cc",
    "src/wasm/sample/rust/hello-wasm/Cargo.toml",
    "src/wasm/sample/rust/hello-wasm/src/lib.rs",
    "subprojects/.gitignore",
    "subprojects/cairo.wrap",
    "subprojects/freetype2.wrap",
    "subprojects/glib.wrap",
    "subprojects/google-benchmark.wrap",
    "subprojects/packagefiles/ragel/meson.build",
    "subprojects/ragel.wrap",
    "util/Makefile.am",
    "util/Makefile.sources",
    "util/meson.build",
]
| 262 | |
class CommentType(Enum):
    """Identifies how a copyright notice is embedded in a file."""

    # /* ... */
    C_STYLE_BLOCK = 1
    # /* ... */ written as several consecutive block comments, one per line.
    C_STYLE_BLOCK_AS_LINE = 2
    # // ...
    C_STYLE_LINE = 3
    # # ...
    SCRIPT_STYLE_HASH = 4
    OPENTYPE_NAME = 5
    OPENTYPE_COLLECTION_NAME = 6
    UNKNOWN = 10000
| 271 | |
| 272 | |
def fatal(msg: str):
    """Write *msg* followed by a newline to stderr, then exit with status 1."""
    sys.stderr.write(str(msg) + "\n")
    sys.exit(1)
| 278 | |
| 279 | |
def warn(msg: str):
    """Write *msg* followed by a newline to stderr without exiting."""
    sys.stderr.write(str(msg) + "\n")
| 283 | |
def debug(msg: str):
    """Debug-output hook; currently a no-op that discards *msg*."""
| 288 | |
| 289 | |
def cleanup_and_join(out_lines: Sequence[str]):
    """Normalize extracted notice lines and join them with newlines.

    Trailing blank (empty or whitespace-only) lines are dropped, then the
    leading-space indentation common to every line is removed one column at a
    time; empty lines do not prevent the removal.

    The caller's sequence is left untouched.

    Args:
        out_lines: the raw lines of one notice.

    Returns:
        The cleaned lines joined with "\n".

    Exits through fatal() when no non-blank line remains.
    """
    # Work on a private copy so the argument is never mutated.
    cleaned = list(out_lines)

    while cleaned and not cleaned[-1].strip():
        cleaned.pop()

    # Check emptiness before anything indexes or loops over the list: an empty
    # list would otherwise raise IndexError above or spin forever below.
    if not cleaned:
        fatal("Failed to get copyright info")

    # The last line is non-blank here, so this loop stops as soon as that
    # line's first character is no longer a space.
    while all(not x or x[0] == ' ' for x in cleaned):
        cleaned = [x[1:] for x in cleaned]

    return "\n".join(cleaned)
| 301 | |
| 302 | |
def get_comment_type(copyright_line: str, path_str: str) -> CommentType:
    """Classify the comment syntax of *copyright_line* by its leading characters.

    A line starting with "#" is a hash comment, one starting with "//" is a
    C-style line comment, and anything else is treated as part of a C-style
    block comment. *path_str* is accepted but not consulted.
    """
    prefix_kinds = (
        ("#", CommentType.SCRIPT_STYLE_HASH),
        ("//", CommentType.C_STYLE_LINE),
    )
    for prefix, kind in prefix_kinds:
        if copyright_line.startswith(prefix):
            return kind
    return CommentType.C_STYLE_BLOCK
| 310 | |
def extract_copyright_font(path_str: str) -> str:
    """Return the copyright notice stored in the font file at *path_str*.

    Single-font files (.ttf, .otf, .dfont) are read as font number 0;
    collections (.ttc, .otc) go through the collection reader. Any other
    suffix yields None.
    """
    font_path = Path(path_str)
    suffix = font_path.suffix
    if suffix in ['.ttf', '.otf', '.dfont']:
        return extract_from_opentype_name(font_path, 0)
    if suffix in ['.ttc', '.otc']:
        return extract_from_opentype_collection_name(font_path)
    return None
| 317 | |
| 318 | |
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract the copyright notice that starts at lines[i].

    The comment syntax of lines[i] selects the extractor to run.

    Returns:
        The (notice, next_index) pair produced by the selected extractor.
    """
    extractors = {
        CommentType.C_STYLE_BLOCK: extract_from_c_style_block_at,
        CommentType.C_STYLE_LINE: extract_from_c_style_lines_at,
        CommentType.SCRIPT_STYLE_HASH: extract_from_script_hash_at,
    }
    extractor = extractors.get(get_comment_type(lines[i], path))
    if extractor is not None:
        return extractor(lines, i, path)
    fatal("Uknown comment style: %s" % lines[i])
| 331 | |
def extract_from_opentype_collection_name(path: str) -> str:
    """Collect the copyright notices of every font in a TTC/OTC collection.

    Reads the 12-byte collection header (4-byte 'ttcf' tag, 4 bytes that are
    skipped, big-endian 32-bit font count) and extracts the notice of each
    member font.

    Args:
        path: path of the collection file.

    Returns:
        The distinct notices joined by a blank line, in order of first
        appearance so the result is stable between runs. Fonts that yield no
        notice are skipped; the result is "" when no font has one.

    Exits through fatal() when the header is truncated or the tag is wrong.
    """
    with open(path, mode="rb") as f:
        head = f.read(12)

    if len(head) < 12:
        fatal('Truncated TTC header: %s' % path)
    # Compare raw bytes; decoding arbitrary file content may raise
    # UnicodeDecodeError before the tag can be checked.
    if head[0:4] != b'ttcf':
        fatal('Invalid magic number for TTC file: %s' % path)
    num_fonts = int.from_bytes(head[8:12], byteorder="big")

    notices = []
    for font_index in range(num_fonts):
        notice = extract_from_opentype_name(path, font_index)
        # A member font without a notice returns None, which str.join rejects.
        if notice and notice not in notices:
            notices.append(notice)

    return '\n\n'.join(notices)
| 347 | |
def extract_from_opentype_name(path: str, index: int) -> str:
    """Build a notice from the 'name' table of font number *index* in *path*.

    The notice starts with name ID 0 (or, if that is missing, the first string
    containing "Copyright" in the 'CFF ' table). Name ID 13 (license
    description) and name ID 14 (license URL) are appended when present, each
    separated by a blank line.

    Returns:
        The notice text, or None when no copyright string is found.
    """
    # Preference order of (platformID, platEncID) pairs; lower is better.
    encoding_rank = {
        (3, 10): 0,
        (0, 6): 1,
        (0, 4): 2,
        (3, 1): 3,
        (0, 3): 4,
        (0, 2): 5,
        (0, 1): 6,
        (0, 0): 7,
    }

    def rank_of(record):
        # Any pair that is not listed shares the worst rank.
        return encoding_rank.get((record.platformID, record.platEncID), 10000)

    def get_preferred_name(nameID: int, ttf):
        # Best-ranked record with the given name ID; the first one wins a tie.
        if 'name' not in ttf:
            return None
        candidates = [rec for rec in ttf['name'].names if rec.nameID == nameID]
        return min(candidates, key=rank_of, default=None)

    def get_notice_from_cff(ttf):
        if 'CFF ' not in ttf:
            return None
        # There seems to be no direct way to read the Notice entry of a CFF
        # table, so take the first pooled string mentioning "Copyright".
        pool = ttf['CFF '].cff.strings
        return next((text for text in pool if 'Copyright' in text), None)

    with contextlib.closing(ttLib.TTFont(path, 0, fontNumber=index)) as ttf:
        base = get_preferred_name(0, ttf) or get_notice_from_cff(ttf)
        if not base:
            return None

        parts = [str(base)]
        for name_id in (13, 14):
            extra = get_preferred_name(name_id, ttf)
            if extra:
                parts.append(str(extra))

        return "\n\n".join(parts)
| 422 | |
def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive "//" comment lines.

    The notice runs from lines[i] up to, but not including, the first line
    that does not start with "//".

    Returns:
        (notice text, index just past the line that ended the run).
    """
    start = i
    while i < len(lines) and lines[i].startswith("//"):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    def strip_marker(line):
        # "//# " and "//#" are the Android.bp flavor of line comments.
        for marker in ("//# ", "//#", "// "):
            if line.startswith(marker):
                return line[len(marker):]
        return "" if line == "//" else line

    out_lines = [strip_marker(line) for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)
| 454 | |
| 455 | |
def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive "#" comment lines.

    The notice runs from lines[i] until a line without "#" or until two
    consecutive bare "#" lines, which are treated as the end of the header.

    Returns:
        (notice text, next index to scan), or (None, i + 1) when lines[i] is
        not a hash comment.
    """
    # startswith() also copes with a blank line, where indexing [0] would
    # raise IndexError.
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    def is_copyright_end(idx: int) -> bool:
        if "#" not in lines[idx]:
            return True
        # Two bare "#" lines in a row end the header. Check that a next line
        # exists before reading it: the file may end on a bare "#".
        has_next = idx + 1 < len(lines)
        return lines[idx] == "#" and has_next and lines[idx + 1] == "#"

    start = i
    while i < len(lines):
        if is_copyright_end(i):
            break
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("# "):
            out_lines.append(line[2:])
        elif line == "#":
            out_lines.append(line[1:])
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
| 488 | |
| 489 | |
def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written inside a /* ... */ block comment.

    The notice starts at lines[i] and always includes at least the following
    line. It ends at the first later line that contains "*/", or at the first
    of two consecutive " *" lines, or at the first of two consecutive empty
    lines. That terminating line is part of the notice.

    Returns:
        (notice text, index just past the terminating line).
    """

    def is_copyright_end(idx: int) -> bool:
        current = lines[idx]
        if "*/" in current:
            return True
        # The look-ahead must not run past the end of the file; a missing
        # next line never matches.
        following = lines[idx + 1] if idx + 1 < len(lines) else None
        if current == " *" and following == " *":
            return True
        if current == "" and following == "":
            return True
        return False

    start = i
    i += 1  # include at least one line
    while i < len(lines):
        if is_copyright_end(i):
            break
        i += 1
    end = i + 1

    out_lines = []
    for line in lines[start:end]:
        clean_line = line

        # Strip the opening "/*" marker.
        if clean_line.startswith("/* "):
            clean_line = clean_line[3:]
        if clean_line == "/*":
            clean_line = clean_line[2:]

        # Strip the closing "*/" marker.
        if clean_line.endswith(" */"):
            clean_line = clean_line[:-3]
        if clean_line.endswith("*/"):
            clean_line = clean_line[:-2]

        # Strip the leading " *" of continuation lines.
        if clean_line.startswith(" * "):
            clean_line = clean_line[3:]
        if clean_line == " *":
            clean_line = clean_line[2:]

        # hb-aots-tester.cpp uses an underscore rule as a separator; drop it.
        if path.endswith("test/shape/data/aots/hb-aots-tester.cpp"):
            clean_line = clean_line.replace("_", "")

        out_lines.append(clean_line.rstrip())

    return (cleanup_and_join(out_lines), i + 1)
| 542 | |
| 543 | |
def is_copyright_line(line: str, path: str) -> bool:
    """Return True if *line* marks the start of a copyright notice.

    A line qualifies when it contains "Copyright", unless the word appears
    only as a quoted string or identifier that is known not to be a notice.
    """
    if "Copyright" not in line:
        return False

    # Quoted or identifier uses of the word are not notices.
    false_positives = ("`Copyright'", "\"Copyright\"", "OpCode_Copyright")
    if any(token in line for token in false_positives):
        return False

    # hb-ot-name.h documents the HB_OT_NAME_ID_COPYRIGHT enumerator.
    in_name_header = path.endswith("src/hb-ot-name.h")
    return not (in_name_header and "HB_OT_NAME_ID_COPYRIGHT" in line)
| 562 | |
def assert_mandatory_copyright(path_str: str):
    """Abort unless *path_str* lies in a directory exempt from notices.

    The first component of the normalized path is compared against
    IGNORE_DIR_IF_NO_COPYRIGHT; when it is not listed, fatal() reports the
    file and exits.
    """
    path = Path(path_str)
    toplevel_dir = str(path).partition(os.sep)[0]

    if toplevel_dir not in IGNORE_DIR_IF_NO_COPYRIGHT:
        fatal("%s does not contain Copyright line" % path)
| 571 | |
| 572 | |
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Extract the copyright notices of one file into *copyrights*.

    Args:
        path: path of the file to scan.
        copyrights: maps notice text to the list of paths that carry it;
            updated in place.
        no_copyright_files: paths expected to contain no "Copyright" text; an
            entry is removed when the file is seen and indeed has none.

    Files without a notice are reported through assert_mandatory_copyright()
    unless they are listed in *no_copyright_files*.
    """
    dirname = os.path.dirname(path)

    is_font = (dirname.endswith('./test/fuzzing/fonts') or
               Path(path).suffix in ['.ttf', '.otf', '.dfont', '.ttc', '.otc'])

    if is_font:
        # Fonts are parsed by the font extractors, which open the file
        # themselves, so the raw bytes are not read here.
        notice = extract_copyright_font(path)
        if not notice:
            assert_mandatory_copyright(path)
            return

        copyrights.setdefault(notice, []).append(path)
        return

    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        # iso-8859-1 maps every byte, so this fallback cannot fail.
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            assert_mandatory_copyright(path)
        return

    lines = content.splitlines()

    # The COPYING in the in-house dir has the full OFL license with a
    # description. Use the OFL license description body.
    if path.endswith("test/shape/data/in-house/COPYING") or path.endswith("test/COPYING"):
        notice = cleanup_and_join(lines[9:])
        copyrights.setdefault(notice, []).append(path)
        return

    # The COPYING in the top dir has the MIT-Modern-Variant license with a
    # description. Use the entire file as a license notice.
    if path.endswith("COPYING") and str(Path(path)) == 'COPYING':
        notice = cleanup_and_join(lines)
        copyrights.setdefault(notice, []).append(path)
        return

    i = 0
    license_found = False
    while i < len(lines):
        if not is_copyright_line(lines[i], path):
            i += 1
            continue

        (notice, nexti) = extract_copyright_at(lines, i, path)
        if notice:
            copyrights.setdefault(notice, []).append(path)
            license_found = True
        # Resume after the extracted notice so it is not scanned twice.
        i = nexti

    if not license_found:
        assert_mandatory_copyright(path)
| 638 | |
def do_check(path, format):
    """Walk *path*, collect every copyright notice and print the result.

    Args:
        path: root directory to scan; a trailing "/" is added when missing.
        format: the Format member selecting the printer to use.

    Exits through fatal() when an entry of IGNORE_FILE_NAME or
    NO_COPYRIGHT_FILES was not consumed during the walk.
    """
    if not path.endswith('/'):  # make sure the path ends with a slash
        path = path + '/'

    file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
    no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # Removing ".git" in place stops os.walk from descending into it.
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fname in filenames:
            fpath = os.path.join(directory, fname)
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
                continue

            do_file(fpath, copyrights, no_copyright_files)

    # Leftover names are sorted so the error text is the same on every run.
    if file_to_ignore:
        fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
              + "\n".join(sorted(file_to_ignore)))

    if no_copyright_files:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
              + "\n".join(sorted(no_copyright_files)))

    if format == Format.notice:
        print_notice(copyrights, False)
    elif format == Format.notice_with_filename:
        print_notice(copyrights, True)
    elif format == Format.html:
        print_html(copyrights)
    elif format == Format.json:
        print_json(copyrights)
| 676 | |
def print_html(copyrights):
    """Print *copyrights* to stdout as an HTML table.

    Each row holds the sorted list of files on the left and the notice they
    share on the right; rows are ordered by notice text.

    Args:
        copyrights: maps notice text to the list of paths that carry it.
    """
    print('<html>')
    print("""
<head>
<style>
table {
  font-family: monospace
}

table tr td {
  padding: 10px 10px 10px 10px
}
</style>
</head>
""")
    print('<body>')
    print('<table border="1" style="border-collapse:collapse">')
    for notice in sorted(copyrights.keys()):
        files = sorted(copyrights[notice])

        print('<tr>')
        print('<td>')
        print('<ul>')
        for file in files:
            print('<li>%s</li>' % html.escape(file, quote=False))
        print('</ul>')
        print('</td>')

        print('<td>')
        # Escape first, then add <br>: notices often contain "<email>" or "&",
        # which a browser would otherwise swallow as markup.
        escaped = html.escape(notice, quote=False)
        print('<p>%s</p>' % escaped.replace('\n', '<br>'))
        print('</td>')

        print('</tr>')

    print('</table>')
    print('</body></html>')
| 714 | |
def print_notice(copyrights, print_file):
    """Print every notice to stdout, separated by a dashed rule.

    Notices are emitted in sorted order so the output is stable. When
    *print_file* is true, the sorted paths carrying a notice are listed
    before it.
    """
    rule = "-" * 67
    for notice in sorted(copyrights):
        if print_file:
            print("\n".join(sorted(copyrights[notice])))
            print()
        # notice, blank line, rule, blank line
        print(notice, "", rule, "", sep="\n")
| 726 | |
def print_json(copyrights):
    """Print *copyrights* to stdout as a single JSON object.

    Keys (notices) and each file list are sorted so the output does not
    depend on the order in which files were visited.

    Args:
        copyrights: maps notice text to the list of paths that carry it.
    """
    stable = {notice: sorted(files) for notice, files in copyrights.items()}
    print(json.dumps(stable, sort_keys=True))
| 729 | |
class Format(Enum):
    """Output formats accepted by the --format option."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'
    json = 'json'

    def __str__(self):
        # Show the plain value so argparse lists choices as "notice", "html", ...
        return str(self.value)
| 738 | |
def main():
    """Parse the command line and run the notice collection."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
                        default=Format.notice,
                        help="output format (default: %(default)s)")
    parser.add_argument("--target", dest="target", action='store',
                        required=True, help="target directory to collect notice headers")
    args = parser.parse_args()
    do_check(args.target, args.format)


if __name__ == "__main__":
    main()
| 750 | |