#!/usr/bin/env python3
2
3from enum import Enum
4from pathlib import Path
5from typing import Sequence
6from typing import Tuple
7from fontTools import ttLib
8import tempfile
9import subprocess
10import json
11import argparse
12import contextlib
13import os
14import re
15import sys
16
# List of specific files to be ignored. Entries are paths relative to the
# --target directory: do_check() joins each one with the target path, skips
# the matching file, and aborts if an entry matched no file at all.
IGNORE_FILE_NAME = [
    # Exclude myself
    "generate_notice.py",

    # License files
    "LICENSE",
    "LICENSE_APACHE2.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_GPLv2.TXT",
    "LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_HPND_SELL_VARIANT.TXT",
    "LICENSE_ISC.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "LICENSE_OFL.TXT",
    "METADATA",
    "MODULE_LICENSE_MIT",
    "NOTICE",

    # dictionary which has Copyright word
    "perf/texts/en-words.txt",

    # broken unreadable font file for fuzzing target
    "test/fuzzing/fonts/sbix-extents.ttf",
]
43
# Directories whose files may lack a copyright notice.
# assert_mandatory_copyright() compares these names against the first
# component of a file's path and returns quietly on a match.
IGNORE_DIR_IF_NO_COPYRIGHT = [
    "test",
    "perf",
]
48
# Files (relative to the --target directory) that are expected to contain no
# "Copyright" string. do_file() removes an entry when it meets the matching
# non-font file and finds no "Copyright" in it; do_check() aborts if any
# entry is still left after the whole tree has been walked.
NO_COPYRIGHT_FILES = [
    ".ci/build-win32.sh",
    ".ci/build-win64.sh",
    ".ci/deploy-docs.sh",
    ".ci/publish_release_artifact.sh",
    ".ci/requirements-fonttools.in",
    ".ci/requirements-fonttools.txt",
    ".ci/requirements.in",
    ".ci/requirements.txt",
    ".ci/win32-cross-file.txt",
    ".ci/win64-cross-file.txt",
    ".circleci/config.yml",
    ".clang-format",
    ".codecov.yml",
    ".editorconfig",
    ".github/dependabot.yml",
    ".github/workflows/arm-ci.yml",
    ".github/workflows/cifuzz.yml",
    ".github/workflows/configs-build.yml",
    ".github/workflows/coverity-scan.yml",
    ".github/workflows/linux-ci.yml",
    ".github/workflows/macos-ci.yml",
    ".github/workflows/msvc-ci.yml",
    ".github/workflows/msys2-ci.yml",
    ".github/workflows/scorecard.yml",
    "AUTHORS",
    "BUILD.md",
    "CMakeLists.txt",
    "CONFIG.md",
    "Makefile.am",
    "NEWS",
    "OWNERS",
    "README",
    "README.android",
    "README.md",
    "README.mingw.md",
    "README.python.md",
    "RELEASING.md",
    "SECURITY.md",
    "TESTING.md",
    "TEST_MAPPING",
    "THANKS",
    "autogen.sh",
    "configure.ac",
    "docs/HarfBuzz.png",
    "docs/HarfBuzz.svg",
    "docs/Makefile.am",
    "docs/features.dot",
    "docs/harfbuzz-docs.xml",
    "docs/harfbuzz-overrides.txt",
    "docs/harfbuzz-sections.txt",
    "docs/meson.build",
    "docs/repacker.md",
    "docs/serializer.md",
    "docs/subset-preprocessing.md",
    "docs/usermanual-buffers-language-script-and-direction.xml",
    "docs/usermanual-clusters.xml",
    "docs/usermanual-fonts-and-faces.xml",
    "docs/usermanual-getting-started.xml",
    "docs/usermanual-glyph-information.xml",
    "docs/usermanual-install-harfbuzz.xml",
    "docs/usermanual-integration.xml",
    "docs/usermanual-object-model.xml",
    "docs/usermanual-opentype-features.xml",
    "docs/usermanual-shaping-concepts.xml",
    "docs/usermanual-utilities.xml",
    "docs/usermanual-what-is-harfbuzz.xml",
    "docs/version.xml.in",
    "docs/wasm-shaper.md",
    "harfbuzz.doap",
    "meson.build",
    "meson_options.txt",
    "mingw-configure.sh",
    "replace-enum-strings.cmake",
    "src/ArabicPUASimplified.txt",
    "src/ArabicPUATraditional.txt",
    "src/Makefile.am",
    "src/Makefile.sources",
    "src/OT/Layout/GPOS/Anchor.hh",
    "src/OT/Layout/GPOS/AnchorFormat1.hh",
    "src/OT/Layout/GPOS/AnchorFormat2.hh",
    "src/OT/Layout/GPOS/AnchorFormat3.hh",
    "src/OT/Layout/GPOS/AnchorMatrix.hh",
    "src/OT/Layout/GPOS/ChainContextPos.hh",
    "src/OT/Layout/GPOS/Common.hh",
    "src/OT/Layout/GPOS/ContextPos.hh",
    "src/OT/Layout/GPOS/CursivePos.hh",
    "src/OT/Layout/GPOS/CursivePosFormat1.hh",
    "src/OT/Layout/GPOS/ExtensionPos.hh",
    "src/OT/Layout/GPOS/GPOS.hh",
    "src/OT/Layout/GPOS/LigatureArray.hh",
    "src/OT/Layout/GPOS/MarkArray.hh",
    "src/OT/Layout/GPOS/MarkBasePos.hh",
    "src/OT/Layout/GPOS/MarkBasePosFormat1.hh",
    "src/OT/Layout/GPOS/MarkLigPos.hh",
    "src/OT/Layout/GPOS/MarkLigPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkMarkPos.hh",
    "src/OT/Layout/GPOS/MarkMarkPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkRecord.hh",
    "src/OT/Layout/GPOS/PairPos.hh",
    "src/OT/Layout/GPOS/PairPosFormat1.hh",
    "src/OT/Layout/GPOS/PairPosFormat2.hh",
    "src/OT/Layout/GPOS/PairSet.hh",
    "src/OT/Layout/GPOS/PairValueRecord.hh",
    "src/OT/Layout/GPOS/PosLookup.hh",
    "src/OT/Layout/GPOS/PosLookupSubTable.hh",
    "src/OT/Layout/GPOS/SinglePos.hh",
    "src/OT/Layout/GPOS/SinglePosFormat1.hh",
    "src/OT/Layout/GPOS/SinglePosFormat2.hh",
    "src/OT/Layout/GPOS/ValueFormat.hh",
    "src/OT/Layout/GSUB/AlternateSet.hh",
    "src/OT/Layout/GSUB/AlternateSubst.hh",
    "src/OT/Layout/GSUB/AlternateSubstFormat1.hh",
    "src/OT/Layout/GSUB/ChainContextSubst.hh",
    "src/OT/Layout/GSUB/Common.hh",
    "src/OT/Layout/GSUB/ContextSubst.hh",
    "src/OT/Layout/GSUB/ExtensionSubst.hh",
    "src/OT/Layout/GSUB/GSUB.hh",
    "src/OT/Layout/GSUB/Ligature.hh",
    "src/OT/Layout/GSUB/LigatureSet.hh",
    "src/OT/Layout/GSUB/LigatureSubst.hh",
    "src/OT/Layout/GSUB/LigatureSubstFormat1.hh",
    "src/OT/Layout/GSUB/MultipleSubst.hh",
    "src/OT/Layout/GSUB/MultipleSubstFormat1.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubst.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/Sequence.hh",
    "src/OT/Layout/GSUB/SingleSubst.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat2.hh",
    "src/OT/Layout/GSUB/SubstLookup.hh",
    "src/OT/Layout/GSUB/SubstLookupSubTable.hh",
    "src/OT/glyf/CompositeGlyph.hh",
    "src/OT/glyf/Glyph.hh",
    "src/OT/glyf/GlyphHeader.hh",
    "src/OT/glyf/SimpleGlyph.hh",
    "src/OT/glyf/SubsetGlyph.hh",
    "src/OT/glyf/VarCompositeGlyph.hh",
    "src/OT/glyf/composite-iter.hh",
    "src/OT/glyf/coord-setter.hh",
    "src/OT/glyf/glyf-helpers.hh",
    "src/OT/glyf/glyf.hh",
    "src/OT/glyf/loca.hh",
    "src/OT/glyf/path-builder.hh",
    "src/addTable.py",
    "src/check-c-linkage-decls.py",
    "src/check-externs.py",
    "src/check-header-guards.py",
    "src/check-includes.py",
    "src/check-libstdc++.py",
    "src/check-static-inits.py",
    "src/check-symbols.py",
    "src/fix_get_types.py",
    "src/gen-arabic-joining-list.py",
    "src/gen-arabic-pua.py",
    "src/gen-arabic-table.py",
    "src/gen-def.py",
    "src/gen-emoji-table.py",
    "src/gen-harfbuzzcc.py",
    "src/gen-hb-version.py",
    "src/gen-indic-table.py",
    "src/gen-os2-unicode-ranges.py",
    "src/gen-ragel-artifacts.py",
    "src/gen-tag-table.py",
    "src/gen-ucd-table.py",
    "src/gen-use-table.py",
    "src/gen-vowel-constraints.py",
    "src/harfbuzz-cairo.pc.in",
    "src/harfbuzz-config.cmake.in",
    "src/harfbuzz-gobject.pc.in",
    "src/harfbuzz-icu.pc.in",
    "src/harfbuzz-subset.cc",
    "src/harfbuzz-subset.pc.in",
    "src/harfbuzz.cc",
    "src/harfbuzz.pc.in",
    "src/hb-ot-shaper-arabic-joining-list.hh",
    "src/hb-ot-shaper-arabic-pua.hh",
    "src/hb-ot-shaper-arabic-table.hh",
    "src/hb-ot-shaper-indic-table.cc",
    "src/hb-ot-shaper-use-table.hh",
    "src/hb-ot-shaper-vowel-constraints.cc",
    "src/hb-ot-tag-table.hh",
    "src/hb-ucd-table.hh",
    "src/hb-unicode-emoji-table.hh",
    "src/justify.py",
    "src/meson.build",
    "src/ms-use/IndicPositionalCategory-Additional.txt",
    "src/ms-use/IndicShapingInvalidCluster.txt",
    "src/ms-use/IndicSyllabicCategory-Additional.txt",
    "src/relative_to.py",
    "src/sample.py",
    "src/test-use-table.cc",
    "src/update-unicode-tables.make",
    "src/wasm/graphite/Makefile",
    "src/wasm/graphite/shape.cc",
    "src/wasm/rust/harfbuzz-wasm/Cargo.toml",
    "src/wasm/rust/harfbuzz-wasm/src/lib.rs",
    "src/wasm/sample/c/Makefile",
    "src/wasm/sample/c/shape-fallback.cc",
    "src/wasm/sample/c/shape-ot.cc",
    "src/wasm/sample/rust/hello-wasm/Cargo.toml",
    "src/wasm/sample/rust/hello-wasm/src/lib.rs",
    "subprojects/.gitignore",
    "subprojects/cairo.wrap",
    "subprojects/freetype2.wrap",
    "subprojects/glib.wrap",
    "subprojects/google-benchmark.wrap",
    "subprojects/packagefiles/ragel/meson.build",
    "subprojects/ragel.wrap",
    "util/Makefile.am",
    "util/Makefile.sources",
    "util/meson.build",
]
262
class CommentType(Enum):
    """Kind of source a copyright notice is extracted from."""

    # /* ... */
    C_STYLE_BLOCK = 1
    # /* ... */ but uses multiple lines of block comments.
    C_STYLE_BLOCK_AS_LINE = 2
    # // ...
    C_STYLE_LINE = 3
    # # ...
    SCRIPT_STYLE_HASH = 4
    OPENTYPE_NAME = 5
    OPENTYPE_COLLECTION_NAME = 6
    UNKNOWN = 10000
271
272
# Helper function of showing error message and immediate exit.
def fatal(msg: str):
    """Write *msg* and a newline to stderr, then exit with status 1."""
    print(str(msg), file=sys.stderr)
    raise SystemExit(1)
278
279
def warn(msg: str):
    """Write *msg* and a newline to stderr and keep running."""
    print(str(msg), file=sys.stderr)
283
def debug(msg: str):
    """Discard *msg*: debug output is switched off.

    To switch it on, write str(msg) and a newline to sys.stderr here.
    """
    return None
288
289
def cleanup_and_join(out_lines: Sequence[str]):
    """Normalize extracted notice lines and join them into one string.

    Trailing blank lines are dropped first. Then, as long as every line is
    either empty or starts with a space, one leading character is removed
    from each line, which strips the indentation shared by the whole notice.

    Args:
        out_lines: extracted notice lines. The sequence itself is left
            untouched; any sequence type (list, tuple, slice) is accepted.

    Returns:
        The cleaned lines joined with newline characters.

    Calls fatal() when the input is empty or holds only blank lines.
    """
    # Work on a copy so the caller's list is not truncated behind its back.
    cleaned = list(out_lines)
    while cleaned and not cleaned[-1].strip():
        cleaned.pop()

    if not cleaned:
        fatal("Failed to get copyright info")

    # If all lines start with a space, strip it out. The last line is
    # non-blank at this point, so the loop stops once that line no longer
    # starts with a space.
    while cleaned and all(not x or x[0] == ' ' for x in cleaned):
        cleaned = [x[1:] for x in cleaned]

    return "\n".join(cleaned)
301
302
def get_comment_type(copyright_line: str, path_str: str) -> CommentType:
    """Classify a copyright line by the comment marker it starts with.

    path_str is accepted but not consulted.
    """
    prefix_kinds = (
        ("#", CommentType.SCRIPT_STYLE_HASH),
        ("//", CommentType.C_STYLE_LINE),
    )
    for prefix, kind in prefix_kinds:
        if copyright_line.startswith(prefix):
            return kind
    # Every other line is handled as part of a /* ... */ block.
    return CommentType.C_STYLE_BLOCK
310
def extract_copyright_font(path_str: str) -> str:
    """Extract the notice of a font file, chosen by its file suffix.

    Single-font suffixes are read as font number 0; collection suffixes go
    through the collection reader. Any other suffix yields None.
    """
    font_path = Path(path_str)
    extension = font_path.suffix
    if extension in ('.ttf', '.otf', '.dfont'):
        return extract_from_opentype_name(font_path, 0)
    if extension in ('.ttc', '.otc'):
        return extract_from_opentype_collection_name(font_path)
    return None
317
318
# Extract copyright notice and returns next index.
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Dispatch lines[i] to the extractor matching its comment style.

    Returns whatever the selected extractor returns: the notice text and the
    index at which the caller should resume scanning.
    """
    extractors = {
        CommentType.C_STYLE_BLOCK: extract_from_c_style_block_at,
        CommentType.C_STYLE_LINE: extract_from_c_style_lines_at,
        CommentType.SCRIPT_STYLE_HASH: extract_from_script_hash_at,
    }
    extractor = extractors.get(get_comment_type(lines[i], path))
    if extractor is None:
        fatal("Uknown comment style: %s" % lines[i])
        return None
    return extractor(lines, i, path)
331
def extract_from_opentype_collection_name(path: str) -> str:
    """Collect the notices of every font inside a font collection file.

    The first 12 bytes of the file are read: bytes 0-3 must be the tag
    'ttcf' and bytes 8-11 hold the big-endian number of fonts. Each font is
    then passed to extract_from_opentype_name().

    Args:
        path: path of the collection file.

    Returns:
        The distinct notices, sorted and separated by a blank line. The
        result is an empty string when no font provides a notice.

    Calls fatal() when the file does not start with the 'ttcf' tag.
    """
    with open(path, mode="rb") as f:
        head = f.read(12)

    # Compare raw bytes: decoding the tag of a corrupt file could raise
    # UnicodeDecodeError before the mismatch is reported.
    if head[0:4] != b'ttcf':
        fatal('Invalid magic number for TTC file: %s' % path)
    num_fonts = int.from_bytes(head[8:12], byteorder="big")

    notices = set()
    for font_index in range(num_fonts):
        notice = extract_from_opentype_name(path, font_index)
        # A font without any notice yields None, which cannot be joined.
        if notice:
            notices.add(notice)

    # Sorted so the result does not depend on set iteration order.
    return '\n\n'.join(sorted(notices))
347
def extract_from_opentype_name(path: str, index: int) -> str:
    """Build the notice text of one font from its 'name' table.

    The notice starts with name ID 0, or with the first 'CFF ' string that
    contains "Copyright" when there is no such name record. Name IDs 13 and
    14 are appended, each after a blank line, when present. Returns None
    when neither source provides a starting entry.
    """
    # (platformID, platEncID) pairs, lower rank is preferred.
    encoding_rank = {
        (3, 10): 0,
        (0, 6): 1,
        (0, 4): 2,
        (3, 1): 3,
        (0, 3): 4,
        (0, 2): 5,
        (0, 1): 6,
        (0, 0): 7,
    }

    def rank_of(record):
        return encoding_rank.get((record.platformID, record.platEncID), 10000)

    def pick_name_record(name_id: int, font):
        if 'name' not in font:
            return None
        matching = [r for r in font['name'].names if r.nameID == name_id]
        # min() keeps the first record among equally ranked ones.
        return min(matching, key=rank_of, default=None)

    def find_cff_notice(font):
        if 'CFF ' not in font:
            return None
        # Looks like there is no way of getting Notice line in CFF table.
        # Use the line that has "Copyright" in the string pool.
        pool = font['CFF '].cff.strings
        return next((s for s in pool if 'Copyright' in s), None)

    with contextlib.closing(ttLib.TTFont(path, 0, fontNumber=index)) as font:
        holder = pick_name_record(0, font) or find_cff_notice(font)
        if not holder:
            return None

        sections = [str(holder)]
        for extra_id in (13, 14):
            record = pick_name_record(extra_id, font)
            if record:
                sections.append(str(record))

        return "\n\n".join(sections)
422
def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive '//' lines starting at lines[i].

    The run ends at the first line that does not start with "//". Returns
    the cleaned notice and the index just past that terminating line.
    """
    first = i
    stop = first
    while stop < len(lines) and lines[stop].startswith("//"):
        stop += 1

    if first == stop:
        fatal("Failed to get copyright info")

    def strip_marker(text):
        # Andorid.bp uses //# style; longer markers are tried before the
        # shorter ones they contain.
        for marker in ("//# ", "//#", "// "):
            if text.startswith(marker):
                return text[len(marker):]
        if text == "//":
            return ""
        return text

    body = [strip_marker(text) for text in lines[first:stop]]
    return (cleanup_and_join(body), stop + 1)
454
455
def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as '#' comment lines starting at lines[i].

    Args:
        lines: all lines of the file.
        i: index of the line where the notice starts.
        path: path of the file (not consulted here).

    Returns:
        (notice, next_index). The notice is None when lines[i], ignoring
        surrounding whitespace, does not start with "#".

    The run ends at the first line that contains no "#" anywhere (the
    character is not required to be at the start of the line), or at a bare
    "#" line that is directly followed by another bare "#" line.
    """
    # startswith() instead of indexing, so a blank line cannot raise.
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    def is_copyright_end(lines: Sequence[str], i: int) -> bool:
        if "#" not in lines[i]:
            return True
        # Treat double spacing as end of license header. The bounds check
        # keeps a bare "#" on the very last line from indexing past the end.
        if lines[i] == "#" and i + 1 < len(lines) and lines[i + 1] == "#":
            return True
        return False

    start = i
    while i < len(lines):
        if is_copyright_end(lines, i):
            break
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("# "):
            out_lines.append(line[2:])
        elif line == "#":
            out_lines.append(line[1:])
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
488
489
def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice from a /* ... */ style block starting at lines[i].

    Args:
        lines: all lines of the file.
        i: index of the line where the notice starts.
        path: path of the file; one known file gets its underscores removed.

    Returns:
        (notice, next_index): the cleaned notice text and the index just
        past the line that ended the notice.

    The notice always includes lines[i]. It then extends up to and including
    the first later line that contains "*/", or that is the first of two
    consecutive bare " *" lines or two consecutive empty lines.
    """

    def is_copyright_end(lines: Sequence[str], i: int) -> bool:
        if "*/" in lines[i]:
            return True
        # The doubled-separator checks need a following line; on the last
        # line there is none, and indexing it would raise IndexError.
        if i + 1 >= len(lines):
            return False
        if lines[i] == " *" and lines[i + 1] == " *":
            return True
        if lines[i] == "" and lines[i + 1] == "":
            return True
        return False

    start = i
    i += 1  # include at least one line
    while i < len(lines):
        if is_copyright_end(lines, i):
            break
        i += 1
    end = i + 1

    # hb-aots-tester.cpp has underline separater which can be dropped.
    drop_underscores = path.endswith("test/shape/data/aots/hb-aots-tester.cpp")

    out_lines = []
    for line in lines[start:end]:
        clean_line = line

        # Strip beginning "/*" chars
        if clean_line.startswith("/* "):
            clean_line = clean_line[3:]
        if clean_line == "/*":
            clean_line = clean_line[2:]

        # Strip ending "*/" chars
        if clean_line.endswith(" */"):
            clean_line = clean_line[:-3]
        if clean_line.endswith("*/"):
            clean_line = clean_line[:-2]

        # Strip starting " *" chars
        if clean_line.startswith(" * "):
            clean_line = clean_line[3:]
        if clean_line == " *":
            clean_line = clean_line[2:]

        if drop_underscores:
            clean_line = clean_line.replace("_", "")

        # Strip trailing spaces
        clean_line = clean_line.rstrip()

        out_lines.append(clean_line)

    return (cleanup_and_join(out_lines), i + 1)
542
543
# Returns true if the line shows the start of copyright notice.
def is_copyright_line(line: str, path: str) -> bool:
    """Tell whether *line* starts a copyright notice.

    The line must contain "Copyright" and none of the known false
    positives: the word quoted as `Copyright' or "Copyright", the
    identifier OpCode_Copyright, or HB_OT_NAME_ID_COPYRIGHT inside
    src/hb-ot-name.h.
    """
    if "Copyright" not in line:
        return False

    # For avoiding unexpected mismatches, exclude quoted Copyright string
    # and identifiers that merely contain the word.
    false_positives = ("`Copyright'", '"Copyright"', "OpCode_Copyright")
    if any(marker in line for marker in false_positives):
        return False

    in_name_header = path.endswith("src/hb-ot-name.h")
    return not (in_name_header and "HB_OT_NAME_ID_COPYRIGHT" in line)
562
def assert_mandatory_copyright(path_str: str):
    """Abort unless the file is allowed to have no copyright notice.

    Only the first component of the normalized path is compared with
    IGNORE_DIR_IF_NO_COPYRIGHT, so the exemption applies when one of those
    directories is the leading path component.
    """
    path = Path(path_str)
    toplevel_dir, _, _ = str(path).partition(os.sep)

    if toplevel_dir not in IGNORE_DIR_IF_NO_COPYRIGHT:
        fatal("%s does not contain Copyright line" % path)
571
572
# Extract the copyright notice and put it into copyrights arg.
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Collect the copyright notices of one file.

    Args:
        path: file to inspect.
        copyrights: mapping of notice text to the list of paths carrying
            that notice; updated in place.
        no_copyright_files: paths expected to contain no "Copyright"
            string; the entry for *path* is removed when the file indeed
            contains none.

    Font files (by suffix, or anything under ./test/fuzzing/fonts) are read
    through the font extractors. Other files are decoded as UTF-8 with an
    ISO-8859-1 fallback and scanned line by line.

    A file without any notice is passed to assert_mandatory_copyright(),
    unless it was listed in *no_copyright_files*.
    """
    def record(notice: str):
        copyrights.setdefault(notice, []).append(path)

    dirname = os.path.dirname(path)
    is_font = (dirname.endswith('./test/fuzzing/fonts') or
               Path(path).suffix in ['.ttf', '.otf', '.dfont', '.ttc', '.otc'])

    if is_font:
        notice = extract_copyright_font(path)
        if notice:
            record(notice)
        else:
            assert_mandatory_copyright(path)
        return

    # Only text files are loaded here; fonts are opened by their extractor.
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        # ISO-8859-1 maps every byte value, so this fallback cannot fail.
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            assert_mandatory_copyright(path)
        return

    lines = content.splitlines()

    # The COPYING in the in-house dir has full OFL license with description.
    # Use the OFL license description body (everything after the first
    # nine lines).
    if path.endswith("test/shape/data/in-house/COPYING") or path.endswith("test/COPYING"):
        record(cleanup_and_join(lines[9:]))
        return

    # The COPYING in the top dir has MIT-Modern-Variant license with description.
    # Use the entire file as a license notice.
    if path.endswith("COPYING") and str(Path(path)) == 'COPYING':
        record(cleanup_and_join(lines))
        return

    i = 0
    license_found = False
    while i < len(lines):
        if not is_copyright_line(lines[i], path):
            i += 1
            continue

        (notice, nexti) = extract_copyright_at(lines, i, path)
        if notice:
            record(notice)
            license_found = True
        # Resume after the extracted notice instead of rescanning it.
        i = nexti

    if not license_found:
        assert_mandatory_copyright(path)
638
def do_check(path, format):
    """Walk *path*, collect every copyright notice and print the result.

    Args:
        path: root directory to scan; a trailing slash is added if missing.
        format: a Format member selecting the printer to use.

    Files listed in IGNORE_FILE_NAME are skipped. Calls fatal() when an
    IGNORE_FILE_NAME entry was never met during the walk, or when a
    NO_COPYRIGHT_FILES entry was not confirmed by do_file().
    """
    if not path.endswith('/'):  # make sure the path ends with slash
        path = path + '/'

    file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
    no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # skip .git directory; pruning the list in place stops os.walk from
        # descending into it.
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fname in filenames:
            fpath = os.path.join(directory, fname)
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
                continue

            do_file(fpath, copyrights, no_copyright_files)

    # Leftovers are listed in sorted order so the report is reproducible.
    if file_to_ignore:
        fatal("Following files are listed in IGNORE_FILE_NAME but do not exist.\n"
              + "\n".join(sorted(file_to_ignore)))

    # An entry also stays behind when its file does contain "Copyright".
    if no_copyright_files:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but do not "
              "exist or contain a Copyright line.\n"
              + "\n".join(sorted(no_copyright_files)))

    if format == Format.notice:
        print_notice(copyrights, False)
    elif format == Format.notice_with_filename:
        print_notice(copyrights, True)
    elif format == Format.html:
        print_html(copyrights)
    elif format == Format.json:
        print_json(copyrights)
676
def print_html(copyrights):
    """Print the notices to stdout as an HTML table.

    Args:
        copyrights: mapping of notice text to the list of file paths that
            carry the notice.

    Each table row holds the sorted file list in the first cell and the
    notice in the second. Notices are emitted in sorted order. File names
    and notice text are HTML-escaped, so characters such as "<", ">" and
    "&" (common in e-mail addresses inside notices) are shown literally.
    """
    import html  # function-scope: the module header does not import it

    print('<html>')
    print("""
  <head>
    <style>
      table {
        font-family: monospace
      }

      table tr td {
        padding: 10px 10px 10px 10px
      }
    </style>
  </head>
  """)
    print('<body>')
    print('<table border="1" style="border-collapse:collapse">')
    for notice in sorted(copyrights.keys()):
        files = sorted(copyrights[notice])

        print('<tr>')
        print('<td>')
        print('<ul>')
        for file in files:
            print('<li>%s</li>' % html.escape(file))
        print('</ul>')
        print('</td>')

        print('<td>')
        # Escape first, then turn line breaks into markup.
        print('<p>%s</p>' % html.escape(notice).replace('\n', '<br>'))
        print('</td>')

        print('</tr>')

    print('</table>')
    print('</body></html>')
714
def print_notice(copyrights, print_file):
    """Print the notices to stdout as plain text.

    Every notice is followed by a blank line, a 67-dash rule and another
    blank line. When *print_file* is true, the sorted paths carrying the
    notice and a blank line are printed before it.
    """
    rule = "-" * 67
    # print the copyright in sorted order for stable output.
    for notice in sorted(copyrights):
        section = []
        if print_file:
            section.append("\n".join(sorted(copyrights[notice])))
            section.append("")
        section.extend([notice, "", rule, ""])
        print("\n".join(section))
726
def print_json(copyrights):
    """Print the notice-to-files mapping to stdout as one JSON object.

    Args:
        copyrights: mapping of notice text to the list of file paths that
            carry the notice; it is not modified.

    Keys and each file list are sorted, so the output is the same no matter
    in which order the files were discovered.
    """
    stable = {notice: sorted(paths) for notice, paths in copyrights.items()}
    print(json.dumps(stable, sort_keys=True))
729
class Format(Enum):
    """Output formats accepted by the --format option."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'
    json = 'json'

    def __str__(self):
        """Render as the bare value, e.g. "html" rather than "Format.html"."""
        return self.value
738
def main():
    """Parse the command line and print the notices found under --target."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
                        default=Format.notice,
                        help="output format of the collected notices")
    parser.add_argument("--target", dest="target", action='store',
                        required=True, help="target directory to collect notice headers")
    res = parser.parse_args()
    do_check(res.target, res.format)


if __name__ == "__main__":
    main()
750