Blame - generate_notice.py - platform/external/harfbuzz_ng

blob: 5b00e402a39ebf6bfd600bcdef2578a34c7c9ad8 [file] [log] [blame]

Seigo Nonaka	246cc29	2022-12-16 00:56:26 +0900	[diff] [blame]	1	#!/usr/bin/env python3
				2
				3	from enum import Enum
				4	from pathlib import Path
				5	from typing import Sequence
				6	from typing import Tuple
				7	from fontTools import ttLib
				8	import tempfile
				9	import subprocess
				10	import json
				11	import argparse
				12	import contextlib
				13	import os
				14	import re
				15	import sys
				16
# Files that are skipped entirely while collecting notices.
# Entries are joined onto the target directory by do_check(); every entry is
# expected to be found during the directory walk, otherwise do_check() aborts.
IGNORE_FILE_NAME = [
    # Exclude myself
    "generate_notice.py",

    # License files
    "LICENSE",
    "LICENSE_APACHE2.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_GPLv2.TXT",
    "LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_HPND_SELL_VARIANT.TXT",
    "LICENSE_ISC.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "LICENSE_OFL.TXT",
    "METADATA",
    "MODULE_LICENSE_MIT",
    "NOTICE",

    # dictionary which has Copyright word
    "perf/texts/en-words.txt",

    # broken unreadable font file for fuzzing target
    "test/fuzzing/fonts/sbix-extents.ttf",
]
				43
# Top-level directories whose files are allowed to lack a copyright notice:
# assert_mandatory_copyright() returns silently instead of aborting when the
# first path component of the file is one of these names.
IGNORE_DIR_IF_NO_COPYRIGHT = [
    "test",
    "perf",
]
				48
# Files that are known to contain no "Copyright" text.
# do_file() removes an entry when it meets that file and finds no "Copyright"
# in it; do_check() aborts if any entry is still left after the walk.
NO_COPYRIGHT_FILES = [
    ".ci/build-win32.sh",
    ".ci/build-win64.sh",
    ".ci/deploy-docs.sh",
    ".ci/publish_release_artifact.sh",
    ".ci/requirements-fonttools.in",
    ".ci/requirements-fonttools.txt",
    ".ci/requirements.in",
    ".ci/requirements.txt",
    ".ci/win32-cross-file.txt",
    ".ci/win64-cross-file.txt",
    ".circleci/config.yml",
    ".clang-format",
    ".codecov.yml",
    ".editorconfig",
    ".github/dependabot.yml",
    ".github/workflows/arm-ci.yml",
    ".github/workflows/cifuzz.yml",
    ".github/workflows/configs-build.yml",
    ".github/workflows/coverity-scan.yml",
    ".github/workflows/linux-ci.yml",
    ".github/workflows/macos-ci.yml",
    ".github/workflows/msvc-ci.yml",
    ".github/workflows/msys2-ci.yml",
    ".github/workflows/scorecard.yml",
    "AUTHORS",
    "BUILD.md",
    "CMakeLists.txt",
    "CONFIG.md",
    "Makefile.am",
    "NEWS",
    "OWNERS",
    "README",
    "README.android",
    "README.md",
    "README.mingw.md",
    "README.python.md",
    "RELEASING.md",
    "SECURITY.md",
    "TESTING.md",
    "TEST_MAPPING",
    "THANKS",
    "autogen.sh",
    "configure.ac",
    "docs/HarfBuzz.png",
    "docs/HarfBuzz.svg",
    "docs/Makefile.am",
    "docs/features.dot",
    "docs/harfbuzz-docs.xml",
    "docs/harfbuzz-overrides.txt",
    "docs/harfbuzz-sections.txt",
    "docs/meson.build",
    "docs/repacker.md",
    "docs/serializer.md",
    "docs/subset-preprocessing.md",
    "docs/usermanual-buffers-language-script-and-direction.xml",
    "docs/usermanual-clusters.xml",
    "docs/usermanual-fonts-and-faces.xml",
    "docs/usermanual-getting-started.xml",
    "docs/usermanual-glyph-information.xml",
    "docs/usermanual-install-harfbuzz.xml",
    "docs/usermanual-integration.xml",
    "docs/usermanual-object-model.xml",
    "docs/usermanual-opentype-features.xml",
    "docs/usermanual-shaping-concepts.xml",
    "docs/usermanual-utilities.xml",
    "docs/usermanual-what-is-harfbuzz.xml",
    "docs/version.xml.in",
    "docs/wasm-shaper.md",
    "harfbuzz.doap",
    "meson.build",
    "meson_options.txt",
    "mingw-configure.sh",
    "replace-enum-strings.cmake",
    "src/ArabicPUASimplified.txt",
    "src/ArabicPUATraditional.txt",
    "src/Makefile.am",
    "src/Makefile.sources",
    "src/OT/Layout/GPOS/Anchor.hh",
    "src/OT/Layout/GPOS/AnchorFormat1.hh",
    "src/OT/Layout/GPOS/AnchorFormat2.hh",
    "src/OT/Layout/GPOS/AnchorFormat3.hh",
    "src/OT/Layout/GPOS/AnchorMatrix.hh",
    "src/OT/Layout/GPOS/ChainContextPos.hh",
    "src/OT/Layout/GPOS/Common.hh",
    "src/OT/Layout/GPOS/ContextPos.hh",
    "src/OT/Layout/GPOS/CursivePos.hh",
    "src/OT/Layout/GPOS/CursivePosFormat1.hh",
    "src/OT/Layout/GPOS/ExtensionPos.hh",
    "src/OT/Layout/GPOS/GPOS.hh",
    "src/OT/Layout/GPOS/LigatureArray.hh",
    "src/OT/Layout/GPOS/MarkArray.hh",
    "src/OT/Layout/GPOS/MarkBasePos.hh",
    "src/OT/Layout/GPOS/MarkBasePosFormat1.hh",
    "src/OT/Layout/GPOS/MarkLigPos.hh",
    "src/OT/Layout/GPOS/MarkLigPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkMarkPos.hh",
    "src/OT/Layout/GPOS/MarkMarkPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkRecord.hh",
    "src/OT/Layout/GPOS/PairPos.hh",
    "src/OT/Layout/GPOS/PairPosFormat1.hh",
    "src/OT/Layout/GPOS/PairPosFormat2.hh",
    "src/OT/Layout/GPOS/PairSet.hh",
    "src/OT/Layout/GPOS/PairValueRecord.hh",
    "src/OT/Layout/GPOS/PosLookup.hh",
    "src/OT/Layout/GPOS/PosLookupSubTable.hh",
    "src/OT/Layout/GPOS/SinglePos.hh",
    "src/OT/Layout/GPOS/SinglePosFormat1.hh",
    "src/OT/Layout/GPOS/SinglePosFormat2.hh",
    "src/OT/Layout/GPOS/ValueFormat.hh",
    "src/OT/Layout/GSUB/AlternateSet.hh",
    "src/OT/Layout/GSUB/AlternateSubst.hh",
    "src/OT/Layout/GSUB/AlternateSubstFormat1.hh",
    "src/OT/Layout/GSUB/ChainContextSubst.hh",
    "src/OT/Layout/GSUB/Common.hh",
    "src/OT/Layout/GSUB/ContextSubst.hh",
    "src/OT/Layout/GSUB/ExtensionSubst.hh",
    "src/OT/Layout/GSUB/GSUB.hh",
    "src/OT/Layout/GSUB/Ligature.hh",
    "src/OT/Layout/GSUB/LigatureSet.hh",
    "src/OT/Layout/GSUB/LigatureSubst.hh",
    "src/OT/Layout/GSUB/LigatureSubstFormat1.hh",
    "src/OT/Layout/GSUB/MultipleSubst.hh",
    "src/OT/Layout/GSUB/MultipleSubstFormat1.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubst.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/Sequence.hh",
    "src/OT/Layout/GSUB/SingleSubst.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat2.hh",
    "src/OT/Layout/GSUB/SubstLookup.hh",
    "src/OT/Layout/GSUB/SubstLookupSubTable.hh",
    "src/OT/glyf/CompositeGlyph.hh",
    "src/OT/glyf/Glyph.hh",
    "src/OT/glyf/GlyphHeader.hh",
    "src/OT/glyf/SimpleGlyph.hh",
    "src/OT/glyf/SubsetGlyph.hh",
    "src/OT/glyf/VarCompositeGlyph.hh",
    "src/OT/glyf/composite-iter.hh",
    "src/OT/glyf/coord-setter.hh",
    "src/OT/glyf/glyf-helpers.hh",
    "src/OT/glyf/glyf.hh",
    "src/OT/glyf/loca.hh",
    "src/OT/glyf/path-builder.hh",
    "src/addTable.py",
    "src/check-c-linkage-decls.py",
    "src/check-externs.py",
    "src/check-header-guards.py",
    "src/check-includes.py",
    "src/check-libstdc++.py",
    "src/check-static-inits.py",
    "src/check-symbols.py",
    "src/fix_get_types.py",
    "src/gen-arabic-joining-list.py",
    "src/gen-arabic-pua.py",
    "src/gen-arabic-table.py",
    "src/gen-def.py",
    "src/gen-emoji-table.py",
    "src/gen-harfbuzzcc.py",
    "src/gen-hb-version.py",
    "src/gen-indic-table.py",
    "src/gen-os2-unicode-ranges.py",
    "src/gen-ragel-artifacts.py",
    "src/gen-tag-table.py",
    "src/gen-ucd-table.py",
    "src/gen-use-table.py",
    "src/gen-vowel-constraints.py",
    "src/harfbuzz-cairo.pc.in",
    "src/harfbuzz-config.cmake.in",
    "src/harfbuzz-gobject.pc.in",
    "src/harfbuzz-icu.pc.in",
    "src/harfbuzz-subset.cc",
    "src/harfbuzz-subset.pc.in",
    "src/harfbuzz.cc",
    "src/harfbuzz.pc.in",
    "src/hb-ot-shaper-arabic-joining-list.hh",
    "src/hb-ot-shaper-arabic-pua.hh",
    "src/hb-ot-shaper-arabic-table.hh",
    "src/hb-ot-shaper-indic-table.cc",
    "src/hb-ot-shaper-use-table.hh",
    "src/hb-ot-shaper-vowel-constraints.cc",
    "src/hb-ot-tag-table.hh",
    "src/hb-ucd-table.hh",
    "src/hb-unicode-emoji-table.hh",
    "src/justify.py",
    "src/meson.build",
    "src/ms-use/IndicPositionalCategory-Additional.txt",
    "src/ms-use/IndicShapingInvalidCluster.txt",
    "src/ms-use/IndicSyllabicCategory-Additional.txt",
    "src/relative_to.py",
    "src/sample.py",
    "src/test-use-table.cc",
    "src/update-unicode-tables.make",
    "src/wasm/graphite/Makefile",
    "src/wasm/graphite/shape.cc",
    "src/wasm/rust/harfbuzz-wasm/Cargo.toml",
    "src/wasm/rust/harfbuzz-wasm/src/lib.rs",
    "src/wasm/sample/c/Makefile",
    "src/wasm/sample/c/shape-fallback.cc",
    "src/wasm/sample/c/shape-ot.cc",
    "src/wasm/sample/rust/hello-wasm/Cargo.toml",
    "src/wasm/sample/rust/hello-wasm/src/lib.rs",
    "subprojects/.gitignore",
    "subprojects/cairo.wrap",
    "subprojects/freetype2.wrap",
    "subprojects/glib.wrap",
    "subprojects/google-benchmark.wrap",
    "subprojects/packagefiles/ragel/meson.build",
    "subprojects/ragel.wrap",
    "util/Makefile.am",
    "util/Makefile.sources",
    "util/meson.build",
]
				262
				263	class CommentType(Enum):
				264	C_STYLE_BLOCK = 1 # /* ... */
				265	C_STYLE_BLOCK_AS_LINE = 2 # /* ... */ but uses multiple lines of block comments.
				266	C_STYLE_LINE = 3 # // ...
				267	SCRIPT_STYLE_HASH = 4 # # ...
				268	OPENTYPE_NAME = 5
				269	OPENTYPE_COLLECTION_NAME = 6
				270	UNKNOWN = 10000
				271
				272
				273	# Helper function of showing error message and immediate exit.
				274	def fatal(msg: str):
				275	sys.stderr.write(str(msg))
				276	sys.stderr.write("\n")
				277	sys.exit(1)
				278
				279
				280	def warn(msg: str):
				281	sys.stderr.write(str(msg))
				282	sys.stderr.write("\n")
				283
				284	def debug(msg: str):
				285	# sys.stderr.write(str(msg))
				286	# sys.stderr.write("\n")
				287	pass
				288
				289
				290	def cleanup_and_join(out_lines: Sequence[str]):
				291	while not out_lines[-1].strip():
				292	out_lines.pop(-1)
				293
				294	# If all lines starts from empty space, strip it out.
				295	while all([len(x) == 0 or x[0] == ' ' for x in out_lines]):
				296	out_lines = [x[1:] for x in out_lines]
				297
				298	if not out_lines:
				299	fatal("Failed to get copyright info")
				300	return "\n".join(out_lines)
				301
				302
				303	def get_comment_type(copyright_line: str, path_str: str) -> CommentType:
				304	# vms_make.com contains multiple copyright header as a string constants.
				305	if copyright_line.startswith("#"):
				306	return CommentType.SCRIPT_STYLE_HASH
				307	if copyright_line.startswith("//"):
				308	return CommentType.C_STYLE_LINE
				309	return CommentType.C_STYLE_BLOCK
				310
				311	def extract_copyright_font(path_str: str) -> str:
				312	path = Path(path_str)
				313	if path.suffix in ['.ttf', '.otf', '.dfont']:
				314	return extract_from_opentype_name(path, 0)
				315	elif path.suffix in ['.ttc', '.otc']:
				316	return extract_from_opentype_collection_name(path)
				317
				318
				319	# Extract copyright notice and returns next index.
				320	def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
				321	commentType = get_comment_type(lines[i], path)
				322
				323	if commentType == CommentType.C_STYLE_BLOCK:
				324	return extract_from_c_style_block_at(lines, i, path)
				325	elif commentType == CommentType.C_STYLE_LINE:
				326	return extract_from_c_style_lines_at(lines, i, path)
				327	elif commentType == CommentType.SCRIPT_STYLE_HASH:
				328	return extract_from_script_hash_at(lines, i, path)
				329	else:
				330	fatal("Uknown comment style: %s" % lines[i])
				331
				332	def extract_from_opentype_collection_name(path: str) -> str:
				333
				334	with open(path, mode="rb") as f:
				335	head = f.read(12)
				336
				337	if head[0:4].decode() != 'ttcf':
				338	fatal('Invalid magic number for TTC file: %s' % path)
				339	numFonts = int.from_bytes(head[8:12], byteorder="big")
				340
				341	licenses = set()
				342	for i in range(0, numFonts):
				343	license = extract_from_opentype_name(path, i)
				344	licenses.add(license)
				345
				346	return '\n\n'.join(licenses)
				347
				348	def extract_from_opentype_name(path: str, index: int) -> str:
				349
				350	def get_preferred_name(nameID: int, ttf):
				351	def get_score(platID: int, encID: int):
				352	if platID == 3 and encID == 10:
				353	return 0
				354	elif platID == 0 and encID == 6:
				355	return 1
				356	elif platID == 0 and encID == 4:
				357	return 2
				358	elif platID == 3 and encID == 1:
				359	return 3
				360	elif platID == 0 and encID == 3:
				361	return 4
				362	elif platID == 0 and encID == 2:
				363	return 5
				364	elif platID == 0 and encID == 1:
				365	return 6
				366	elif platID == 0 and encID == 0:
				367	return 7
				368	else:
				369	return 10000
				370
				371	best_score = 1000000
				372	best_name = None
				373
				374	if 'name' not in ttf:
				375	return None
				376
				377	for name in ttf['name'].names:
				378	if name.nameID != nameID:
				379	continue
				380
				381	score = get_score(name.platformID, name.platEncID)
				382	if score < best_score:
				383	best_score = score
				384	best_name = name
				385
				386	return best_name
				387
				388	def get_notice_from_cff(ttf):
				389	if 'CFF ' not in ttf:
				390	return None
				391
				392	# Looks like there is no way of getting Notice line in CFF table.
				393	# Use the line that has "Copyright" in the string pool.
				394	cff = ttf['CFF '].cff
				395	for string in cff.strings:
				396	if 'Copyright' in string:
				397	return string
				398	return None
				399
				400	with contextlib.closing(ttLib.TTFont(path, 0, fontNumber=index)) as ttf:
				401	copyright = get_preferred_name(0, ttf)
				402	if not copyright:
				403	copyright = get_notice_from_cff(ttf)
				404	if not copyright:
				405	return None
				406
				407	license_description = get_preferred_name(13, ttf)
				408
				409	if license_description:
				410	copyright = str(copyright) + "\n\n" + str(license_description)
				411	else:
				412	copyright = str(copyright)
				413
				414	license_url = get_preferred_name(14, ttf)
				415
				416	if license_url:
				417	copyright = str(copyright) + "\n\n" + str(license_url)
				418	else:
				419	copyright = str(copyright)
				420
				421	return copyright
				422
				423	def extract_from_c_style_lines_at(
				424	lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
				425	def is_copyright_end(line):
				426	if line.startswith("//"):
				427	return False
				428	else:
				429	return True
				430	start = i
				431	while i < len(lines):
				432	if is_copyright_end(lines[i]):
				433	break
				434	i += 1
				435	end = i
				436
				437	if start == end:
				438	fatal("Failed to get copyright info")
				439
				440	out_lines = []
				441	for line in lines[start:end]:
				442	if line.startswith("//# "): # Andorid.bp uses //# style
				443	out_lines.append(line[4:])
				444	elif line.startswith("//#"): # Andorid.bp uses //# style
				445	out_lines.append(line[3:])
				446	elif line.startswith("// "):
				447	out_lines.append(line[3:])
				448	elif line == "//":
				449	out_lines.append(line[2:])
				450	else:
				451	out_lines.append(line)
				452
				453	return (cleanup_and_join(out_lines), i + 1)
				454
				455
				456	def extract_from_script_hash_at(
				457	lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
				458	if lines[i].strip()[0] != "#":
				459	return (None, i + 1)
				460	def is_copyright_end(lines: str, i: int) -> bool:
				461	if "#" not in lines[i]:
				462	return True
				463	# treat double spacing as end of license header
				464	if lines[i] == "#" and lines[i+1] == "#":
				465	return True
				466	return False
				467
				468	start = i
				469	while i < len(lines):
				470	if is_copyright_end(lines, i):
				471	break
				472	i += 1
				473	end = i
				474
				475	if start == end:
				476	fatal("Failed to get copyright info")
				477
				478	out_lines = []
				479	for line in lines[start:end]:
				480	if line.startswith("# "):
				481	out_lines.append(line[2:])
				482	elif line == "#":
				483	out_lines.append(line[1:])
				484	else:
				485	out_lines.append(line)
				486
				487	return (cleanup_and_join(out_lines), i + 1)
				488
				489
				490	def extract_from_c_style_block_at(
				491	lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
				492
				493	def is_copyright_end(lines: str, i: int) -> bool:
				494	if "*/" in lines[i]:
				495	return True
				496	if lines[i] == " " and lines[i + 1] == " ":
				497	return True
				498	if lines[i] == "" and lines[i + 1] == "":
				499	return True
				500	return False
				501
				502	start = i
				503	i += 1 # include at least one line
				504	while i < len(lines):
				505	if is_copyright_end(lines, i):
				506	break
				507	i += 1
				508	end = i + 1
				509
				510	out_lines = []
				511	for line in lines[start:end]:
				512	clean_line = line
				513
				514	# Strip begining "/*" chars
				515	if clean_line.startswith("/* "):
				516	clean_line = clean_line[3:]
				517	if clean_line == "/*":
				518	clean_line = clean_line[2:]
				519
				520	# Strip ending "*/" chars
				521	if clean_line.endswith(" */"):
				522	clean_line = clean_line[:-3]
				523	if clean_line.endswith("*/"):
				524	clean_line = clean_line[:-2]
				525
				526	# Strip starting " *" chars
				527	if clean_line.startswith(" * "):
				528	clean_line = clean_line[3:]
				529	if clean_line == " *":
				530	clean_line = clean_line[2:]
				531
				532	# hb-aots-tester.cpp has underline separater which can be dropped.
				533	if path.endswith("test/shape/data/aots/hb-aots-tester.cpp"):
				534	clean_line = clean_line.replace("_", "")
				535
				536	# Strip trailing spaces
				537	clean_line = clean_line.rstrip()
				538
				539	out_lines.append(clean_line)
				540
				541	return (cleanup_and_join(out_lines), i + 1)
				542
				543
				544	# Returns true if the line shows the start of copyright notice.
				545	def is_copyright_line(line: str, path: str) -> bool:
				546	if "Copyright" not in line:
				547	return False
				548
				549	# For avoiding unexpected mismatches, exclude quoted Copyright string.
				550	if "`Copyright'" in line:
				551	return False
				552	if "\"Copyright\"" in line:
				553	return False
				554
				555	if "OpCode_Copyright" in line:
				556	return False
				557
				558	if path.endswith("src/hb-ot-name.h") and "HB_OT_NAME_ID_COPYRIGHT" in line:
				559	return False
				560
				561	return True
				562
				563	def assert_mandatory_copyright(path_str: str):
				564	path = Path(path_str)
				565	toplevel_dir = str(path).split(os.sep)[0]
				566
				567	if toplevel_dir in IGNORE_DIR_IF_NO_COPYRIGHT:
				568	return
				569
				570	fatal("%s does not contain Copyright line" % path)
				571
				572
				573	# Extract the copyright notice and put it into copyrights arg.
				574	def do_file(path: str, copyrights: set, no_copyright_files: set):
				575	raw = Path(path).read_bytes()
				576	basename = os.path.basename(path)
				577	dirname = os.path.dirname(path)
				578
				579	is_font = (dirname.endswith('./test/fuzzing/fonts') or
				580	Path(path).suffix in ['.ttf', '.otf', '.dfont', '.ttc', '.otc'])
				581
				582	if is_font:
				583	notice = extract_copyright_font(path)
				584	if not notice:
				585	assert_mandatory_copyright(path)
				586	return
				587
				588	if not notice in copyrights:
				589	copyrights[notice] = []
				590	copyrights[notice].append(path)
				591	else:
				592	try:
				593	content = raw.decode("utf-8")
				594	except UnicodeDecodeError:
				595	content = raw.decode("iso-8859-1")
				596
				597	if not "Copyright" in content:
				598	if path in no_copyright_files:
				599	no_copyright_files.remove(path)
				600	else:
				601	assert_mandatory_copyright(path)
				602	return
				603
				604	lines = content.splitlines()
				605
				606	# The COPYING in the in-house dir has full OFL license with description.
				607	# Use the OFL license description body.
Seigo Nonaka	e3320e0	2023-05-29 16:16:05 +0900	[diff] [blame]	608	if path.endswith("test/shape/data/in-house/COPYING") or path.endswith("test/COPYING"):
Seigo Nonaka	246cc29	2022-12-16 00:56:26 +0900	[diff] [blame]	609	notice = cleanup_and_join(lines[9:])
				610	copyrights.setdefault(notice, [])
				611	copyrights[notice].append(path)
				612	return
				613
				614	# The COPYING in the top dir has MIT-Modern-Variant license with description.
				615	# Use the entire file as a license notice.
				616	if path.endswith("COPYING") and str(Path(path)) == 'COPYING':
				617	notice = cleanup_and_join(lines)
				618	copyrights.setdefault(notice, [])
				619	copyrights[notice].append(path)
				620	return
				621
				622	i = 0
				623	license_found = False
				624	while i < len(lines):
				625	if is_copyright_line(lines[i], path):
				626	(notice, nexti) = extract_copyright_at(lines, i, path)
				627	if notice:
				628	copyrights.setdefault(notice, [])
				629	copyrights[notice].append(path)
				630	license_found = True
				631
				632	i = nexti
				633	else:
				634	i += 1
				635
				636	if not license_found:
				637	assert_mandatory_copyright(path)
				638
				639	def do_check(path, format):
				640	if not path.endswith('/'): # make sure the path ends with slash
				641	path = path + '/'
				642
				643	file_to_ignore = set([os.path.join(path, x) for x in IGNORE_FILE_NAME])
				644	no_copyright_files = set([os.path.join(path, x) for x in NO_COPYRIGHT_FILES])
				645	copyrights = {}
				646
				647	for directory, sub_directories, filenames in os.walk(path):
				648	# skip .git directory
				649	if ".git" in sub_directories:
				650	sub_directories.remove(".git")
				651
				652	for fname in filenames:
				653	fpath = os.path.join(directory, fname)
				654	if fpath in file_to_ignore:
				655	file_to_ignore.remove(fpath)
				656	continue
				657
				658	do_file(fpath, copyrights, no_copyright_files)
				659
				660	if len(file_to_ignore) != 0:
				661	fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
				662	+ "\n".join(file_to_ignore))
				663
				664	if len(no_copyright_files) != 0:
				665	fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
				666	+ "\n".join(no_copyright_files))
				667
				668	if format == Format.notice:
				669	print_notice(copyrights, False)
				670	elif format == Format.notice_with_filename:
				671	print_notice(copyrights, True)
				672	elif format == Format.html:
				673	print_html(copyrights)
				674	elif format == Format.json:
				675	print_json(copyrights)
				676
				677	def print_html(copyrights):
				678	print('<html>')
				679	print("""
				680	<head>
				681	<style>
				682	table {
				683	font-family: monospace
				684	}
				685
				686	table tr td {
				687	padding: 10px 10px 10px 10px
				688	}
				689	</style>
				690	</head>
				691	""")
				692	print('<body>')
				693	print('<table border="1" style="border-collapse:collapse">')
				694	for notice in sorted(copyrights.keys()):
				695	files = sorted(copyrights[notice])
				696
				697	print('<tr>')
				698	print('<td>')
				699	print('<ul>')
				700	for file in files:
				701	print('<li>%s</li>' % file)
				702	print('</ul>')
				703	print('</td>')
				704
				705	print('<td>')
				706	print('<p>%s</p>' % notice.replace('\n', '<br>'))
				707	print('</td>')
				708
				709	print('</tr>')
				710
				711
				712	print('</table>')
				713	print('</body></html>')
				714
				715	def print_notice(copyrights, print_file):
				716	# print the copyright in sorted order for stable output.
				717	for notice in sorted(copyrights.keys()):
				718	if print_file:
				719	files = sorted(copyrights[notice])
				720	print("\n".join(files))
				721	print()
				722	print(notice)
				723	print()
				724	print("-" * 67)
				725	print()
				726
				727	def print_json(copyrights):
				728	print(json.dumps(copyrights))
				729
				730	class Format(Enum):
				731	notice = 'notice'
				732	notice_with_filename = 'notice_with_filename'
				733	html = 'html'
				734	json = 'json'
				735
				736	def __str__(self):
				737	return self.value
				738
				739	def main():
				740	parser = argparse.ArgumentParser(description="Collect notice headers.")
				741	parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
				742	default=Format.notice, help="print filename before the license notice")
				743	parser.add_argument("--target", dest="target", action='store',
				744	required=True, help="target directory to collect notice headers")
				745	res = parser.parse_args()
				746	do_check(res.target, res.format)
				747
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
				750