Ebrahim Byagowi | 8d19907 | 2020-02-19 14:56:55 +0330 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 2 | # flake8: noqa: F821 |
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 3 | |
David Corbett | 3e635cf | 2021-10-08 17:13:22 -0400 | [diff] [blame] | 4 | """usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt ArabicShaping.txt DerivedCoreProperties.txt UnicodeData.txt Blocks.txt Scripts.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt |
Ebrahim Byagowi | 6a390df | 2020-02-10 17:19:23 +0330 | [diff] [blame] | 5 | |
David Corbett | 06f49fc | 2020-08-13 13:37:45 -0400 | [diff] [blame] | 6 | Input files: |
Ebrahim Byagowi | 6a390df | 2020-02-10 17:19:23 +0330 | [diff] [blame] | 7 | * https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt |
| 8 | * https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt |
David Corbett | 3e635cf | 2021-10-08 17:13:22 -0400 | [diff] [blame] | 9 | * https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt |
David Corbett | 8eaee2f | 2021-10-07 20:10:31 -0400 | [diff] [blame] | 10 | * https://unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
Ebrahim Byagowi | 6a390df | 2020-02-10 17:19:23 +0330 | [diff] [blame] | 11 | * https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 12 | * https://unicode.org/Public/UCD/latest/ucd/Blocks.txt |
David Corbett | 3e635cf | 2021-10-08 17:13:22 -0400 | [diff] [blame] | 13 | * https://unicode.org/Public/UCD/latest/ucd/Scripts.txt |
David Corbett | 06f49fc | 2020-08-13 13:37:45 -0400 | [diff] [blame] | 14 | * ms-use/IndicSyllabicCategory-Additional.txt |
David Corbett | c39ab82 | 2020-10-06 16:51:40 -0400 | [diff] [blame] | 15 | * ms-use/IndicPositionalCategory-Additional.txt |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 16 | """ |
| 17 | |
Behdad Esfahbod | 29f8d9f | 2023-11-09 09:29:02 -0700 | [diff] [blame] | 18 | import logging |
| 19 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) |
| 20 | |
| 21 | |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 22 | import sys |
| 23 | |
# Exactly nine input paths are expected after the script name; otherwise bail
# out with the usage text from the module docstring.
expected_argc = 10
if len(sys.argv) != expected_argc:
    sys.exit(__doc__)
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 26 | |
# Scripts whose code points are dropped from the merged data (the filter on
# the Scripts.txt column is applied when `combined` is built).
DISABLED_SCRIPTS = {"Arabic", "Lao", "Samaritan", "Syriac", "Thai"}
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 34 | |
# Open every input named on the command line, in argument order.
files = [open(path, encoding='utf-8') for path in sys.argv[1:]]

# The first two lines of each file form its header.  File 4 (UnicodeData.txt
# in the documented argument order) is skipped here and gets a fixed
# placeholder entry at the end, so the header slots of later files are
# shifted down by one.
headers = []
for index, handle in enumerate(files):
    if index == 4:
        continue
    headers.append([handle.readline(), handle.readline()])

# For files 7 and 8, keep extending the header with the following lines up to
# (not including) the first blank line.
for index in (7, 8):
    for line in files[index]:
        line = line.rstrip()
        if not line:
            break
        headers[index - 1].append(line)
headers.append(["UnicodeData.txt does not have a header."])
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 45 | |
# unicode_data[k] maps code point -> property value for input slot k, and
# values[k] counts how many code points carry each value in that slot.
# Files 7 and 8 (the "-Additional" inputs) are folded into slots 0 and 1, so
# slots 7 and 8 stay empty.
unicode_data = [{} for _ in files]
values = [{} for _ in files]
for i, f in enumerate(files):
    # Each input is read exactly once here; the `with` closes the handle as
    # soon as it has been consumed (or if parsing raises) instead of leaving
    # all nine files open for the rest of the run.
    with f:
        for line in f:

            # Drop a trailing '#' comment, if any.
            line = line.partition('#')[0]

            fields = [x.strip() for x in line.split(';')]
            if len(fields) == 1:
                # Blank or comment-only line.
                continue

            # First column: one hex code point, or a "start..end" range.
            uu = fields[0].split('..')
            start = int(uu[0], 16)
            if len(uu) == 1:
                end = start
            else:
                end = int(uu[1], 16)

            # Files 2 and 4 carry the value of interest in the third column;
            # every other file carries it in the second.
            t = fields[2 if i in (2, 4) else 1]

            if i == 2:
                # Prefix joining types so they cannot collide with other names.
                t = 'jt_' + t
            elif i == 3 and t != 'Default_Ignorable_Code_Point':
                # Only this one property is kept from file 3.
                continue
            elif i == 7 and t == 'Consonant_Final_Modifier':
                # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/336
                t = 'Syllable_Modifier'
            elif i == 8 and t == 'NA':
                t = 'Not_Applicable'

            i0 = i if i < 7 else i - 7
            for u in range(start, end + 1):
                unicode_data[i0][u] = t
            values[i0][t] = values[i0].get(t, 0) + end - start + 1
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 82 | |
# Fallback value for each of the seven merged columns, in slot order.
defaults = ('Other', 'Not_Applicable', 'jt_X', '', 'Cn', 'No_Block', 'Unknown')

# Merge data into one dict:
# every default counts as one extra occurrence of its value in its slot.
for slot, fallback in enumerate(defaults):
    values[slot][fallback] = values[slot].get(fallback, 0) + 1

# combined[code point] is a seven-item list, one entry per slot.  Only slots
# 0-3 may introduce a new code point; later slots just fill in rows that
# already exist.
combined = {}
for slot, table in enumerate(unicode_data):
    for cp, value in table.items():
        row = combined.get(cp)
        if row is None:
            if slot >= 4:
                continue
            row = combined[cp] = list(defaults)
        row[slot] = value

# Drop every code point whose script (column 6) is disabled.
combined = {cp: row for cp, row in combined.items() if row[6] not in DISABLED_SCRIPTS}
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 97 | |
Behdad Esfahbod | 20e246e | 2015-07-20 15:56:19 +0100 | [diff] [blame] | 98 | |
# Every property value name that is turned into a module-level PropertyValue
# constant further down.  Order is preserved from the original listing.
property_names = [
    # General_Category
    'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf',
    'Pi', 'Po', 'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
    # Indic_Syllabic_Category
    'Other', 'Bindu', 'Visarga', 'Avagraha', 'Nukta', 'Virama',
    'Pure_Killer', 'Reordering_Killer', 'Invisible_Stacker',
    'Vowel_Independent', 'Vowel_Dependent', 'Vowel',
    'Consonant_Placeholder', 'Consonant', 'Consonant_Dead',
    'Consonant_With_Stacker', 'Consonant_Prefixed',
    'Consonant_Preceding_Repha', 'Consonant_Succeeding_Repha',
    'Consonant_Subjoined', 'Consonant_Medial', 'Consonant_Final',
    'Consonant_Head_Letter', 'Consonant_Initial_Postfixed',
    'Modifying_Letter', 'Tone_Letter', 'Tone_Mark', 'Gemination_Mark',
    'Cantillation_Mark', 'Register_Shifter', 'Syllable_Modifier',
    'Consonant_Killer', 'Non_Joiner', 'Joiner', 'Number_Joiner', 'Number',
    'Brahmi_Joining_Number', 'Symbol_Modifier',
    'Hieroglyph', 'Hieroglyph_Joiner',
    'Hieroglyph_Mark_Begin', 'Hieroglyph_Mark_End',
    'Hieroglyph_Mirror', 'Hieroglyph_Modifier',
    'Hieroglyph_Segment_Begin', 'Hieroglyph_Segment_End',
    # Indic_Positional_Category
    'Not_Applicable', 'Right', 'Left', 'Visual_Order_Left', 'Left_And_Right',
    'Top', 'Bottom', 'Top_And_Bottom', 'Top_And_Bottom_And_Left',
    'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right',
    'Bottom_And_Left', 'Bottom_And_Right', 'Top_And_Bottom_And_Right',
    'Overstruck',
    # Joining_Type
    'jt_C', 'jt_D', 'jt_L', 'jt_R', 'jt_T', 'jt_U', 'jt_X',
]
| 177 | |
class PropertyValue(object):
    """A named property value that compares and hashes like its name.

    An instance is equal to the plain string of the same name and to any
    object whose ``name`` attribute matches, so parsed string values can be
    tested directly against the module-level constants built from this class.
    """

    def __init__(self, name_):
        self.name = name_

    def __str__(self):
        return self.name

    def __repr__(self):
        return "PropertyValue(%r)" % (self.name,)

    def __eq__(self, other):
        if isinstance(other, str):
            return self.name == other
        try:
            return self.name == other.name
        except AttributeError:
            # Unrelated type (int, None, ...): let Python fall back to its
            # default comparison instead of raising from inside `==` / `in`.
            return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Must agree with __eq__: hash exactly like the name string.
        return hash(str(self))
Behdad Esfahbod | 20e246e | 2015-07-20 15:56:19 +0100 | [diff] [blame] | 189 | |
# Build one PropertyValue per known name and expose each as a module-level
# global, so the predicates below can refer to e.g. `Consonant` or `Lo` as
# bare names.
property_values = {}

for name in property_names:
    value = PropertyValue(name)
    # A name must be listed only once and must not shadow an existing global.
    assert not (value in property_values)
    assert not (value in globals())
    property_values[name] = value

globals().update(property_values)
| 198 | |
| 199 | |
def is_BASE(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'B' entry of use_mapping.

    U is the code point; UISC, UDI, UGC and AJT are its syllabic category,
    default-ignorable flag, general category and joining type.
    """
    if UISC in (Number, Consonant, Consonant_Head_Letter,
                Tone_Letter,
                Vowel_Independent):
        return True
    # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/484
    if AJT in (jt_C, jt_D, jt_L, jt_R) and UISC != Joiner:
        return True
    # Letters (Lo) in these otherwise mark-like categories count as bases.
    return (UGC == Lo and
            UISC in (Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                     Consonant_Subjoined, Vowel, Vowel_Dependent))
def is_BASE_NUM(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'N' entry of use_mapping: Brahmi joining numbers."""
    return UISC == Brahmi_Joining_Number
def is_BASE_OTHER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'GB' entry of use_mapping.

    True for consonant placeholders and for a fixed list of code points.
    """
    return (True if UISC == Consonant_Placeholder
            else U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE])
def is_CGJ(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'CGJ' entry of use_mapping.

    The result is only guaranteed to be truthy/falsy: when UISC is not Joiner
    and UDI is falsy, UDI itself is returned.
    """
    # Also includes VARIATION_SELECTOR and ZWJ
    if UISC == Joiner:
        return True
    return UDI and UGC in (Mc, Me, Mn)
def is_CONS_FINAL(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'F' entry of use_mapping.

    True for non-Lo final consonants and for succeeding rephas.
    """
    if UISC == Consonant_Final and UGC != Lo:
        return True
    return UISC == Consonant_Succeeding_Repha
def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'FM' entry of use_mapping: syllable modifiers."""
    return UISC == Syllable_Modifier
def is_CONS_MED(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'M' entry of use_mapping.

    True for non-Lo medial consonants and for initial-postfixed consonants.
    """
    # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec.
    if UISC == Consonant_Medial and UGC != Lo:
        return True
    return UISC == Consonant_Initial_Postfixed
def is_CONS_MOD(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'CM' entry of use_mapping."""
    return UISC in (Nukta, Gemination_Mark, Consonant_Killer)
def is_CONS_SUB(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'SUB' entry of use_mapping: non-Lo subjoined consonants."""
    subjoined = UISC == Consonant_Subjoined
    return subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'CS' entry of use_mapping."""
    return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'H' entry of use_mapping.

    True for viramas, except the code point claimed by
    is_HALANT_OR_VOWEL_MODIFIER.
    """
    if UISC == Virama:
        return not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT)
    return False
def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'HVM' entry of use_mapping: exactly U+0DCA."""
    # Split off of HALANT
    return 0x0DCA == U
def is_HALANT_NUM(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'HN' entry of use_mapping: number joiners."""
    return UISC == Number_Joiner
def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'G' entry of use_mapping."""
    return UISC == Hieroglyph
def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'J' entry of use_mapping."""
    return UISC == Hieroglyph_Joiner
def is_HIEROGLYPH_MIRROR(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'HR' entry of use_mapping."""
    return UISC == Hieroglyph_Mirror
def is_HIEROGLYPH_MOD(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'HM' entry of use_mapping."""
    return UISC == Hieroglyph_Modifier
def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'SB' entry of use_mapping (mark or segment begin)."""
    return UISC in (Hieroglyph_Mark_Begin, Hieroglyph_Segment_Begin)
def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'SE' entry of use_mapping (mark or segment end)."""
    return UISC in (Hieroglyph_Mark_End, Hieroglyph_Segment_End)
def is_INVISIBLE_STACKER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'IS' entry of use_mapping.

    True for invisible stackers, except the code point claimed by is_SAKOT.
    """
    # Split off of HALANT
    if UISC == Invisible_Stacker:
        return not is_SAKOT(U, UISC, UDI, UGC, AJT)
    return False
def is_ZWNJ(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'ZWNJ' entry of use_mapping: non-joiners."""
    return UISC == Non_Joiner
def is_OTHER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'O' entry of use_mapping.

    True for Po punctuation and a few syllabic categories, provided none of
    the BASE, BASE_OTHER, CGJ, SYM_MOD or Word_Joiner predicates accepts the
    code point.
    """
    # Also includes BASE_IND and SYM
    candidate = (UGC == Po or
                 UISC in (Consonant_Dead, Joiner, Modifying_Letter, Other))
    if not candidate:
        return False
    claimed_elsewhere = (is_BASE(U, UISC, UDI, UGC, AJT)
                         or is_BASE_OTHER(U, UISC, UDI, UGC, AJT)
                         or is_CGJ(U, UISC, UDI, UGC, AJT)
                         or is_SYM_MOD(U, UISC, UDI, UGC, AJT)
                         or is_Word_Joiner(U, UISC, UDI, UGC, AJT))
    return not claimed_elsewhere
def is_REORDERING_KILLER(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'RK' entry of use_mapping."""
    return UISC == Reordering_Killer
def is_REPHA(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'R' entry of use_mapping."""
    return UISC in (Consonant_Preceding_Repha, Consonant_Prefixed)
def is_SAKOT(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'Sk' entry of use_mapping: exactly U+1A60."""
    # Split off of HALANT
    return 0x1A60 == U
def is_SYM_MOD(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'SM' entry of use_mapping: symbol modifiers."""
    return UISC == Symbol_Modifier
def is_VOWEL(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'V' entry of use_mapping.

    True for pure killers and for non-Lo (dependent) vowels.
    """
    if UISC == Pure_Killer:
        return True
    return UGC != Lo and UISC in (Vowel, Vowel_Dependent)
def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'VM' entry of use_mapping.

    True for tone/cantillation marks, register shifters, visargas, and
    non-Lo bindus.
    """
    if UISC in (Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga):
        return True
    return UGC != Lo and UISC == Bindu
def is_Word_Joiner(U, UISC, UDI, UGC, AJT):
    """Predicate for the 'WJ' entry of use_mapping.

    True for unassigned (Cn) code points, and for default-ignorable code
    points of syllabic category Other that are neither in the exclusion list
    below nor accepted by is_CGJ.
    """
    # Also includes Rsv
    excluded = (0x115F, 0x1160, 0x3164, 0xFFA0, 0x1BCA0, 0x1BCA1, 0x1BCA2, 0x1BCA3)
    ignorable_other = (bool(UDI)
                       and U not in excluded
                       and UISC == Other
                       and not is_CGJ(U, UISC, UDI, UGC, AJT))
    return ignorable_other or UGC == Cn
Behdad Esfahbod | 20e246e | 2015-07-20 15:56:19 +0100 | [diff] [blame] | 288 | |
# USE category name -> predicate deciding whether a code point belongs to it.
# map_to_use asserts that exactly one predicate accepts each code point.
use_mapping = {
    # Bases
    'B': is_BASE,
    'N': is_BASE_NUM,
    'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    # Consonant-related marks
    'F': is_CONS_FINAL,
    'FM': is_CONS_FINAL_MOD,
    'M': is_CONS_MED,
    'CM': is_CONS_MOD,
    'SUB': is_CONS_SUB,
    'CS': is_CONS_WITH_STACKER,
    # Halants and stackers
    'H': is_HALANT,
    'HVM': is_HALANT_OR_VOWEL_MODIFIER,
    'HN': is_HALANT_NUM,
    'IS': is_INVISIBLE_STACKER,
    # Hieroglyphs
    'G': is_HIEROGLYPH,
    'HM': is_HIEROGLYPH_MOD,
    'HR': is_HIEROGLYPH_MIRROR,
    'J': is_HIEROGLYPH_JOINER,
    'SB': is_HIEROGLYPH_SEGMENT_BEGIN,
    'SE': is_HIEROGLYPH_SEGMENT_END,
    # Everything else
    'ZWNJ': is_ZWNJ,
    'O': is_OTHER,
    'RK': is_REORDERING_KILLER,
    'R': is_REPHA,
    'Sk': is_SAKOT,
    'SM': is_SYM_MOD,
    'V': is_VOWEL,
    'VM': is_VOWEL_MOD,
    'WJ': is_Word_Joiner,
}
| 320 | |
# For each USE category that may carry a positional category: either None
# (no positional suffix is appended) or a mapping from suffix to the
# positional categories that select it.  map_to_use asserts that exactly one
# suffix matches whenever a mapping is present.
use_positions = {
    'F': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
    },
    'M': {
        'Abv': [Top],
        'Blw': [Bottom, Bottom_And_Left, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Bottom_And_Left],
    },
    'CM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
    },
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
        'Pst': [Right],
        'Pre': [Left],
    },
    'SM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    # Categories that accept a positional category but take no suffix.
    'H': None,
    'HM': None,
    'HR': None,
    'HVM': None,
    'IS': None,
    'B': None,
    'FM': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Not_Applicable],
    },
    'R': None,
    'RK': None,
    'SUB': None,
}
| 368 | |
def map_to_use(data):
    """Classify every code point in *data* into a USE category.

    *data* maps a code point to a seven-item sequence
    (UISC, UIPC, AJT, UDI, UGC, UBlock, script); the last item is ignored.

    Returns a dict mapping each code point to ``(USE, UBlock)``, where USE is
    the key of the single use_mapping predicate that accepted it, with a
    positional suffix from use_positions appended when that category has one.

    Raises AssertionError if a code point matches zero or several categories,
    has a positional category its USE category does not accept, or matches
    zero or several positional suffixes.
    """
    out = {}
    predicates = use_mapping.items()
    for U, (UISC, UIPC, AJT, UDI, UGC, UBlock, _) in data.items():

        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x1CE2 <= U <= 0x1CE8:
            UISC = Cantillation_Mark

        # Tibetan:
        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F:
            UISC = Vowel_Dependent

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED:
            UISC = Tone_Mark

        matches = [key for key, accepts in predicates
                   if accepts(U, UISC, UDI, UGC, AJT)]
        assert len(matches) == 1, "%s %s %s %s %s %s" % (hex(U), UISC, UDI, UGC, AJT, matches)
        USE = matches[0]

        # Resolve Indic_Positional_Category

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
        #  and https://github.com/harfbuzz/harfbuzz/issues/1631
        if U in [0x11302, 0x11303, 0x114C1]:
            UIPC = Top

        # TODO: https://github.com/microsoft/font-tools/issues/17#issuecomment-2346952091
        if U == 0x113CF:
            UIPC = Bottom

        # A real positional category is only allowed on categories listed in
        # use_positions (two code points are explicitly exempted).
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
                U in {0x0F7F, 0x11A3A} or
                USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT)

        suffixes = use_positions.get(USE, None)
        if suffixes:
            chosen = [suffix for suffix, positions in suffixes.items()
                      if positions and UIPC in positions]
            assert len(chosen) == 1, "%s %s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT, chosen)
            USE = USE + chosen[0]

        out[U] = (USE, UBlock)
    return out
| 412 | |
# Classify every merged code point.
use_data = map_to_use(combined)

# Banner comment: how the table was generated, followed by the input headers.
for banner_line in (
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * {} IndicSyllabicCategory.txt IndicPositionalCategory.txt ArabicShaping.txt DerivedCoreProperties.txt UnicodeData.txt Blocks.txt Scripts.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt".format(sys.argv[0]),
    " *",
    " * on files with these headers:",
    " *",
):
    print(banner_line)
for header in headers:
    for header_line in header:
        print(" * %s" % (header_line.strip()))
print(" */")
print()

# Include guard and includes, each followed by a blank line.
print("#ifndef HB_OT_SHAPER_USE_TABLE_HH")
print("#define HB_OT_SHAPER_USE_TABLE_HH")
print()
print('#include "hb.hh"')
print()
print('#include "hb-ot-shaper-use-machine.hh"')
print()
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 435 | |
# Running totals updated by print_block: table cells emitted, cells backed by
# real data, and the name of the last block whose heading was printed.
total = 0
used = 0
last_block = None


def print_block(block, start, end, use_data):
    """Print the table cells for code points start..end (inclusive).

    A heading comment is printed first when *block* is truthy and differs
    from the previously printed block.  Each cell is the USE category from
    *use_data*; a code point missing from *use_data* is printed as 'O' if
    it appears in unicode_data[4] and as 'WJ' otherwise.

    *start* and *end* + 1 must be multiples of 8 (asserted).  Updates the
    module-level total, used and last_block.
    """
    global total, used, last_block

    if block and block != last_block:
        print()
        print()
        print(" /* %s */" % block)

    # Pad so that a range starting mid-row lines up with its column.
    column = start % 16
    if column:
        print(' ' * (20 + (column * 6)), end='')

    num = 0
    assert start % 8 == 0
    assert (end + 1) % 8 == 0
    for cp in range(start, end + 1):
        if cp % 16 == 0:
            # New row of 16 cells, labelled with its first code point.
            print()
            print(" /* %04X */" % cp, end='')
        if cp in use_data:
            num += 1
        entry = use_data.get(cp)
        if entry is not None:
            cell = entry[0]
        elif cp in unicode_data[4]:
            cell = 'O'
        else:
            cell = 'WJ'
        print("%6s," % cell, end='')

    total += end - start + 1
    used += num
    if block:
        last_block = block
| 469 | |
# Every code point that has an entry in use_data, in ascending order.
uu = sorted(use_data)

# Module-level scratch state.
# NOTE(review): no reader of these names is visible in this part of the
# script; they are kept in case code elsewhere relies on them.
last = -100000
num = 0
offset = 0
starts = []
ends = []

# Emit one short macro per category name.  The macros are wrapped in a
# diagnostic push/pop that silences -Wunused-macros for them.
print('#pragma GCC diagnostic push')
print('#pragma GCC diagnostic ignored "-Wunused-macros"')

# Categories without positional variants: one macro each, annotated with the
# mapped function's name minus its first three characters.
for name in sorted(use_mapping):
    if name not in use_positions or not use_positions[name]:
        print("#define %s USE(%s) /* %s */" % (name, name, use_mapping[name].__name__[3:]))

# Categories with positional variants: one macro per (category, suffix) pair.
for base, suffixes in sorted(use_positions.items()):
    if suffixes:
        for suffix in suffixes:
            macro = base + suffix
            print("#define %s USE(%s)" % (macro, macro))

print('#pragma GCC diagnostic pop')
print("")
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 489 | |
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 490 | |
Behdad Esfahbod | 015aecf | 2022-07-13 12:15:01 -0600 | [diff] [blame] | 491 | import packTab |
# Reduce use_data to {code point: first element of its entry}; this is the
# table handed to packTab.
data = {cp: entry[0] for cp, entry in use_data.items()}

DEFAULT = 5
COMPACT = 9

# The table is packed twice and emitted as the two arms of one preprocessor
# conditional: the DEFAULT packing under `#ifndef HB_OPTIMIZE_SIZE` and the
# COMPACT packing under `#else`.
for compression, guard in (
    (DEFAULT, '#ifndef HB_OPTIMIZE_SIZE'),
    (COMPACT, '#else'),
):
    logging.info(' Compression=%d:' % compression)
    print()
    print(guard)
    print()

    code = packTab.Code('hb_use')
    sol = packTab.pack_table(data, compression=compression, default='O')
    logging.info(' FullCost=%d' % (sol.fullCost))
    sol.genCode(code, 'get_category')
    code.print_c(linkage='static inline')
    print()

# Closes the conditional opened by the first pass of the loop.
print('#endif')
Behdad Esfahbod | e2c9511 | 2015-07-20 11:32:48 +0100 | [diff] [blame] | 516 | |
# Undefine every short macro again, in the same order the script uses for
# the matching #define lines, then close the include guard.
print()

# Categories without positional variants.
for name in sorted(use_mapping):
    if name not in use_positions or not use_positions[name]:
        print("#undef %s" % name)

# Categories with positional variants: one #undef per (category, suffix).
for base, suffixes in sorted(use_positions.items()):
    if suffixes:
        for suffix in suffixes:
            print("#undef %s" % (base + suffix))

print()
print()
print("#endif /* HB_OT_SHAPER_USE_TABLE_HH */")
print("/* == End of generated table == */")