| from test.test_support import run_unittest, open_urlresource |
| import unittest |
| |
| from httplib import HTTPException |
| import sys |
| import os |
| from unicodedata import normalize, unidata_version |
| |
# Name of the normalization conformance data file and the URL it is fetched
# from; the URL embeds the Unicode database version reported by unicodedata.
TESTDATAFILE = "NormalizationTest.txt"
TESTDATAURL = "".join(
    ["http://www.unicode.org/Public/", unidata_version, "/ucd/", TESTDATAFILE]
)
| |
def check_version(testfile):
    """Tell whether the data file's first line names our Unicode version.

    Reads (and thereby consumes) one line from *testfile* and returns True
    when ``unidata_version`` occurs in it, False otherwise.
    """
    first_line = testfile.readline()
    if unidata_version in first_line:
        return True
    return False
| |
class RangeError(Exception):
    """Signals a code point larger than sys.maxunicode (raised by unistr)."""
| |
def NFC(str):
    """Return *str* normalized to Unicode form NFC."""
    form = "NFC"
    return normalize(form, str)
| |
def NFKC(str):
    """Return *str* normalized to Unicode form NFKC."""
    form = "NFKC"
    return normalize(form, str)
| |
def NFD(str):
    """Return *str* normalized to Unicode form NFD."""
    form = "NFD"
    return normalize(form, str)
| |
def NFKD(str):
    """Return *str* normalized to Unicode form NFKD."""
    form = "NFKD"
    return normalize(form, str)
| |
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    *data* looks like ``"0044 0307"``.  Raises RangeError when any of the
    code points is larger than sys.maxunicode.
    """
    codepoints = [int(token, 16) for token in data.split(" ")]
    # Validate the whole field before converting anything, so a single
    # out-of-range value rejects the field as a unit.
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))
| |
| class NormalizationTest(unittest.TestCase): |
| def test_main(self): |
| part = None |
| part1_data = {} |
| # Hit the exception early |
| try: |
| testdata = open_urlresource(TESTDATAURL, check_version) |
| except (IOError, HTTPException): |
| self.skipTest("Could not retrieve " + TESTDATAURL) |
| for line in testdata: |
| if '#' in line: |
| line = line.split('#')[0] |
| line = line.strip() |
| if not line: |
| continue |
| if line.startswith("@Part"): |
| part = line.split()[0] |
| continue |
| try: |
| c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] |
| except RangeError: |
| # Skip unsupported characters; |
| # try atleast adding c1 if we are in part1 |
| if part == "@Part1": |
| try: |
| c1 = unistr(line.split(';')[0]) |
| except RangeError: |
| pass |
| else: |
| part1_data[c1] = 1 |
| continue |
| |
| # Perform tests |
| self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) |
| self.assertTrue(c4 == NFC(c4) == NFC(c5), line) |
| self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) |
| self.assertTrue(c5 == NFD(c4) == NFD(c5), line) |
| self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \ |
| NFKC(c3) == NFKC(c4) == NFKC(c5), |
| line) |
| self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \ |
| NFKD(c3) == NFKD(c4) == NFKD(c5), |
| line) |
| |
| # Record part 1 data |
| if part == "@Part1": |
| part1_data[c1] = 1 |
| |
| # Perform tests for all other data |
| for c in range(sys.maxunicode+1): |
| X = unichr(c) |
| if X in part1_data: |
| continue |
| self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c) |
| |
| def test_bug_834676(self): |
| # Check for bug 834676 |
| normalize('NFC', u'\ud55c\uae00') |
| |
| |
def test_main():
    """Run the NormalizationTest case via test_support.run_unittest."""
    cases = (NormalizationTest,)
    run_unittest(*cases)
| |
# Run the tests when this module is executed directly as a script.
if __name__ == "__main__":
    test_main()