| // Copyright 2006 The RE2 Authors. All Rights Reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Test character class manipulations. |
| |
| #include <stdio.h> |
| |
| #include "util/test.h" |
| #include "util/utf.h" |
| #include "re2/regexp.h" |
| |
| namespace re2 { |
| |
| struct CCTest { |
| struct { |
| Rune lo; |
| Rune hi; |
| } add[10]; |
| int remove; |
| struct { |
| Rune lo; |
| Rune hi; |
| } final[10]; |
| }; |
| |
| static CCTest tests[] = { |
| { { { 10, 20 }, {-1} }, -1, |
| { { 10, 20 }, {-1} } }, |
| |
| { { { 10, 20 }, { 20, 30 }, {-1} }, -1, |
| { { 10, 30 }, {-1} } }, |
| |
| { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1, |
| { { 10, 40 }, {-1} } }, |
| |
| { { { 0, 50 }, { 20, 30 }, {-1} }, -1, |
| { { 0, 50 }, {-1} } }, |
| |
| { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1, |
| { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
| |
| { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, |
| { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
| |
| { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, |
| { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
| |
| { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1, |
| { { 5, 25 }, {-1} } }, |
| |
| { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1, |
| { { 10, 23 }, {-1} } }, |
| |
| // These check boundary cases during negation. |
| { { { 0, Runemax }, {-1} }, -1, |
| { { 0, Runemax }, {-1} } }, |
| |
| { { { 0, 50 }, {-1} }, -1, |
| { { 0, 50 }, {-1} } }, |
| |
| { { { 50, Runemax }, {-1} }, -1, |
| { { 50, Runemax }, {-1} } }, |
| |
| // Check RemoveAbove. |
| { { { 50, Runemax }, {-1} }, 255, |
| { { 50, 255 }, {-1} } }, |
| |
| { { { 50, Runemax }, {-1} }, 65535, |
| { { 50, 65535 }, {-1} } }, |
| |
| { { { 50, Runemax }, {-1} }, Runemax, |
| { { 50, Runemax }, {-1} } }, |
| |
| { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255, |
| { { 50, 60 }, { 250, 255 }, {-1} } }, |
| |
| { { { 50, 60 }, {-1} }, 255, |
| { { 50, 60 }, {-1} } }, |
| |
| { { { 350, 360 }, {-1} }, 255, |
| { {-1} } }, |
| |
| { { {-1} }, 255, |
| { {-1} } }, |
| }; |
| |
| template<class CharClass> |
| static void Broke(const char *desc, const CCTest* t, CharClass* cc) { |
| if (t == NULL) { |
| printf("\t%s:", desc); |
| } else { |
| printf("\n"); |
| printf("CharClass added: [%s]", desc); |
| for (int k = 0; t->add[k].lo >= 0; k++) |
| printf(" %d-%d", t->add[k].lo, t->add[k].hi); |
| printf("\n"); |
| if (t->remove >= 0) |
| printf("Removed > %d\n", t->remove); |
| printf("\twant:"); |
| for (int k = 0; t->final[k].lo >= 0; k++) |
| printf(" %d-%d", t->final[k].lo, t->final[k].hi); |
| printf("\n"); |
| printf("\thave:"); |
| } |
| |
| for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it) |
| printf(" %d-%d", it->lo, it->hi); |
| printf("\n"); |
| } |
| |
| bool ShouldContain(CCTest *t, int x) { |
| for (int j = 0; t->final[j].lo >= 0; j++) |
| if (t->final[j].lo <= x && x <= t->final[j].hi) |
| return true; |
| return false; |
| } |
| |
| // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder. |
| |
| CharClass* Negate(CharClass *cc) { |
| return cc->Negate(); |
| } |
| |
| void Delete(CharClass* cc) { |
| cc->Delete(); |
| } |
| |
| CharClassBuilder* Negate(CharClassBuilder* cc) { |
| CharClassBuilder* ncc = cc->Copy(); |
| ncc->Negate(); |
| return ncc; |
| } |
| |
| void Delete(CharClassBuilder* cc) { |
| delete cc; |
| } |
| |
| template<class CharClass> |
| bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) { |
| typename CharClass::iterator it = cc->begin(); |
| int size = 0; |
| for (int j = 0; t->final[j].lo >= 0; j++, ++it) { |
| if (it == cc->end() || |
| it->lo != t->final[j].lo || |
| it->hi != t->final[j].hi) { |
| Broke(desc, t, cc); |
| return false; |
| } |
| size += it->hi - it->lo + 1; |
| } |
| if (it != cc->end()) { |
| Broke(desc, t, cc); |
| return false; |
| } |
| if (cc->size() != size) { |
| Broke(desc, t, cc); |
| printf("wrong size: want %d have %d\n", size, cc->size()); |
| return false; |
| } |
| |
| for (int j = 0; j < 101; j++) { |
| if (j == 100) |
| j = Runemax; |
| if (ShouldContain(t, j) != cc->Contains(j)) { |
| Broke(desc, t, cc); |
| printf("want contains(%d)=%d, got %d\n", |
| j, ShouldContain(t, j), cc->Contains(j)); |
| return false; |
| } |
| } |
| |
| CharClass* ncc = Negate(cc); |
| for (int j = 0; j < 101; j++) { |
| if (j == 100) |
| j = Runemax; |
| if (ShouldContain(t, j) == ncc->Contains(j)) { |
| Broke(desc, t, cc); |
| Broke("ncc", NULL, ncc); |
| printf("want ncc contains(%d)!=%d, got %d\n", |
| j, ShouldContain(t, j), ncc->Contains(j)); |
| Delete(ncc); |
| return false; |
| } |
| if (ncc->size() != Runemax+1 - cc->size()) { |
| Broke(desc, t, cc); |
| Broke("ncc", NULL, ncc); |
| printf("ncc size should be %d is %d\n", |
| Runemax+1 - cc->size(), ncc->size()); |
| Delete(ncc); |
| return false; |
| } |
| } |
| Delete(ncc); |
| return true; |
| } |
| |
| TEST(TestCharClassBuilder, Adds) { |
| int nfail = 0; |
| for (int i = 0; i < arraysize(tests); i++) { |
| CharClassBuilder ccb; |
| CCTest* t = &tests[i]; |
| for (int j = 0; t->add[j].lo >= 0; j++) |
| ccb.AddRange(t->add[j].lo, t->add[j].hi); |
| if (t->remove >= 0) |
| ccb.RemoveAbove(t->remove); |
| if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)")) |
| nfail++; |
| CharClass* cc = ccb.GetCharClass(); |
| if (!CorrectCC(cc, t, "before copy (CharClass)")) |
| nfail++; |
| cc->Delete(); |
| |
| CharClassBuilder *ccb1 = ccb.Copy(); |
| if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)")) |
| nfail++; |
| cc = ccb.GetCharClass(); |
| if (!CorrectCC(cc, t, "after copy (CharClass)")) |
| nfail++; |
| cc->Delete(); |
| delete ccb1; |
| } |
| EXPECT_EQ(nfail, 0); |
| } |
| |
| } // namespace re2 |