| // Copyright 2020 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package classifier |
| |
| import ( |
| "bytes" |
| "errors" |
| "io/ioutil" |
| "log" |
| "os" |
| "path" |
| "path/filepath" |
| "sort" |
| "strings" |
| "testing" |
| "testing/iotest" |
| |
| "github.com/davecgh/go-spew/spew" |
| "github.com/google/go-cmp/cmp" |
| ) |
| |
// scenario is the in-memory form of one scenario file used by the tests.
type scenario struct {
	// expected holds the license names listed on the scenario's EXPECTED: line.
	expected []string
	// data is the content that is handed to the classifier.
	data []byte
}
| |
// Defaults used when building the classifier for these tests.
var (
	// defaultThreshold is the threshold passed to NewClassifier.
	defaultThreshold = 0.8
	// baseLicenses is the directory passed to LoadLicenses.
	baseLicenses = "assets"
)
| |
| func classifier() (*Classifier, error) { |
| c := NewClassifier(defaultThreshold) |
| return c, c.LoadLicenses(path.Join(baseLicenses)) |
| } |
| |
// getScenarioFilenames walks the "scenarios" directory (relative to the
// current working directory) and returns the path of every non-directory entry
// in it, skipping Markdown files (those with a ".md" extension).
//
// If the walk fails, it returns a nil slice and the error.
func getScenarioFilenames() ([]string, error) {
	const root = "scenarios"

	var files []string
	err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
		if err != nil {
			// Abort the walk on the first error.
			return err
		}
		// Compare the extension rather than a bare "md" suffix so that a file
		// whose name merely ends in "md" (for example "cmd") is still picked
		// up as a scenario.
		if info.IsDir() || filepath.Ext(p) == ".md" {
			return nil
		}
		files = append(files, p)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return files, nil
}
| |
| func TestMatchScenarios(t *testing.T) { |
| c, err := classifier() |
| if err != nil { |
| t.Fatalf("couldn't instantiate standard test classifier: %v", err) |
| } |
| |
| files, err := getScenarioFilenames() |
| if err != nil { |
| t.Fatalf("encountered error walking scenarios directory: %v", err) |
| } |
| |
| for _, f := range files { |
| s := readScenario(f) |
| |
| m := c.Match(s.data) |
| checkMatches(t, m.Matches, f, s.expected) |
| } |
| } |
| |
| func readScenario(path string) *scenario { |
| var s scenario |
| b, err := ioutil.ReadFile(path) |
| if err != nil { |
| log.Fatalf("Couldn't read scenario %s: %v", path, err) |
| } |
| |
| // A scenario consists of any number of comment lines, which are ignored, then a line of the form |
| // EXPECTED: A,B,C |
| // |
| // or EXPECTED:<EOL> |
| // where A,B,C is a comma-separated list of expected licenses. |
| lines := strings.SplitN(string(b), "EXPECTED:", 2) |
| // The first part of lines is description, which we ignore. We then split on a linefeed to get the |
| // list of licenses and the rest of the data content. |
| lines = strings.SplitN(lines[1], "\n", 2) |
| if lines[0] != "" { |
| s.expected = strings.Split(lines[0], ",") |
| } else { |
| s.expected = []string{} |
| } |
| s.data = []byte(lines[1]) |
| return &s |
| } |
| |
| func TestContainsAndOverlaps(t *testing.T) { |
| tests := []struct { |
| name string |
| a, b *Match |
| contains bool |
| overlaps bool |
| }{ |
| { |
| name: "no intersection", |
| a: &Match{ |
| StartLine: 1, |
| EndLine: 3, |
| }, |
| b: &Match{ |
| StartLine: 4, |
| EndLine: 5, |
| }, |
| contains: false, |
| overlaps: false, |
| }, |
| { |
| name: "overlap at end", |
| a: &Match{ |
| StartLine: 4, |
| EndLine: 10, |
| }, |
| b: &Match{ |
| StartLine: 1, |
| EndLine: 5, |
| }, |
| contains: false, |
| overlaps: true, |
| }, |
| { |
| name: "overlap at end", |
| a: &Match{ |
| StartLine: 1, |
| EndLine: 10, |
| }, |
| b: &Match{ |
| StartLine: 4, |
| EndLine: 12, |
| }, |
| contains: false, |
| overlaps: true, |
| }, |
| { |
| name: "contains", |
| a: &Match{ |
| StartLine: 1, |
| EndLine: 10, |
| }, |
| b: &Match{ |
| StartLine: 4, |
| EndLine: 7, |
| }, |
| contains: true, |
| overlaps: false, |
| }, |
| } |
| |
| for _, test := range tests { |
| t.Run(test.name, func(t *testing.T) { |
| if got := contains(test.a, test.b); got != test.contains { |
| t.Errorf("contains: got %v want %v", got, test.contains) |
| } |
| if got := overlaps(test.a, test.b); got != test.overlaps { |
| t.Errorf("overlaps: got %v want %v", got, test.overlaps) |
| } |
| }) |
| } |
| } |
| |
// TestLicName enumerates license asset filenames together with the license
// name each one is expected to map to, and starts one subtest per filename.
//
// NOTE(review): the subtest body is empty, so nothing is asserted and every
// case passes vacuously — confirm which function these cases were meant to
// exercise.
func TestLicName(t *testing.T) {
	cases := []struct {
		name     string
		expected string
	}{
		// The filename for a license
		{name: "GPL-2.0.txt", expected: "GPL-2.0"},
		// The filename for a header reference to a license
		{name: "GPL-2.0.header.txt", expected: "GPL-2.0"},
		// The filename for a variant header reference to a license
		{name: "GPL-2.0.header_a.txt", expected: "GPL-2.0"},
		// The filename for a variant license body
		{name: "Apache-2.0_no_toc.txt", expected: "Apache-2.0"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
		})
	}
}
| |
| func TestMatchFrom(t *testing.T) { |
| tr := iotest.TimeoutReader(strings.NewReader("some data")) |
| c, err := classifier() |
| if err != nil { |
| t.Fatalf("couldn't instantiate standard Google classifier: %v", err) |
| } |
| |
| _, err = c.MatchFrom(tr) |
| if !errors.Is(err, iotest.ErrTimeout) { |
| t.Errorf("got %v want %v", err, iotest.ErrTimeout) |
| } |
| |
| files, err := getScenarioFilenames() |
| |
| if err != nil { |
| t.Fatalf("encountered error walking scenarios directory: %v", err) |
| } |
| |
| for _, f := range files { |
| s := readScenario(f) |
| r := bytes.NewReader(s.data) |
| m, err := c.MatchFrom(r) |
| if err != nil { |
| t.Errorf("unexpected error: %v", err) |
| } |
| checkMatches(t, m.Matches, f, s.expected) |
| } |
| } |
| |
| // checkMatches diffs the resulting matches against the expected content and |
| // sets test results. |
| func checkMatches(t *testing.T, m Matches, f string, e []string) { |
| found := make(map[string]bool) |
| // Uniquify the licenses found |
| for _, l := range m { |
| found[l.Name] = true |
| } |
| |
| var names []string |
| for l := range found { |
| names = append(names, l) |
| } |
| sort.Strings(names) |
| |
| if len(names) != len(e) { |
| t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(e), spew.Sdump(m)) |
| return |
| } |
| |
| for i := 0; i < len(names); i++ { |
| w := strings.TrimSpace(e[i]) |
| if got, want := names[i], w; got != want { |
| t.Errorf("Match(%q) = %q, want %q", f, got, want) |
| } |
| } |
| } |
| |
| func TestLicenseName(t *testing.T) { |
| tests := []struct { |
| input string |
| want string |
| }{ |
| { |
| input: "License/example/file.txt", |
| want: "example", |
| }, |
| { |
| input: "License/example/a.txt", |
| want: "example", |
| }, |
| { |
| input: "Header/example/header.txt", |
| want: "example", |
| }, |
| { |
| input: "Header/example/a.txt", |
| want: "example", |
| }, |
| } |
| |
| for _, tt := range tests { |
| t.Run(tt.input, func(t *testing.T) { |
| got := LicenseName(tt.input) |
| if diff := cmp.Diff(tt.want, got); diff != "" { |
| t.Errorf("Unexpected result; diff %v", diff) |
| } |
| }) |
| } |
| } |
| |
// TestNormalize passes each table input through Classifier.Normalize and
// compares the result with the expected text using cmp.Diff. Each case runs as
// a subtest named after its input.
| func TestNormalize(t *testing.T) { |
| tests := []struct { |
| input string |
| want string |
| }{ |
| { |
| input: "Words With Extra Spaces are flattened out, preserving case", |
| want: "Words With Extra Spaces are flattened out preserving case", |
| }, |
| { |
| input: "", |
| want: "", |
| }, |
| { |
| input: " License ", |
| want: "License", |
| }, |
| { |
| // This tests that the line breaks in the input text are properly |
| // preserved, which is important for visual diffing. |
| input: `Preserving |
| line |
| |
| breaks is important`, |
| want: `Preserving |
| line |
| |
| breaks is important`, |
| }, |
| { |
| // This tests that soft EOL functionality doesn't affect normalized output |
| input: `This is a sentence looking construct. This is another sentence. What happens?`, |
| want: `This is a sentence looking construct This is another sentence What happens`, |
| }, |
| { |
| input: `header |
| ........................ This is oddly formatted`, |
| want: `header |
| This is oddly formatted`, |
| }, |
| { |
| input: `baseball basket- |
| ball football`, |
| want: "baseball basketball\nfootball", |
| }, |
| } |
| for _, tt := range tests { |
| t.Run(tt.input, func(t *testing.T) { |
// NOTE(review): the classifier is rebuilt for every subtest and does not
// appear to depend on tt — confirm whether it can be created once before the loop.
| c, err := classifier() |
| if err != nil { |
| t.Fatalf("couldn't instantiate standard Google classifier: %v", err) |
| } |
| |
| got := c.Normalize([]byte(tt.input)) |
| if diff := cmp.Diff(tt.want, string(got)); diff != "" { |
| t.Errorf("Unexpected result; diff %v", diff) |
| } |
| }) |
| } |
| |
| } |