// regexpMatch reports whether path matches the shell-style pattern, using the
// same rules as filepath.Match plus one extension: "**" matches any number of
// directories (and, at the end of the pattern, everything that remains).
//
// The pattern is translated into a regular expression:
//
//	**        any sequence of characters, including path separators
//	*         any sequence of non-separator characters
//	?         any single non-separator character
//	[...]     character class; a leading '^' negates it
//	\c        the literal character c (not available on Windows, where
//	          '\' is the path separator and is always taken literally)
//
// Every other character stands for itself. In particular, characters that are
// special to regexp but not to filepath.Match ('+', '(', ')', '|', '{', '}',
// '^' outside a class, and '*' or '?' inside a class) are escaped so that
// patterns keep the meaning they have under filepath.Match.
//
// It returns filepath.ErrBadPattern when the pattern is malformed.
func regexpMatch(pattern, path string) (bool, error) {
	// filepath.Match has some irregular syntax rules. Rather than duplicate
	// them, call it purely for its error result and reject whatever it
	// rejects; its match result is ignored.
	if _, err := filepath.Match(pattern, path); err != nil {
		return false, err
	}

	sl := string(os.PathSeparator)
	escSL := sl
	if sl == `\` {
		// A backslash separator must itself be escaped inside the regexp.
		escSL += `\`
	}

	// A scanner is used so the pattern is walked rune by rune (UTF-8 aware).
	var scan scanner.Scanner
	scan.Init(strings.NewReader(pattern))
	// Init resets Error to nil, and a nil handler makes the scanner print
	// decoding problems to os.Stderr. A matcher must not write to stderr, so
	// install a no-op handler; invalid bytes still surface as U+FFFD.
	scan.Error = func(*scanner.Scanner, string) {}

	// (?s) lets "." match newlines so that "**" also covers paths that
	// contain one, just as "*" does under filepath.Match.
	regStr := "(?s)^"
	inClass := false // true while between '[' and its closing ']'

	for scan.Peek() != scanner.EOF {
		ch := scan.Next()

		switch {
		case ch == '\\':
			if sl == `\` {
				// On Windows filepath.Match allows no escaping at all, so a
				// backslash is always a literal separator.
				regStr += escSL
				continue
			}
			if scan.Peek() != scanner.EOF {
				// The escaped rune is always literal. It must not be emitted
				// as `\`+rune blindly: `\d` would become a digit class and
				// `\☺` an invalid regexp escape.
				regStr += quoteRegexpRune(scan.Next())
			} else {
				// A trailing backslash is kept as a literal backslash.
				regStr += `\\`
			}

		case inClass:
			switch ch {
			case ']':
				inClass = false
				regStr += "]"
			case '-':
				// Range operator; filepath.Match already validated its use.
				regStr += "-"
			default:
				// Everything else in a class is literal, including '*',
				// '?', '[' and a non-leading '^'.
				regStr += quoteRegexpRune(ch)
			}

		case ch == '[':
			inClass = true
			regStr += "["
			if scan.Peek() == '^' {
				// A '^' directly after '[' negates the class.
				scan.Next()
				regStr += "^"
			}

		case ch == '*':
			if scan.Peek() != '*' {
				// A single "*" maps to anything except the separator.
				regStr += "[^" + escSL + "]*"
				continue
			}

			// Some flavor of "**".
			scan.Next()
			if scan.Peek() == scanner.EOF {
				// "**" at the end: accept everything, like .gitignore.
				regStr += ".*"
			} else {
				// "**" in the middle: either any prefix that ends in a
				// separator, or a run of non-separator characters.
				regStr += "((.*" + escSL + ")|([^" + escSL + "]*))"
			}

			// Treat "**/" as "**", so swallow the separator.
			if scan.Peek() == os.PathSeparator {
				scan.Next()
			}

		case ch == '?':
			// "?" is any single character except the separator.
			regStr += "[^" + escSL + "]"

		default:
			// Literal character. Escape it if regexp would otherwise give
			// it a meaning that filepath.Match does not.
			regStr += quoteRegexpRune(ch)
		}
	}

	regStr += "$"

	res, err := regexp.MatchString(regStr, path)
	if err != nil {
		// Report regexp's errors as filepath's, so callers see the same
		// error they would get from filepath.Match.
		return false, filepath.ErrBadPattern
	}

	return res, nil
}

// quoteRegexpRune returns the regexp source that matches r literally.
// ASCII punctuation, whitespace and control characters are backslash-escaped,
// which regexp always reads as the character itself. ASCII letters and digits
// and all non-ASCII runes are returned unchanged, because a backslash in front
// of them is either a regexp escape sequence or a syntax error.
func quoteRegexpRune(r rune) string {
	isAlnum := (r >= '0' && r <= '9') || (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z')
	if r >= 0 && r < 0x80 && !isAlnum {
		return `\` + string(r)
	}
	return string(r)
}
diff --git a/fileutils/fileutils_test.go b/fileutils/fileutils_test.go index b544ffb..2d584c6 100644 --- a/fileutils/fileutils_test.go +++ b/fileutils/fileutils_test.go @@ -5,6 +5,8 @@ import ( "os" "path" "path/filepath" + "runtime" + "strings" "testing" ) @@ -297,6 +299,84 @@ func TestMatchesWithMalformedPatterns(t *testing.T) { } } +// Test lots of variants of patterns & strings +func TestMatches(t *testing.T) { + tests := []struct { + pattern string + text string + pass bool + }{ + {"**", "file", true}, + {"**", "file/", true}, + {"**/", "file", true}, // weird one + {"**/", "file/", true}, + {"**", "/", true}, + {"**/", "/", true}, + {"**", "dir/file", true}, + {"**/", "dir/file", false}, + {"**", "dir/file/", true}, + {"**/", "dir/file/", true}, + {"**/**", "dir/file", true}, + {"**/**", "dir/file/", true}, + {"dir/**", "dir/file", true}, + {"dir/**", "dir/file/", true}, + {"dir/**", "dir/dir2/file", true}, + {"dir/**", "dir/dir2/file/", true}, + {"**/dir2/*", "dir/dir2/file", true}, + {"**/dir2/*", "dir/dir2/file/", false}, + {"**/dir2/**", "dir/dir2/dir3/file", true}, + {"**/dir2/**", "dir/dir2/dir3/file/", true}, + {"**file", "file", true}, + {"**file", "dir/file", true}, + {"**/file", "dir/file", true}, + {"**file", "dir/dir/file", true}, + {"**/file", "dir/dir/file", true}, + {"**/file*", "dir/dir/file", true}, + {"**/file*", "dir/dir/file.txt", true}, + {"**/file*txt", "dir/dir/file.txt", true}, + {"**/file*.txt", "dir/dir/file.txt", true}, + {"**/file*.txt*", "dir/dir/file.txt", true}, + {"**/**/*.txt", "dir/dir/file.txt", true}, + {"**/**/*.txt2", "dir/dir/file.txt", false}, + {"**/*.txt", "file.txt", true}, + {"**/**/*.txt", "file.txt", true}, + {"a**/*.txt", "a/file.txt", true}, + {"a**/*.txt", "a/dir/file.txt", true}, + {"a**/*.txt", "a/dir/dir/file.txt", true}, + {"a/*.txt", "a/dir/file.txt", false}, + {"a/*.txt", "a/file.txt", true}, + {"a/*.txt**", "a/file.txt", true}, + {"a[b-d]e", "ae", false}, + {"a[b-d]e", "ace", true}, + {"a[b-d]e", "aae", 
false}, + {"a[^b-d]e", "aze", true}, + {".*", ".foo", true}, + {".*", "foo", false}, + {"abc.def", "abcdef", false}, + {"abc.def", "abc.def", true}, + {"abc.def", "abcZdef", false}, + {"abc?def", "abcZdef", true}, + {"abc?def", "abcdef", false}, + {"a\\*b", "a*b", true}, + {"a\\", "a", false}, + {"a\\", "a\\", false}, + {"a\\\\", "a\\", true}, + {"**/foo/bar", "foo/bar", true}, + {"**/foo/bar", "dir/foo/bar", true}, + {"**/foo/bar", "dir/dir2/foo/bar", true}, + {"abc/**", "abc", false}, + {"abc/**", "abc/def", true}, + {"abc/**", "abc/def/ghi", true}, + } + + for _, test := range tests { + res, _ := regexpMatch(test.pattern, test.text) + if res != test.pass { + t.Fatalf("Failed: %v - res:%v", test, res) + } + } +} + // An empty string should return true from Empty. func TestEmpty(t *testing.T) { empty := empty("") @@ -400,3 +480,94 @@ func TestCreateIfNotExistsFile(t *testing.T) { t.Fatalf("Should have been a file, seems it's not") } } + +// These matchTests are stolen from go's filepath Match tests. 
+type matchTest struct { + pattern, s string + match bool + err error +} + +var matchTests = []matchTest{ + {"abc", "abc", true, nil}, + {"*", "abc", true, nil}, + {"*c", "abc", true, nil}, + {"a*", "a", true, nil}, + {"a*", "abc", true, nil}, + {"a*", "ab/c", false, nil}, + {"a*/b", "abc/b", true, nil}, + {"a*/b", "a/c/b", false, nil}, + {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil}, + {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil}, + {"a*b?c*x", "abxbbxdbxebxczzx", true, nil}, + {"a*b?c*x", "abxbbxdbxebxczzy", false, nil}, + {"ab[c]", "abc", true, nil}, + {"ab[b-d]", "abc", true, nil}, + {"ab[e-g]", "abc", false, nil}, + {"ab[^c]", "abc", false, nil}, + {"ab[^b-d]", "abc", false, nil}, + {"ab[^e-g]", "abc", true, nil}, + {"a\\*b", "a*b", true, nil}, + {"a\\*b", "ab", false, nil}, + {"a?b", "a☺b", true, nil}, + {"a[^a]b", "a☺b", true, nil}, + {"a???b", "a☺b", false, nil}, + {"a[^a][^a][^a]b", "a☺b", false, nil}, + {"[a-ζ]*", "α", true, nil}, + {"*[a-ζ]", "A", false, nil}, + {"a?b", "a/b", false, nil}, + {"a*b", "a/b", false, nil}, + {"[\\]a]", "]", true, nil}, + {"[\\-]", "-", true, nil}, + {"[x\\-]", "x", true, nil}, + {"[x\\-]", "-", true, nil}, + {"[x\\-]", "z", false, nil}, + {"[\\-x]", "x", true, nil}, + {"[\\-x]", "-", true, nil}, + {"[\\-x]", "a", false, nil}, + {"[]a]", "]", false, filepath.ErrBadPattern}, + {"[-]", "-", false, filepath.ErrBadPattern}, + {"[x-]", "x", false, filepath.ErrBadPattern}, + {"[x-]", "-", false, filepath.ErrBadPattern}, + {"[x-]", "z", false, filepath.ErrBadPattern}, + {"[-x]", "x", false, filepath.ErrBadPattern}, + {"[-x]", "-", false, filepath.ErrBadPattern}, + {"[-x]", "a", false, filepath.ErrBadPattern}, + {"\\", "a", false, filepath.ErrBadPattern}, + {"[a-b-c]", "a", false, filepath.ErrBadPattern}, + {"[", "a", false, filepath.ErrBadPattern}, + {"[^", "a", false, filepath.ErrBadPattern}, + {"[^bc", "a", false, 
filepath.ErrBadPattern}, + {"a[", "a", false, filepath.ErrBadPattern}, // was nil but IMO its wrong + {"a[", "ab", false, filepath.ErrBadPattern}, + {"*x", "xxx", true, nil}, +} + +func errp(e error) string { + if e == nil { + return "" + } + return e.Error() +} + +// TestMatch test's our version of filepath.Match, called regexpMatch. +func TestMatch(t *testing.T) { + for _, tt := range matchTests { + pattern := tt.pattern + s := tt.s + if runtime.GOOS == "windows" { + if strings.Index(pattern, "\\") >= 0 { + // no escape allowed on windows. + continue + } + pattern = filepath.Clean(pattern) + s = filepath.Clean(s) + } + ok, err := regexpMatch(pattern, s) + if ok != tt.match || err != tt.err { + t.Fatalf("Match(%#q, %#q) = %v, %q want %v, %q", pattern, s, ok, errp(err), tt.match, errp(tt.err)) + } + } +}