From efcd575893d0bd923a799408c1131d27dbb3123f Mon Sep 17 00:00:00 2001 From: Jeff Whiting Date: Wed, 2 Sep 2020 14:44:40 -0600 Subject: [PATCH 1/3] Implementation of a pluggable regex engine. --- README.md | 57 +++++++++++++++++++++++++---- regexpProvider.go | 32 +++++++++++++++++ regexpProvider_test.go | 81 ++++++++++++++++++++++++++++++++++++++++++ schema.go | 10 +++--- schemaLoader.go | 12 ++++--- schemaLoader_test.go | 22 +++++++++++- subSchema.go | 13 ++++--- validation.go | 7 ++-- 8 files changed, 210 insertions(+), 24 deletions(-) create mode 100644 regexpProvider.go create mode 100644 regexpProvider_test.go diff --git a/README.md b/README.md index 758f26d..7da802c 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ It's also possible to pass a `ReferenceLoader` to the `Compile` function that re ```go err = sl.AddSchemas(loader3) schema, err := sl.Compile(gojsonschema.NewReferenceLoader("http://some_host.com/main.json")) -``` +``` Schemas added by `AddSchema` and `AddSchemas` are only validated when the entire schema is compiled, unless meta-schema validation is used. @@ -211,7 +211,7 @@ If autodetection is on (default), a draft-07 schema can savely reference draft-0 ## Meta-schema validation Schemas that are added using the `AddSchema`, `AddSchemas` and `Compile` can be validated against their meta-schema by setting the `Validate` property. -The following example will produce an error as `multipleOf` must be a number. If `Validate` is off (default), this error is only returned at the `Compile` step. +The following example will produce an error as `multipleOf` must be a number. If `Validate` is off (default), this error is only returned at the `Compile` step. 
```go sl := gojsonschema.NewSchemaLoader() @@ -222,7 +222,7 @@ err := sl.AddSchemas(gojsonschema.NewStringLoader(`{ "multipleOf" : true }`)) ``` -``` +``` ``` Errors returned by meta-schema validation are more readable and contain more information, which helps significantly if you are developing a schema. @@ -237,7 +237,7 @@ The library handles string error codes which you can customize by creating your gojsonschema.Locale = YourCustomLocale{} ``` -However, each error contains additional contextual information. +However, each error contains additional contextual information. Newer versions of `gojsonschema` may have new additional errors, so code that uses a custom locale will need to be updated when this happens. @@ -341,7 +341,7 @@ Not all formats defined in draft-07 are available. Implemented formats are: * `json-pointer` * `relative-json-pointer` -`email`, `uri` and `uri-reference` use the same validation code as their unicode counterparts `idn-email`, `iri` and `iri-reference`. If you rely on unicode support you should use the specific +`email`, `uri` and `uri-reference` use the same validation code as their unicode counterparts `idn-email`, `iri` and `iri-reference`. If you rely on unicode support you should use the specific unicode enabled formats for the sake of interoperability as other implementations might not support unicode in the regular formats. The validation code for `uri`, `idn-email` and their relatives use mostly standard library code. @@ -452,13 +452,58 @@ func main() { } return result, err - } ``` This is especially useful if you want to add validation beyond what the json schema drafts can provide such business specific logic. +## Custom regular expression implementation +By default this library uses Go's builtin [regexp](https://golang.org/pkg/regexp/) package which uses the +[RE2](https://github.com/google/re2/wiki/Syntax) engine that is not [ECMA262](http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf) compatible.
+ +The regular expression library can be changed by implementing [RegexpProvider](regexpProvider.go) interface using your preferred regular expression implementation, and setting `SchemaLoader.RegexpProvider`. + +```go +import "github.com/dlclark/regexp2" + +type Regexp2Provider struct { +} + +func (Regexp2Provider) Compile(expr string) (CompiledRegexp, error) { + return regexp2.Compile(expr) +} + +sl := gojsonschema.NewSchemaLoader() +sl.RegexpProvider = Regexp2Provider{} +loader := gojsonschema.NewStringLoader(`{ "type" : "string", "pattern": "(?=foo)bar" }`) +schema, err := sl.Compile(loader) +``` + +Note the `regex` `FormatChecker` will still use `RE2` unless it is replaced. +```go +import "github.com/dlclark/regexp2" + +type Regex2FormatChecker struct{} + +// IsFormat checks if input is a correctly formatted regular expression +func (f Regex2FormatChecker) IsFormat(input interface{}) bool { + asString, ok := input.(string) + if !ok { + return true + } + + if asString == "" { + return true + } + _, err := regexp2.Compile(asString, 0) + return err == nil +} + +//replace golang regexp format checker with regexp2 +gojsonschema.FormatCheckers.Add("regex", Regex2FormatChecker{}) +``` + ## Uses gojsonschema uses the following test suite : diff --git a/regexpProvider.go b/regexpProvider.go new file mode 100644 index 0000000..6b532a4 --- /dev/null +++ b/regexpProvider.go @@ -0,0 +1,32 @@ +package gojsonschema + +import ( + "regexp" +) + +var ( + defaultRegexProvider = golangRegexpProvider{} +) + +// RegexpProvider An interface to a regex implementation +type RegexpProvider interface { + // Compile Compiles an expression and returns a CompiledRegexp + Compile(expr string) (CompiledRegexp, error) +} + +// CompiledRegexp A compiled expression +type CompiledRegexp interface { + // MatchString Tests if the string matches the compiled expression + MatchString(s string) bool +} + +type golangRegexpProvider struct { +} + +func (golangRegexpProvider) Compile(expr string) 
(CompiledRegexp, error) { + return regexp.Compile(expr) +} + +func getDefaultRegexpProvider() RegexpProvider { + return defaultRegexProvider +} diff --git a/regexpProvider_test.go b/regexpProvider_test.go new file mode 100644 index 0000000..4501eda --- /dev/null +++ b/regexpProvider_test.go @@ -0,0 +1,81 @@ +// Copyright 2018 johandorland ( /~https://github.com/johandorland ) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gojsonschema + +import ( + "regexp" + "testing" + + "github.com/stretchr/testify/assert" +) + +type customCompiled struct { + pattern *regexp.Regexp + expr string + matchStringCalled int +} + +func (cc *customCompiled) MatchString(s string) bool { + cc.matchStringCalled++ + return cc.pattern.MatchString(s) +} + +type customRegexpProvider struct { + compileCalled int + compiledRegexp map[string]*customCompiled +} + +func (c *customRegexpProvider) Compile(expr string) (CompiledRegexp, error) { + c.compileCalled++ + pattern, err := regexp.Compile(expr) + if err != nil { + return nil, err + } + cc := &customCompiled{ + pattern: pattern, + expr: expr, + } + if c.compiledRegexp == nil { + c.compiledRegexp = make(map[string]*customCompiled) + } + c.compiledRegexp[expr] = cc + return cc, nil +} + +func TestCustomRegexpProvider(t *testing.T) { + // Verify that the RegexpProvider is used + loader := NewStringLoader(`{ + "patternProperties": { + "f.*o": {"type": "integer"}, + "b.*r": {"type": "string", "pattern": "^a*$"} + } + }`) + + sl 
:= NewSchemaLoader() + customRegexpProvider := &customRegexpProvider{} + sl.RegexpProvider = customRegexpProvider + d, err := sl.Compile(loader) + assert.Nil(t, err) + assert.NotNil(t, d.regexp) + + loader = NewStringLoader(`{"foo": 1, "foooooo" : 2, "bar": "a", "baaaar": "aaaa"}`) + r, err := d.Validate(loader) + assert.Nil(t, err) + assert.Empty(t, r.errors) + assert.Equal(t, 3, customRegexpProvider.compileCalled) + assert.Equal(t, 4, customRegexpProvider.compiledRegexp["f.*o"].matchStringCalled) + assert.Equal(t, 4, customRegexpProvider.compiledRegexp["b.*r"].matchStringCalled) + assert.Equal(t, 2, customRegexpProvider.compiledRegexp["^a*$"].matchStringCalled) +} diff --git a/schema.go b/schema.go index 9e93cd7..74c4364 100644 --- a/schema.go +++ b/schema.go @@ -30,7 +30,6 @@ import ( "errors" "math/big" "reflect" - "regexp" "text/template" "github.com/xeipuuv/gojsonreference" @@ -56,6 +55,7 @@ type Schema struct { rootSchema *subSchema pool *schemaPool referencePool *schemaReferencePool + regexp RegexpProvider } func (d *Schema) parse(document interface{}, draft Draft) error { @@ -320,9 +320,9 @@ func (d *Schema) parseSchema(documentNode interface{}, currentSchema *subSchema) if isKind(m[KEY_PATTERN_PROPERTIES], reflect.Map) { patternPropertiesMap := m[KEY_PATTERN_PROPERTIES].(map[string]interface{}) if len(patternPropertiesMap) > 0 { - currentSchema.patternProperties = make(map[string]*subSchema) + currentSchema.patternProperties = make(map[string]*patternProperties) for k, v := range patternPropertiesMap { - _, err := regexp.MatchString(k, "") + pattern, err := d.regexp.Compile(k) if err != nil { return errors.New(formatErrorDescription( Locale.RegexPattern(), @@ -334,7 +334,7 @@ func (d *Schema) parseSchema(documentNode interface{}, currentSchema *subSchema) if err != nil { return errors.New(err.Error()) } - currentSchema.patternProperties[k] = newSchema + currentSchema.patternProperties[k] = &patternProperties{schema: newSchema, pattern: pattern} } } } else 
{ @@ -652,7 +652,7 @@ func (d *Schema) parseSchema(documentNode interface{}, currentSchema *subSchema) if existsMapKey(m, KEY_PATTERN) { if isKind(m[KEY_PATTERN], reflect.String) { - regexpObject, err := regexp.Compile(m[KEY_PATTERN].(string)) + regexpObject, err := d.regexp.Compile(m[KEY_PATTERN].(string)) if err != nil { return errors.New(formatErrorDescription( Locale.MustBeValidRegex(), diff --git a/schemaLoader.go b/schemaLoader.go index 20db0c1..50a487c 100644 --- a/schemaLoader.go +++ b/schemaLoader.go @@ -23,10 +23,11 @@ import ( // SchemaLoader is used to load schemas type SchemaLoader struct { - pool *schemaPool - AutoDetect bool - Validate bool - Draft Draft + pool *schemaPool + AutoDetect bool + Validate bool + Draft Draft + RegexpProvider RegexpProvider } // NewSchemaLoader creates a new NewSchemaLoader @@ -153,6 +154,9 @@ func (sl *SchemaLoader) Compile(rootSchema JSONLoader) (*Schema, error) { } d := Schema{} + if d.regexp = sl.RegexpProvider; d.regexp == nil { + d.regexp = getDefaultRegexpProvider() + } d.pool = sl.pool d.pool.jsonLoaderFactory = rootSchema.LoaderFactory() d.documentReference = ref diff --git a/schemaLoader_test.go b/schemaLoader_test.go index 96adfca..4d0558a 100644 --- a/schemaLoader_test.go +++ b/schemaLoader_test.go @@ -15,9 +15,10 @@ package gojsonschema import ( - "github.com/stretchr/testify/require" "testing" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" ) @@ -174,3 +175,22 @@ func TestParseSchemaURL_NotMap(t *testing.T) { require.Error(t, err) assert.EqualError(t, err, "schema is invalid") } + +func TestDefaultRegexpProvider(t *testing.T) { + //Verify that when no RegexpProvider is set, the default Regexp Provider is used + loader := NewStringLoader(`{ + "patternProperties": { + "f.*o": {"type": "integer"}, + "b.*r": {"type": "string", "pattern": "^a*$"} + } + }`) + + d, err := NewSchema(loader) + assert.Nil(t, err) + assert.NotNil(t, d.regexp) + + loader = NewStringLoader(`{"foo": 1, 
"foooooo" : 2, "bar": "a", "baaaar": "aaaa"}`) + r, err := d.Validate(loader) + assert.Nil(t, err) + assert.Empty(t, r.errors) +} diff --git a/subSchema.go b/subSchema.go index ec77981..8e59880 100644 --- a/subSchema.go +++ b/subSchema.go @@ -27,9 +27,9 @@ package gojsonschema import ( - "github.com/xeipuuv/gojsonreference" "math/big" - "regexp" + + "github.com/xeipuuv/gojsonreference" ) // Constants @@ -76,6 +76,11 @@ const ( KEY_ELSE = "else" ) +type patternProperties struct { + schema *subSchema + pattern CompiledRegexp +} + type subSchema struct { draft *Draft @@ -113,7 +118,7 @@ type subSchema struct { // validation : string minLength *int maxLength *int - pattern *regexp.Regexp + pattern CompiledRegexp format string // validation : object @@ -123,7 +128,7 @@ type subSchema struct { dependencies map[string]interface{} additionalProperties interface{} - patternProperties map[string]*subSchema + patternProperties map[string]*patternProperties propertyNames *subSchema // validation : array diff --git a/validation.go b/validation.go index 9081bd9..b19aece 100644 --- a/validation.go +++ b/validation.go @@ -29,7 +29,6 @@ import ( "encoding/json" "math/big" "reflect" - "regexp" "strconv" "strings" "unicode/utf8" @@ -687,11 +686,11 @@ func (v *subSchema) validatePatternProperty(currentSubSchema *subSchema, key str validated := false - for pk, pv := range currentSubSchema.patternProperties { - if matches, _ := regexp.MatchString(pk, key); matches { + for _, pp := range currentSubSchema.patternProperties { + if matches := pp.pattern.MatchString(key); matches { validated = true subContext := NewJsonContext(key, context) - validationResult := pv.subValidateWithContext(value, subContext) + validationResult := pp.schema.subValidateWithContext(value, subContext) result.mergeErrors(validationResult) } } From a2f941e28386d77d7f96a4f00611c5b44eaad803 Mon Sep 17 00:00:00 2001 From: Jeff Whiting Date: Wed, 2 Sep 2020 14:48:02 -0600 Subject: [PATCH 2/3] Removed unnecessary 
backticks --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 7da802c..ea7e977 100644 --- a/README.md +++ b/README.md @@ -222,8 +222,6 @@ err := sl.AddSchemas(gojsonschema.NewStringLoader(`{ "multipleOf" : true }`)) ``` -``` - ``` Errors returned by meta-schema validation are more readable and contain more information, which helps significantly if you are developing a schema. From 8fa64bcf3742850fd241c3e5f6f2c63be2120129 Mon Sep 17 00:00:00 2001 From: Jeff Whiting Date: Wed, 2 Sep 2020 15:33:08 -0600 Subject: [PATCH 3/3] Updated the readme with working code. --- README.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ea7e977..3737164 100644 --- a/README.md +++ b/README.md @@ -465,13 +465,27 @@ The regular expression library can be changed by implementing [RegexpProvider](r ```go import "github.com/dlclark/regexp2" -type Regexp2Provider struct { +type Regexp2Provider struct{} + +type regexp2CompiledRegexp struct { + compiled *regexp2.Regexp } -func (Regexp2Provider) Compile(expr string) (CompiledRegexp, error) { - return regexp2.Compile(expr) +func (c regexp2CompiledRegexp) MatchString(s string) bool { + if matched, err := c.compiled.MatchString(s); err != nil { + return false + } else { + return matched + } } +func (Regexp2Provider) Compile(expr string) (gojsonschema.CompiledRegexp, error) { + if compiled, err := regexp2.Compile(expr, 0); err != nil { + return nil, err + } else { + return regexp2CompiledRegexp{compiled}, nil + } +} sl := gojsonschema.NewSchemaLoader() sl.RegexpProvider = Regexp2Provider{} loader := gojsonschema.NewStringLoader(`{ "type" : "string", "pattern": "(?=foo)bar" }`)