From 0b1c0c8ddf7c0f5de8e23a0b42ca2348efaaef78 Mon Sep 17 00:00:00 2001 From: Kristin Laemmert Date: Fri, 11 Oct 2024 09:55:01 -0400 Subject: [PATCH] feat(file domain): add support for reading arbitrary files as strings (#726) * feat(file domain): add support for reading arbitrary files as strings * support specifying file parser * adding some words about the file string format --- docs/reference/domains/file-domain.md | 116 ++++++++++++++---- src/pkg/common/schemas/validation.json | 26 ++++ src/pkg/domains/files/files.go | 115 ++++++++++++++--- src/pkg/domains/files/files_test.go | 4 + src/pkg/domains/files/spec.go | 5 +- src/pkg/domains/files/testdata/arbitrary.file | 1 + src/pkg/domains/files/testdata/baz | 1 + src/test/e2e/file_validation_test.go | 72 +++++------ .../component-definition-string-file.yaml | 89 ++++++++++++++ .../file-validations/pass/config.string | 2 + 10 files changed, 345 insertions(+), 86 deletions(-) create mode 100644 src/pkg/domains/files/testdata/arbitrary.file create mode 100644 src/pkg/domains/files/testdata/baz create mode 100644 src/test/e2e/scenarios/file-validations/pass/component-definition-string-file.yaml create mode 100644 src/test/e2e/scenarios/file-validations/pass/config.string diff --git a/docs/reference/domains/file-domain.md b/docs/reference/domains/file-domain.md index 25f5135d..387f39c8 100644 --- a/docs/reference/domains/file-domain.md +++ b/docs/reference/domains/file-domain.md @@ -11,33 +11,41 @@ domain: filepaths: - name: config path: grafana.ini + parser: ini # optionally specify which parser to use for the file type ``` ## Supported File Types -The file domain uses OPA's [conftest](https://conftest.dev) to parse files into a json-compatible format for validations. ∑Both OPA and Kyverno (using [kyverno-json](https://kyverno.github.io/kyverno-json/latest/)) can validate files parsed by the file domain. 
- -The file domain supports the following file formats for validation: -* CUE -* CycloneDX -* Dockerfile -* EDN -* Environment files (.env) -* HCL and HCL2 -* HOCON -* Ignore files (.gitignore, .dockerignore) -* INI -* JSON -* Jsonnet -* Property files (.properties) -* SPDX -* TextProto (Protocol Buffers) -* TOML -* VCL -* XML -* YAML +The file domain uses OPA's [conftest](https://conftest.dev) to parse files into a json-compatible format for validations. Both OPA and Kyverno (using [kyverno-json](https://kyverno.github.io/kyverno-json/latest/)) can validate files parsed by the file domain. + +The file domain includes the following file parsers: +* cue +* cyclonedx +* dockerfile +* dotenv +* edn +* hcl1 +* hcl2 +* hocon +* ignore +* ini +* json +* jsonc +* jsonnet +* properties +* spdx +* string +* textproto +* toml +* vcl +* xml +* yaml + +The file domain can also parse arbitrary file types as strings. The entire file contents will be represented as a single string. + +The file parser can usually be inferred from the file extension. However, if the file extension does not match the filetype you are parsing (for example, if you have a json file that does not have a `.json` extension), or if you wish to parse an arbitrary file type as a string, use the `parser` field in the FileSpec to specify which parser to use. The list above contains all the available parsers. ## Validations -When writing validations against files, the filepath `Name` must be included as +When writing validations against files, the filepath `name` must be included as the top-level key in the validation. The placement varies between providers. Given the following ini file: @@ -124,5 +132,69 @@ provider: - validate.msg ``` +### Parsing files as arbitrary strings +Files that are parsed as strings are represented as a key-value pair where the key is the user-supplied file `name` and the value is a string representation of the file contents, including special characters, e.g. newlines (`\n`). 
+ +As an example, let's parse a file similar to the one above as an arbitrary string. + +When reading the following multiline file contents as a string: +```server.txt +server = https +port = 3000 +``` + +The resources for validation will be formatted as a single string with newline characters: + +``` +{"config": "server = https\nport = 3000"} +``` + +And the following validation will confirm whether the server is configured for https: +```validation.yaml + domain: + type: file + file-spec: + filepaths: + - name: 'config' + path: 'server.txt' + parser: string + provider: + type: opa + opa-spec: + rego: | + package validate + import rego.v1 + + # Default values + default validate := false + default msg := "Not evaluated" + + validate if { + check_server_protocol.result + } + msg = check_server_protocol.msg + + config := input["config"] + + check_server_protocol = {"result": true, "msg": msg} if { + regex.match( + `server = https\n`, + config + ) + msg := "Server protocol is set to https" + } else = {"result": false, "msg": msg} if { + regex.match( + `server = http\n`, + config + ) + msg := "Server Protocol must be https - http is disallowed" + } + + output: + validation: validate.validate + observations: + - validate.msg +``` + ## Note on Compose While the file domain is capable of referencing relative file paths in the `file-spec`, Lula does not de-reference those paths during composition. If you are composing multiple files together, you must either use absolute filepaths (including network filepaths), or ensure that all referenced filepaths are relative to the output directory of the compose command. 
diff --git a/src/pkg/common/schemas/validation.json b/src/pkg/common/schemas/validation.json index c452ab43..efabd43d 100644 --- a/src/pkg/common/schemas/validation.json +++ b/src/pkg/common/schemas/validation.json @@ -381,6 +381,32 @@ }, "path": { "type": "string" + }, + "parser": { + "type": "string", + "enum": [ + "cue", + "cyclonedx", + "dockerfile", + "edn", + "hcl1", + "hcl2", + "hocon", + "ignore", + "ini", + "json", + "jsonc", + "jsonnet", + "properties", + "spdx", + "textproto", + "toml", + "vcl", + "xml", + "yaml", + "dotenv", + "string" + ] } } } diff --git a/src/pkg/domains/files/files.go b/src/pkg/domains/files/files.go index 7886ff73..0d9959ab 100644 --- a/src/pkg/domains/files/files.go +++ b/src/pkg/domains/files/files.go @@ -37,35 +37,36 @@ func (d Domain) GetResources(ctx context.Context) (types.DomainResources, error) defer os.RemoveAll(dst) // make a map of rel filepaths to the user-supplied name, so we can re-key the DomainResources later on. - filenames := make(map[string]string, len(d.Spec.Filepaths)) + filenames := make(map[string]string, 0) + + // unstructuredFiles is used to store a list of files that Lula needs to parse. 
+ unstructuredFiles := make([]FileInfo, 0) + filesWithParsers := make(map[string][]FileInfo, 0) // Copy files to a temporary location - for _, path := range d.Spec.Filepaths { - file := filepath.Join(workDir, path.Path) - bytes, err := network.Fetch(file) - if err != nil { - return nil, fmt.Errorf("error getting source files: %w", err) + for _, fi := range d.Spec.Filepaths { + if fi.Parser != "" { + if fi.Parser == "string" { + unstructuredFiles = append(unstructuredFiles, fi) + continue + } else { + filesWithParsers[fi.Parser] = append(filesWithParsers[fi.Parser], fi) + continue + } } - // We'll just use the filename when writing the file so it's easier to reference later - relname := filepath.Base(path.Path) - - err = os.WriteFile(filepath.Join(dst, relname), bytes, 0666) + file := filepath.Join(workDir, fi.Path) + relname, err := copyFile(dst, file) if err != nil { return nil, fmt.Errorf("error writing local files: %w", err) } + // and save this info for later - filenames[relname] = path.Name + filenames[relname] = fi.Name } // get a list of all the files we just downloaded in the temporary directory - files := make([]string, 0) - err = filepath.WalkDir(dst, func(path string, d fs.DirEntry, err error) error { - if !d.IsDir() { - files = append(files, path) - } - return nil - }) + files, err := listFiles(dst) if err != nil { return nil, fmt.Errorf("error walking downloaded file tree: %w", err) } @@ -79,7 +80,7 @@ func (d Domain) GetResources(ctx context.Context) (types.DomainResources, error) // clean up the resources so it's using the filepath.Name as the map key, // instead of the file path. 
- drs := make(types.DomainResources, len(config)) + drs := make(types.DomainResources, len(config)+len(unstructuredFiles)+len(filesWithParsers)) for k, v := range config { rel, err := filepath.Rel(dst, k) if err != nil { @@ -87,6 +88,58 @@ func (d Domain) GetResources(ctx context.Context) (types.DomainResources, error) } drs[filenames[rel]] = v } + + // Now for the custom parsing: user-specified parsers and string files. + + for parserName, filesByParser := range filesWithParsers { + // make a sub directory by parser name + parserDir, err := os.MkdirTemp(dst, parserName) + if err != nil { + return nil, err + } + + for _, fi := range filesByParser { + file := filepath.Join(workDir, fi.Path) + relname, err := copyFile(parserDir, file) + if err != nil { + return nil, fmt.Errorf("error writing local files: %w", err) + } + + // and save this info for later + filenames[relname] = fi.Name + } + + // get a list of all the files we just downloaded in the temporary directory + files, err := listFiles(parserDir) + if err != nil { + return nil, fmt.Errorf("error walking downloaded file tree: %w", err) + } + + parsedConfig, err := parser.ParseConfigurationsAs(files, parserName) + if err != nil { + return nil, err + } + + for k, v := range parsedConfig { + rel, err := filepath.Rel(parserDir, k) + if err != nil { + return nil, fmt.Errorf("error determining relative file path: %w", err) + } + drs[filenames[rel]] = v + } + } + + // add the string form of the unstructured files + for _, f := range unstructuredFiles { + // we don't need to copy these files, we'll just slurp the contents into + // a string and append that as one big DomainResource + b, err := os.ReadFile(filepath.Join(workDir, f.Path)) + if err != nil { + return nil, fmt.Errorf("error reading source files: %w", err) + } + drs[f.Name] = string(b) + } + return drs, nil } @@ -103,3 +156,27 @@ func CreateDomain(spec *Spec) (types.Domain, error) { } return Domain{spec}, nil } + +// copyFile is a helper function that copies 
a file from source to dst, and returns the relative file path between the two. +func copyFile(dst string, src string) (string, error) { + bytes, err := network.Fetch(src) + if err != nil { + return "", fmt.Errorf("error getting source files: %w", err) + } + + // We'll use the filename when writing the file so it's easier to reference later + relname := filepath.Base(src) + + return relname, os.WriteFile(filepath.Join(dst, relname), bytes, 0666) +} + +func listFiles(dir string) ([]string, error) { + files := make([]string, 0) + err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error { + if !d.IsDir() { + files = append(files, path) + } + return nil + }) + return files, err +} diff --git a/src/pkg/domains/files/files_test.go b/src/pkg/domains/files/files_test.go index 2824fa0d..5537e77c 100644 --- a/src/pkg/domains/files/files_test.go +++ b/src/pkg/domains/files/files_test.go @@ -16,7 +16,9 @@ func TestGetResource(t *testing.T) { d := Domain{Spec: &Spec{Filepaths: []FileInfo{ {Name: "foo.yaml", Path: "foo.yaml"}, {Name: "bar.json", Path: "bar.json"}, + {Name: "baz", Path: "baz", Parser: "json"}, {Name: "arbitraryname", Path: "nested-directory/baz.hcl2"}, + {Name: "stringtheory", Path: "arbitrary.file", Parser: "string"}, }}} resources, err := d.GetResources(context.WithValue(context.Background(), types.LulaValidationWorkDir, "testdata")) @@ -24,9 +26,11 @@ func TestGetResource(t *testing.T) { if diff := cmp.Diff(resources, types.DomainResources{ "bar.json": map[string]interface{}{"cat": "Cheetarah"}, "foo.yaml": "cat = Li Shou", + "baz": map[string]interface{}{"lizard": "Snakob"}, "arbitraryname": map[string]any{ "resource": map[string]any{"catname": map[string]any{"blackcat": map[string]any{"name": "robin"}}}, }, + "stringtheory": "hello there!", }); diff != "" { t.Fatalf("wrong result:\n%s\n", diff) } diff --git a/src/pkg/domains/files/spec.go b/src/pkg/domains/files/spec.go index 45dd78d0..d245b659 100644 --- a/src/pkg/domains/files/spec.go 
+++ b/src/pkg/domains/files/spec.go @@ -5,6 +5,7 @@ type Spec struct { } type FileInfo struct { - Name string `json:"name" yaml:"name"` - Path string `json:"path" yaml:"path"` + Name string `json:"name" yaml:"name"` + Path string `json:"path" yaml:"path"` + Parser string `json:"parser,omitempty" yaml:"parser,omitempty"` } diff --git a/src/pkg/domains/files/testdata/arbitrary.file b/src/pkg/domains/files/testdata/arbitrary.file new file mode 100644 index 00000000..f01621db --- /dev/null +++ b/src/pkg/domains/files/testdata/arbitrary.file @@ -0,0 +1 @@ +hello there! \ No newline at end of file diff --git a/src/pkg/domains/files/testdata/baz b/src/pkg/domains/files/testdata/baz new file mode 100644 index 00000000..d5fd94d2 --- /dev/null +++ b/src/pkg/domains/files/testdata/baz @@ -0,0 +1 @@ +{ "lizard": "Snakob"} \ No newline at end of file diff --git a/src/test/e2e/file_validation_test.go b/src/test/e2e/file_validation_test.go index 275378a4..d8e78ae3 100644 --- a/src/test/e2e/file_validation_test.go +++ b/src/test/e2e/file_validation_test.go @@ -2,10 +2,13 @@ package test import ( "context" + "fmt" "testing" "github.com/defenseunicorns/lula/src/cmd/validate" "github.com/defenseunicorns/lula/src/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestFileValidation(t *testing.T) { @@ -26,9 +29,7 @@ func TestFileValidation(t *testing.T) { } result := assessment.Results[0] - if result.Findings == nil { - t.Fatal("Expected findings to be not nil") - } + assert.NotNil(t, result, "Expected findings to be not nil") for _, finding := range *result.Findings { state := finding.Target.Status.State @@ -40,78 +41,63 @@ func TestFileValidation(t *testing.T) { t.Run("success - kyverno", func(t *testing.T) { ctx := context.WithValue(context.Background(), types.LulaValidationWorkDir, passDir) assessment, err := validate.ValidateOnPath(ctx, passDir+kyvernoFile, "") - if err != nil { - t.Fatal(err) - } + assert.NoError(t, err) + 
assert.NotEmpty(t, assessment.Results, "Expected greater than zero results") - if len(assessment.Results) == 0 { - t.Fatal("Expected greater than zero results") + result := assessment.Results[0] + assert.NotNil(t, result, "Expected findings to be not nil") + + for _, finding := range *result.Findings { + state := finding.Target.Status.State + assert.Equal(t, "satisfied", state, fmt.Sprintf("State should be satisfied, but got %s", state)) } + }) + t.Run("success - arbitrary file contexnts", func(t *testing.T) { + ctx := context.WithValue(context.Background(), types.LulaValidationWorkDir, passDir) + assessment, err := validate.ValidateOnPath(ctx, passDir+"/component-definition-string-file.yaml", "") + assert.NoError(t, err) + assert.NotEmpty(t, assessment.Results, "Expected greater than zero results") result := assessment.Results[0] - if result.Findings == nil { - t.Fatal("Expected findings to be not nil") - } + assert.NotNil(t, result, "Expected findings to be not nil") for _, finding := range *result.Findings { state := finding.Target.Status.State - if state != "satisfied" { - t.Fatal("State should be satisfied, but got :", state) - } + assert.Equal(t, "satisfied", state, fmt.Sprintf("State should be satisfied, but got %s", state)) } }) t.Run("fail - opa", func(t *testing.T) { ctx := context.WithValue(context.Background(), types.LulaValidationWorkDir, failDir) assessment, err := validate.ValidateOnPath(ctx, failDir+oscalFile, "") - if err != nil { - t.Fatal(err) - } - - if len(assessment.Results) == 0 { - t.Fatal("Expected greater than zero results") - } + assert.NoError(t, err) + assert.NotEmpty(t, assessment.Results, "Expected greater than zero results") result := assessment.Results[0] - if result.Findings == nil { - t.Fatal("Expected findings to be not nil") - } + assert.NotNil(t, result, "Expected findings to be not nil") for _, finding := range *result.Findings { state := finding.Target.Status.State - if state != "not-satisfied" { - t.Fatal("State should be 
non-satisfied, but got :", state) - } + assert.Equal(t, "not-satisfied", state, fmt.Sprintf("State should not be satisfied, but got %s", state)) } }) t.Run("fail - kyverno", func(t *testing.T) { ctx := context.WithValue(context.Background(), types.LulaValidationWorkDir, failDir) assessment, err := validate.ValidateOnPath(ctx, failDir+kyvernoFile, "") - if err != nil { - t.Fatal(err) - } - - if len(assessment.Results) == 0 { - t.Fatal("Expected greater than zero results") - } + assert.NoError(t, err) + assert.NotEmpty(t, assessment.Results, "Expected greater than zero results") result := assessment.Results[0] - if result.Findings == nil { - t.Fatal("Expected findings to be not nil") - } + assert.NotNil(t, result, "Expected findings to be not nil") for _, finding := range *result.Findings { state := finding.Target.Status.State - if state != "not-satisfied" { - t.Fatal("State should be non-satisfied, but got :", state) - } + assert.Equal(t, "not-satisfied", state, fmt.Sprintf("State should not be satisfied, but got %s", state)) } }) t.Run("invalid input", func(t *testing.T) { ctx := context.WithValue(context.Background(), types.LulaValidationWorkDir, "scenarios/file-validations/invalid") _, err := validate.ValidateOnPath(ctx, "scenarios/file-validations/invalid/oscal-component.yaml", "") - if err == nil { - t.Fatal("expected error, got success") - } + require.Error(t, err) }) } diff --git a/src/test/e2e/scenarios/file-validations/pass/component-definition-string-file.yaml b/src/test/e2e/scenarios/file-validations/pass/component-definition-string-file.yaml new file mode 100644 index 00000000..4ca4fca8 --- /dev/null +++ b/src/test/e2e/scenarios/file-validations/pass/component-definition-string-file.yaml @@ -0,0 +1,89 @@ +component-definition: + uuid: E6A291A4-2BC8-43A0-B4B2-FD67CAAE1F8F + metadata: + title: OSCAL Demo Tool + last-modified: "2022-09-13T12:00:00Z" + version: "20220913" + oscal-version: 1.1.1 + parties: + # Should be consistent across all of the packages, 
but where is ground truth? + - uuid: C18F4A9F-A402-415B-8D13-B51739D689FF + type: organization + name: Defense Unicorns + links: + - href: /~https://github.com/defenseunicorns/lula + rel: website + components: + - uuid: A9D5204C-7E5B-4C43-BD49-34DF759B9F04 + type: software + title: lula + description: | + Defense Unicorns lula + purpose: Validate compliance controls + responsible-roles: + - role-id: provider + party-uuids: + - C18F4A9F-A402-415B-8D13-B51739D689FF # matches parties entry for Defense Unicorns + control-implementations: + - uuid: A584FEDC-8CEA-4B0C-9F07-85C2C4AE751A + source: /~https://github.com/defenseunicorns/lula + description: Validate generic security requirements + implemented-requirements: + - uuid: 42C2FFDC-5F05-44DF-A67F-EEC8660AEFFD + control-id: ID-1 + description: >- + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum + dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
+ links: + - href: "#88AB3470-B96B-4D7C-BC36-02BF9563C46C" + rel: lula + back-matter: + resources: + - uuid: 88AB3470-B96B-4D7C-BC36-02BF9563C46C + rlinks: + - href: lula + description: | + domain: + type: file + file-spec: + filepaths: + - Name: config + path: config.string + parser: string + provider: + type: opa + opa-spec: + rego: | + package validate + import rego.v1 + + # Default values + default validate := false + default msg := "Not evaluated" + + validate if { + check_server_protocol.result + } + msg = check_server_protocol.msg + + config := input["config"] + + check_server_protocol = {"result": true, "msg": msg} if { + regex.match( + `server = https\n`, + config + ) + msg := "Server protocol is set to https" + } else = {"result": false, "msg": msg} if { + regex.match( + `server = http\n`, + config + ) + msg := "Server Protocol must be https - http is disallowed" + } + + output: + validation: validate.validate + observations: + - validate.msg \ No newline at end of file diff --git a/src/test/e2e/scenarios/file-validations/pass/config.string b/src/test/e2e/scenarios/file-validations/pass/config.string new file mode 100644 index 00000000..800fb7cb --- /dev/null +++ b/src/test/e2e/scenarios/file-validations/pass/config.string @@ -0,0 +1,2 @@ +server = https +something = else \ No newline at end of file