Skip to content

Commit

Permalink
Unbag bagged SIPs, fixes #94
Browse files Browse the repository at this point in the history
Unbag bagged SIPs after validation to avoid issues with identification
and double bagging.

[skip codecov]
  • Loading branch information
djjuhasz committed Dec 13, 2024
1 parent e1b9a28 commit a682cec
Show file tree
Hide file tree
Showing 9 changed files with 392 additions and 21 deletions.
4 changes: 4 additions & 0 deletions cmd/worker/workercmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ func (m *Main) Run(ctx context.Context) error {
temporalsdk_workflow.RegisterOptions{Name: m.cfg.Temporal.WorkflowName},
)

w.RegisterActivityWithOptions(
activities.NewUnbag().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.UnbagName},
)
w.RegisterActivityWithOptions(
activities.NewIdentifySIP().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.IdentifySIPName},
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.18.2
github.com/stretchr/testify v1.9.0
go.artefactual.dev/tools v0.14.0
go.artefactual.dev/tools v0.17.0
go.temporal.io/sdk v1.26.1
go.uber.org/mock v0.4.0
gocloud.dev v0.39.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.artefactual.dev/tools v0.14.0 h1:ESLbemsnkdIPmYXtz0uZTcPqVnTUXIEZd9DSTRyTZqY=
go.artefactual.dev/tools v0.14.0/go.mod h1:5RJ7ObocHZv/zQFYFv/zG9cW/UVRGPFywcJx/oQ+TG8=
go.artefactual.dev/tools v0.17.0 h1:7X/qZYKyKT8RxVjBsksqvalQ8F4wcor6jcA0ewjc92M=
go.artefactual.dev/tools v0.17.0/go.mod h1:lsu0JcKFEJanNdrf5/IFjjzxul4pazG1dDHnLX9Nkvs=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 h1:9G6E0TXzGFVfTnawRzrPl83iHOAV7L8NJiR8RSGYV1g=
Expand Down
75 changes: 75 additions & 0 deletions internal/activities/unbag.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package activities

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"

"go.artefactual.dev/tools/fsutil"
)

// UnbagName is the name under which the Unbag activity is registered.
const UnbagName = "unbag"

// Unbag is the activity that turns a BagIt bag into a plain directory. It
// carries no state.
type Unbag struct{}

// UnbagParams is the input of the Unbag activity.
type UnbagParams struct {
	// Path is the directory to unbag.
	Path string
}

// UnbagResult is the output of the Unbag activity.
type UnbagResult struct {
	// Path is the directory that was processed; Execute returns the same
	// path it was given.
	Path string
}

// NewUnbag returns a new Unbag activity.
func NewUnbag() *Unbag {
	return new(Unbag)
}

// Execute converts the BagIt bag at params.Path into a plain directory: it
// deletes everything in the bag root except the "data" directory, moves the
// contents of "data" up into the root and removes the emptied "data"
// directory.
//
// If params.Path contains no bagit.txt the path is not considered a bag and
// is returned untouched. All preconditions are checked before anything is
// deleted: a bag whose "data" entry is missing or is not a directory produces
// an error and is left as it was found. Stat failures other than "does not
// exist" (for example permission errors) are returned instead of being
// interpreted as "not a bag" or "missing data directory".
//
// On success the result carries the unchanged params.Path.
func (a *Unbag) Execute(ctx context.Context, params *UnbagParams) (*UnbagResult, error) {
	dataDir := filepath.Join(params.Path, "data")

	if _, err := os.Stat(filepath.Join(params.Path, "bagit.txt")); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Do nothing if not a bag (bagit.txt doesn't exist).
			return &UnbagResult{Path: params.Path}, nil
		}
		return nil, fmt.Errorf("stat bagit.txt: %w", err)
	}

	fi, err := os.Stat(dataDir)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return nil, errors.New("missing data directory")
	case err != nil:
		return nil, fmt.Errorf("stat data dir: %w", err)
	case !fi.IsDir():
		// Fail before the tag files are deleted below.
		return nil, errors.New("data is not a directory")
	}

	entries, err := os.ReadDir(params.Path)
	if err != nil {
		return nil, fmt.Errorf("read dir: %w", err)
	}

	// Delete everything except the data directory.
	for _, e := range entries {
		if e.Name() == "data" {
			continue
		}
		if err := os.RemoveAll(filepath.Join(params.Path, e.Name())); err != nil {
			return nil, fmt.Errorf("delete: %w", err)
		}
	}

	// Move the data directory contents to the SIP root. A payload entry that
	// is itself named "data" can't be moved while the bag's data directory
	// still occupies that name, so it is dealt with after the loop.
	payload, err := os.ReadDir(dataDir)
	if err != nil {
		return nil, fmt.Errorf("read data dir: %w", err)
	}

	nested := false
	for _, e := range payload {
		if e.Name() == "data" {
			nested = true
			continue
		}
		if err := fsutil.Move(
			filepath.Join(dataDir, e.Name()),
			filepath.Join(params.Path, e.Name()),
		); err != nil {
			return nil, fmt.Errorf("move: %w", err)
		}
	}

	if nested {
		if err := promoteNestedData(params.Path); err != nil {
			return nil, err
		}
		return &UnbagResult{Path: params.Path}, nil
	}

	// Delete the empty data directory.
	if err := os.Remove(dataDir); err != nil {
		return nil, fmt.Errorf("remove data dir: %w", err)
	}

	return &UnbagResult{Path: params.Path}, nil
}

// promoteNestedData replaces the bag's data directory at root/data, which
// must contain nothing but an entry named "data", with that entry. The entry
// is staged in a temporary directory so that the two never compete for the
// same name.
func promoteNestedData(root string) error {
	dataDir := filepath.Join(root, "data")

	// MkdirTemp picks a name that doesn't exist in root yet, so the staging
	// directory can't clash with payload entries that were already moved.
	staging, err := os.MkdirTemp(root, "unbag-")
	if err != nil {
		return fmt.Errorf("create staging dir: %w", err)
	}
	staged := filepath.Join(staging, "data")

	if err := fsutil.Move(filepath.Join(dataDir, "data"), staged); err != nil {
		return fmt.Errorf("move: %w", err)
	}
	if err := os.Remove(dataDir); err != nil {
		return fmt.Errorf("remove data dir: %w", err)
	}
	if err := fsutil.Move(staged, dataDir); err != nil {
		return fmt.Errorf("move: %w", err)
	}
	if err := os.Remove(staging); err != nil {
		return fmt.Errorf("remove staging dir: %w", err)
	}

	return nil
}
117 changes: 117 additions & 0 deletions internal/activities/unbag_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package activities_test

import (
"testing"

temporalsdk_activity "go.temporal.io/sdk/activity"
temporalsdk_testsuite "go.temporal.io/sdk/testsuite"
"gotest.tools/v3/assert"
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
)

// TestUnbag runs the Unbag activity in a Temporal test activity environment
// against temporary directories and checks the returned result, the
// resulting directory layout and the error reported for a bag without a data
// directory.
func TestUnbag(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name    string
		path    string
		params  func(string) activities.UnbagParams
		result  func(string) activities.UnbagResult
		wantFS  fs.Manifest
		wantErr string
	}{
		{
			name: "Unbags a bag",
			path: fs.NewDir(t, "enduro-test",
				fs.WithDir("data",
					fs.WithDir("d_0000001",
						fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
					),
					fs.WithDir("additional"),
				),
				fs.WithFile("bagit.txt", ""),
				fs.WithFile("manifest-md5.txt", ""),
			).Path(),
			params: func(path string) activities.UnbagParams {
				return activities.UnbagParams{Path: path}
			},
			result: func(path string) activities.UnbagResult {
				return activities.UnbagResult{Path: path}
			},
			wantFS: fs.Expected(t,
				fs.WithDir("d_0000001",
					fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
				),
				fs.WithDir("additional"),
			),
		},
		{
			name: "Does nothing when path is not a bag",
			path: fs.NewDir(t, "enduro-test",
				fs.WithDir("d_0000001",
					fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
				),
				fs.WithDir("additional"),
			).Path(),
			params: func(path string) activities.UnbagParams {
				return activities.UnbagParams{Path: path}
			},
			result: func(path string) activities.UnbagResult {
				return activities.UnbagResult{Path: path}
			},
			wantFS: fs.Expected(t,
				fs.WithDir("d_0000001",
					fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
				),
				fs.WithDir("additional"),
			),
		},
		{
			name: "Errors when bag is missing data dir",
			path: fs.NewDir(t, "enduro-test",
				fs.WithDir("content",
					fs.WithDir("d_0000001",
						fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
					),
					fs.WithDir("additional"),
				),
				fs.WithFile("bagit.txt", ""),
			).Path(),
			params: func(path string) activities.UnbagParams {
				return activities.UnbagParams{Path: path}
			},
			wantErr: "activity error (type: unbag, scheduledEventID: 0, startedEventID: 0, identity: ): missing data directory",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			ts := &temporalsdk_testsuite.WorkflowTestSuite{}
			env := ts.NewTestActivityEnvironment()
			env.RegisterActivityWithOptions(
				activities.NewUnbag().Execute,
				temporalsdk_activity.RegisterOptions{Name: activities.UnbagName},
			)

			future, err := env.ExecuteActivity(activities.UnbagName, tt.params(tt.path))
			if tt.wantErr != "" {
				// ErrorContains also fails the test when err is nil.
				assert.ErrorContains(t, err, tt.wantErr)
				return
			}
			assert.NilError(t, err)

			var res activities.UnbagResult
			assert.NilError(t, future.Get(&res))
			assert.DeepEqual(t, res, tt.result(tt.path))

			// The activity's purpose is its effect on disk, so compare the
			// resulting directory tree with the expected manifest.
			assert.Assert(t, fs.Equal(tt.path, tt.wantFS))
		})
	}
}
24 changes: 24 additions & 0 deletions internal/localact/is_bag.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package localact

import (
"context"
"path/filepath"

"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
)

// IsBagParams is the input of the IsBag local activity.
type IsBagParams struct {
	// Path is the directory to inspect.
	Path string
}

// IsBagResult is the output of the IsBag local activity.
type IsBagResult struct {
	// IsBag holds the outcome of the bagit.txt existence check on Path.
	IsBag bool
}

// IsBag checks params.Path for a "bagit.txt" entry and reports the outcome
// in the result's IsBag field. The existence check is delegated to
// fsutil.FileExists; the returned error is always nil.
func IsBag(ctx context.Context, params *IsBagParams) (*IsBagResult, error) {
	marker := filepath.Join(params.Path, "bagit.txt")
	res := &IsBagResult{IsBag: fsutil.FileExists(marker)}

	return res, nil
}
52 changes: 52 additions & 0 deletions internal/localact/is_bag_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package localact_test

import (
"testing"

temporalsdk_testsuite "go.temporal.io/sdk/testsuite"
"gotest.tools/v3/assert"
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/localact"
)

// TestIsBag runs the IsBag local activity in a Temporal test activity
// environment against a directory that contains bagit.txt and against an
// empty directory.
func TestIsBag(t *testing.T) {
	t.Parallel()

	bagPath := fs.NewDir(t, "ppsfa-test",
		fs.WithFile("bagit.txt", ""),
	).Path()
	emptyPath := fs.NewDir(t, "ppsfa-test").Path()

	type test struct {
		name   string
		params localact.IsBagParams
		want   localact.IsBagResult
	}
	for _, tt := range []test{
		{
			name:   "Is a bag",
			params: localact.IsBagParams{Path: bagPath},
			want:   localact.IsBagResult{IsBag: true},
		},
		{
			name:   "Is not a bag",
			params: localact.IsBagParams{Path: emptyPath},
			want:   localact.IsBagResult{IsBag: false},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			ts := &temporalsdk_testsuite.WorkflowTestSuite{}
			env := ts.NewTestActivityEnvironment()

			enc, err := env.ExecuteLocalActivity(localact.IsBag, &tt.params)
			assert.NilError(t, err)

			// A decoding failure would leave res at its zero value, which
			// equals the expected result of the "Is not a bag" case, so the
			// error from Get must not be dropped.
			var res localact.IsBagResult
			assert.NilError(t, enc.Get(&res))
			assert.DeepEqual(t, res, tt.want)
		})
	}
}
Loading

0 comments on commit a682cec

Please sign in to comment.