From cf5b96b4404776898b343791d3a8e8f35c57d1d5 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Fri, 3 Sep 2021 16:04:11 +0200 Subject: [PATCH 01/17] Implement Walk method to recursively walk into a container --- pkg/storage/utils/walk/walk.go | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 pkg/storage/utils/walk/walk.go diff --git a/pkg/storage/utils/walk/walk.go b/pkg/storage/utils/walk/walk.go new file mode 100644 index 0000000000..d431b7a208 --- /dev/null +++ b/pkg/storage/utils/walk/walk.go @@ -0,0 +1,86 @@ +package walk + +import ( + "context" + "path/filepath" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/pkg/errtypes" +) + +type WalkFunc func(path string, info *provider.ResourceInfo, err error) error + +func Walk(ctx context.Context, root string, gtw gateway.GatewayAPIClient, fn WalkFunc) error { + info, err := stat(ctx, root, gtw) + + if err != nil { + return fn(root, nil, err) + } + + err = walkRecursively(ctx, root, info, gtw, fn) + + if err == filepath.SkipDir { + return nil + } + + return err +} + +func walkRecursively(ctx context.Context, path string, info *provider.ResourceInfo, gtw gateway.GatewayAPIClient, fn WalkFunc) error { + + if info.Type != provider.ResourceType_RESOURCE_TYPE_CONTAINER { + return fn(path, info, nil) + } + + list, err := readDir(ctx, path, gtw) + errFn := fn(path, info, err) + + if err != nil || errFn != nil { + return errFn + } + + for _, file := range list { + err = walkRecursively(ctx, file.Path, file, gtw, fn) + if err != nil && (file.Type != provider.ResourceType_RESOURCE_TYPE_CONTAINER || err != filepath.SkipDir) { + return err + } + } + + return nil +} + +func readDir(ctx context.Context, path string, gtw gateway.GatewayAPIClient) ([]*provider.ResourceInfo, error) { + resp, err := gtw.ListContainer(ctx, &provider.ListContainerRequest{ + Ref: &provider.Reference{ + Path: path, + }, + }) + + switch { + case err != nil: + return nil, err + case resp.Status.Code != rpc.Code_CODE_OK: + return nil, errtypes.InternalError(resp.Status.Message) + } + + return resp.Infos, nil +} + +func stat(ctx context.Context, path string, gtw gateway.GatewayAPIClient) (*provider.ResourceInfo, error) { + resp, err := gtw.Stat(ctx, &provider.StatRequest{ + Ref: &provider.Reference{ + Path: path, + }, + }) + + switch { + case err != nil: + return nil, err + case resp.Status.Code != rpc.Code_CODE_OK: + return nil, errtypes.InternalError(resp.Status.Message) + } + + return resp.Info, nil +} From 0c952bc8d59b6d5e65eaa8c5d51e34e6298ba417 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Fri, 3 Sep 2021 16:17:19 +0200 Subject: [PATCH 02/17] Implement createTar to generate a tar file given a list of files/folders --- internal/http/services/archiver/archiver.go | 163 ++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 internal/http/services/archiver/archiver.go diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go new file mode 100644 index 0000000000..a785b6855b --- /dev/null +++ b/internal/http/services/archiver/archiver.go @@ -0,0 +1,163 @@ +// Copyright 2018-2021 CERN +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// In applying this license, CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +package archiver + +import ( + "archive/tar" + "context" + "io" + "net/http" + "time" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/internal/http/services/datagateway" + "github.com/cs3org/reva/pkg/errtypes" + "github.com/cs3org/reva/pkg/rhttp" + "github.com/cs3org/reva/pkg/rhttp/global" + "github.com/cs3org/reva/pkg/storage/utils/walk" + "github.com/rs/zerolog" +) + +type svc struct { + config *Config + httpClient *http.Client + gtwClient gateway.GatewayAPIClient +} + +// Config holds the config options that need to be passed down to all ocdav handlers +type Config struct { + Prefix string `mapstructure:"prefix"` + GatewaySvc string `mapstructure:"gatewaysvc"` +} + +func init() { + global.Register("archiver", New) +} + +func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) { + return nil, nil +} + +func (s *svc) Handler() http.Handler { + return nil +} + +func (s *svc) Prefix() string { + return s.config.Prefix +} + +func (s *svc) Close() error { + return nil +} + +func (s *svc) Unprotected() []string { + return nil +} + +func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) error { + w := tar.NewWriter(dst) + + for _, root := range files { + + err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { + if err != nil { + return err + } + + tarHeader := tar.Header{ + Name: path, + ModTime: time.Unix(int64(info.Mtime.Seconds), 0), + } + + isDir := info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER + + if isDir { + // the resource is a folder + tarHeader.Mode = 0755 + tarHeader.Typeflag = tar.TypeDir + } else { + tarHeader.Mode = 0644 + tarHeader.Typeflag = tar.TypeReg + tarHeader.Size = int64(info.Size) + } + + err = w.WriteHeader(&tarHeader) + if err != nil { + return err + } + + if !isDir { + err = s.downloadFile(ctx, path, w) + if err != nil { + return err + } + } + return nil + }) + + if err != nil { + return err + } + + } + return nil +} + +func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) error { + downReq, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{ + Ref: &provider.Reference{ + Path: path, + }, + }) + + switch { + case err != nil: + return err + case downReq.Status.Code != rpc.Code_CODE_OK: + return errtypes.InternalError(downReq.Status.Message) + } + + var endpoint, token string + for _, p := range downReq.Protocols { + if p.Protocol == "simple" { + endpoint, token = p.DownloadEndpoint, p.Token + } + } + + httpReq, err := rhttp.NewRequest(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return err + } + httpReq.Header.Set(datagateway.TokenTransportHeader, token) + + httpRes, err := s.httpClient.Do(httpReq) + if err != nil { + return err + } + defer httpRes.Body.Close() + + if httpRes.StatusCode != http.StatusOK { + return errtypes.InternalError(httpRes.Status) + } + + _, err = io.Copy(dst, httpRes.Body) + return err +} From fd5eb9439d8685d73744de2a7cccbe281f3408eb Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Fri, 3 Sep 2021 16:39:37 +0200 Subject: [PATCH 03/17] Implement createZip to generate a zip file given a list of files/folders --- internal/http/services/archiver/archiver.go | 58 ++++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index a785b6855b..6ccca53e13 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -20,6 +20,7 @@ package archiver import ( "archive/tar" + "archive/zip" "context" "io" "net/http" @@ -82,7 +83,7 @@ func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) erro return err } - tarHeader := tar.Header{ + header := tar.Header{ Name: path, ModTime: time.Unix(int64(info.Mtime.Seconds), 0), } @@ -91,15 +92,15 @@ func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) erro if isDir { // the resource is a folder - tarHeader.Mode = 0755 - tarHeader.Typeflag = tar.TypeDir + header.Mode = 0755 + header.Typeflag = tar.TypeDir } else { - tarHeader.Mode = 0644 - tarHeader.Typeflag = tar.TypeReg - tarHeader.Size = int64(info.Size) + header.Mode = 0644 + header.Typeflag = tar.TypeReg + header.Size = int64(info.Size) } - err = w.WriteHeader(&tarHeader) + err = w.WriteHeader(&header) if err != nil { return err } @@ -121,6 +122,49 @@ func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) erro return nil } +func (s *svc) createZip(ctx context.Context, files []string, dst io.Writer) error { + w := zip.NewWriter(dst) + + for _, root := range files { + + err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { + if err != nil { + return err + } + + header := zip.FileHeader{ + Name: path, + Modified: time.Unix(int64(info.Mtime.Seconds), 0), + } + + isDir := info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER + + if isDir { + header.Name += "/" + } + + dst, err := w.CreateHeader(&header) + if err != nil { + return err + } + + if !isDir { + err = s.downloadFile(ctx, path, dst) + if err != nil { + return err + } + } + return nil + }) + + if err != nil { + return err + } + + } + return nil +} + func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) error { downReq, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{ Ref: &provider.Reference{ From aafc8b7a6ec0490647e3c312db1c0b09924a1550 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Fri, 3 Sep 2021 18:18:13 +0200 Subject: [PATCH 04/17] Implement service handler --- internal/http/services/archiver/archiver.go | 85 ++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 6ccca53e13..6f7003fb10 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -22,8 +22,11 @@ import ( "archive/tar" "archive/zip" "context" + "fmt" "io" "net/http" + "path" + "strings" "time" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" @@ -31,9 +34,11 @@ import ( provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" "github.com/cs3org/reva/internal/http/services/datagateway" "github.com/cs3org/reva/pkg/errtypes" + "github.com/cs3org/reva/pkg/rgrpc/todo/pool" "github.com/cs3org/reva/pkg/rhttp" "github.com/cs3org/reva/pkg/rhttp/global" "github.com/cs3org/reva/pkg/storage/utils/walk" + "github.com/mitchellh/mapstructure" "github.com/rs/zerolog" ) @@ -47,6 +52,8 @@ type svc struct { type Config struct { Prefix string `mapstructure:"prefix"` GatewaySvc string `mapstructure:"gatewaysvc"` + Timeout int64 `mapstructure:"timeout"` + Insecure bool `mapstructure:"insecure"` } func init() { @@ -54,11 +61,83 @@ func init() { } func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) { - return nil, nil + c := &Config{} + err := mapstructure.Decode(conf, c) + if err != nil { + return nil, err + } + + c.init() + + gtw, err := pool.GetGatewayServiceClient(c.GatewaySvc) + if err != nil { + return nil, err + } + + return &svc{ + config: c, + gtwClient: gtw, + httpClient: rhttp.GetHTTPClient( + rhttp.Timeout(time.Duration(c.Timeout*int64(time.Second))), + rhttp.Insecure(c.Insecure), + ), + }, nil +} + +func (c *Config) init() { + if c.Prefix == "" { + c.Prefix = "download_archive" + } } func (s *svc) Handler() http.Handler { - return nil + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + // get the dir and files to archive from the URL + ctx := r.Context() + v := r.URL.Query() + if _, ok := v["dir"]; !ok { + rw.WriteHeader(http.StatusBadRequest) + return + } + dir := v["dir"][0] + + names, ok := v["files"] + if !ok { + names = []string{} + } + + // append to the files name the dir + files := []string{} + for _, f := range names { + files = append(files, path.Join(dir, f)) + } + + archiveName := "download" + if len(files) == 0 { + // we need to archive the whole dir + files = append(files, dir) + archiveName = dir + } + + ua := r.Header.Get("User-Agent") + isWindows := strings.Contains(ua, "Windows") + + rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archiveName)) + rw.Header().Set("Content-Transfer-Encoding", "binary") + rw.WriteHeader(http.StatusOK) + + var err error + if isWindows { + err = s.createZip(ctx, files, rw) + } else { + err = s.createTar(ctx, files, rw) + } + if err != nil { + rw.WriteHeader(http.StatusInternalServerError) + return + } + + }) } func (s *svc) Prefix() string { @@ -141,6 +220,8 @@ func (s *svc) createZip(ctx context.Context, files []string, dst io.Writer) erro if isDir { header.Name += "/" + } else { + header.UncompressedSize64 = info.Size } dst, err := w.CreateHeader(&header) From 14a6c1fe447ed38c501299f18415c3c7a859e760 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 14:38:13 +0200 Subject: [PATCH 05/17] Add check on NotFound code after Stat and ListContainer calls --- pkg/storage/utils/walk/walk.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/storage/utils/walk/walk.go b/pkg/storage/utils/walk/walk.go index d431b7a208..0ccaf3a7d1 100644 --- a/pkg/storage/utils/walk/walk.go +++ b/pkg/storage/utils/walk/walk.go @@ -2,11 +2,13 @@ package walk import ( "context" + "fmt" "path/filepath" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/pkg/errtypes" ) @@ -61,8 +63,10 @@ func readDir(ctx context.Context, path string, gtw gateway.GatewayAPIClient) ([] switch { case err != nil: return nil, err + case resp.Status.Code == rpc.Code_CODE_NOT_FOUND: + return nil, errtypes.NotFound(path) case resp.Status.Code != rpc.Code_CODE_OK: - return nil, errtypes.InternalError(resp.Status.Message) + return nil, errtypes.InternalError(fmt.Sprintf("error reading dir %s", path)) } return resp.Infos, nil @@ -78,8 +82,10 @@ func stat(ctx context.Context, path string, gtw gateway.GatewayAPIClient) (*prov switch { case err != nil: return nil, err + case resp.Status.Code == rpc.Code_CODE_NOT_FOUND: + return nil, errtypes.NotFound(path) case resp.Status.Code != rpc.Code_CODE_OK: - return nil, errtypes.InternalError(resp.Status.Message) + return nil, errtypes.InternalError(fmt.Sprintf("error getting stats from %s", path)) } return resp.Info, nil From 9f46ca582bfcf2441355b3b2e7cf68e4d1f64b59 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 14:38:27 +0200 Subject: [PATCH 06/17] Load "archiver" service --- internal/http/services/loader/loader.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/http/services/loader/loader.go b/internal/http/services/loader/loader.go index 73ac5e2719..07edbc6aec 100644 --- a/internal/http/services/loader/loader.go +++ b/internal/http/services/loader/loader.go @@ -21,6 +21,7 @@ package loader import ( // Load core HTTP services _ "github.com/cs3org/reva/internal/http/services/appprovider" + _ "github.com/cs3org/reva/internal/http/services/archiver" _ "github.com/cs3org/reva/internal/http/services/datagateway" _ "github.com/cs3org/reva/internal/http/services/dataprovider" _ "github.com/cs3org/reva/internal/http/services/helloworld" From 31162eb484221d489cc6b46d48397eaea158614f Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 14:41:32 +0200 Subject: [PATCH 07/17] Some fixes in zip and tar creation --- internal/http/services/archiver/archiver.go | 50 +++++++++++++-------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 6f7003fb10..8067d48f2a 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -38,6 +38,8 @@ import ( "github.com/cs3org/reva/pkg/rhttp" "github.com/cs3org/reva/pkg/rhttp/global" "github.com/cs3org/reva/pkg/storage/utils/walk" + "github.com/gdexlab/go-render/render" + ua "github.com/mileusna/useragent" "github.com/mitchellh/mapstructure" "github.com/rs/zerolog" ) @@ -46,6 +48,7 @@ type svc struct { config *Config httpClient *http.Client gtwClient gateway.GatewayAPIClient + log *zerolog.Logger } // Config holds the config options that need to be passed down to all ocdav handlers @@ -81,6 +84,7 @@ func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, erro rhttp.Timeout(time.Duration(c.Timeout*int64(time.Second))), rhttp.Insecure(c.Insecure), ), + log: log, }, nil } @@ -101,7 +105,7 @@ func (s *svc) Handler() http.Handler { } dir := v["dir"][0] - names, ok := v["files"] + names, ok := v["file"] if !ok { names = []string{} } @@ -109,28 +113,30 @@ func (s *svc) Handler() http.Handler { // append to the files name the dir files := []string{} for _, f := range names { - files = append(files, path.Join(dir, f)) + p := path.Join(dir, f) + files = append(files, strings.TrimSuffix(p, "/")) } archiveName := "download" if len(files) == 0 { // we need to archive the whole dir files = append(files, dir) - archiveName = dir + archiveName = path.Base(dir) } - ua := r.Header.Get("User-Agent") - isWindows := strings.Contains(ua, "Windows") + s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files)) + + userAgent := ua.Parse(r.Header.Get("User-Agent")) rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archiveName)) rw.Header().Set("Content-Transfer-Encoding", "binary") rw.WriteHeader(http.StatusOK) var err error - if isWindows { - err = s.createZip(ctx, files, rw) + if userAgent.OS == ua.Windows { + err = s.createZip(ctx, dir, files, rw) } else { - err = s.createTar(ctx, files, rw) + err = s.createTar(ctx, dir, files, rw) } if err != nil { rw.WriteHeader(http.StatusInternalServerError) @@ -152,7 +158,7 @@ func (s *svc) Unprotected() []string { return nil } -func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) error { +func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.Writer) error { w := tar.NewWriter(dst) for _, root := range files { @@ -162,8 +168,10 @@ func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) erro return err } + fileName := strings.TrimPrefix(path, dir) + header := tar.Header{ - Name: path, + Name: fileName, ModTime: time.Unix(int64(info.Mtime.Seconds), 0), } @@ -198,10 +206,10 @@ func (s *svc) createTar(ctx context.Context, files []string, dst io.Writer) erro } } - return nil + return w.Close() } -func (s *svc) createZip(ctx context.Context, files []string, dst io.Writer) error { +func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.Writer) error { w := zip.NewWriter(dst) for _, root := range files { @@ -211,8 +219,14 @@ func (s *svc) createZip(ctx context.Context, files []string, dst io.Writer) erro return err } + fileName := strings.TrimPrefix(strings.Trim(path, dir), "/") + + if fileName == "" { + return nil + } + header := zip.FileHeader{ - Name: path, + Name: fileName, Modified: time.Unix(int64(info.Mtime.Seconds), 0), } @@ -243,11 +257,11 @@ func (s *svc) createZip(ctx context.Context, files []string, dst io.Writer) erro } } - return nil + return w.Close() } func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) error { - downReq, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{ + downResp, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{ Ref: &provider.Reference{ Path: path, }, @@ -256,12 +270,12 @@ func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) erro switch { case err != nil: return err - case downReq.Status.Code != rpc.Code_CODE_OK: - return errtypes.InternalError(downReq.Status.Message) + case downResp.Status.Code != rpc.Code_CODE_OK: + return errtypes.InternalError(downResp.Status.Message) } var endpoint, token string - for _, p := range downReq.Protocols { + for _, p := range downResp.Protocols { if p.Protocol == "simple" { endpoint, token = p.DownloadEndpoint, p.Token } From 630df03f446ad5e4470176029d5afda315c14ffb Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 14:43:07 +0200 Subject: [PATCH 08/17] Get gateway address from shared config --- internal/http/services/archiver/archiver.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 8067d48f2a..959bd787fb 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -37,6 +37,7 @@ import ( "github.com/cs3org/reva/pkg/rgrpc/todo/pool" "github.com/cs3org/reva/pkg/rhttp" "github.com/cs3org/reva/pkg/rhttp/global" + "github.com/cs3org/reva/pkg/sharedconf" "github.com/cs3org/reva/pkg/storage/utils/walk" "github.com/gdexlab/go-render/render" ua "github.com/mileusna/useragent" @@ -92,6 +93,8 @@ func (c *Config) init() { if c.Prefix == "" { c.Prefix = "download_archive" } + + c.GatewaySvc = sharedconf.GetGatewaySVC(c.GatewaySvc) } func (s *svc) Handler() http.Handler { From c8f92c0fd4a4055bb9381c7371b64d23c832c420 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 17:42:01 +0200 Subject: [PATCH 09/17] Add MaxNumFiles and MaxSize to the service config in order to limit the dimensions of the requested archive --- internal/http/services/archiver/archiver.go | 44 ++++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 959bd787fb..8138bb102d 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -54,12 +54,19 @@ type svc struct { // Config holds the config options that need to be passed down to all ocdav handlers type Config struct { - Prefix string `mapstructure:"prefix"` - GatewaySvc string `mapstructure:"gatewaysvc"` - Timeout int64 `mapstructure:"timeout"` - Insecure bool `mapstructure:"insecure"` + Prefix string `mapstructure:"prefix"` + GatewaySvc string `mapstructure:"gatewaysvc"` + Timeout int64 `mapstructure:"timeout"` + Insecure bool `mapstructure:"insecure"` + MaxNumFiles int64 `mapstructure:"max_num_files"` + MaxSize int64 `mapstructure:"max_size"` } +var ( + errMaxFileCount = errtypes.InternalError("reached max files count") + errMaxSize = errtypes.InternalError("reached max total files size") +) + func init() { global.Register("archiver", New) } @@ -133,7 +140,6 @@ func (s *svc) Handler() http.Handler { rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archiveName)) rw.Header().Set("Content-Transfer-Encoding", "binary") - rw.WriteHeader(http.StatusOK) var err error if userAgent.OS == ua.Windows { @@ -141,7 +147,13 @@ func (s *svc) Handler() http.Handler { } else { err = s.createTar(ctx, dir, files, rw) } + if err == errMaxFileCount || err == errMaxSize { + s.log.Error().Msg(err.Error()) + rw.WriteHeader(http.StatusRequestEntityTooLarge) + return + } if err != nil { + s.log.Error().Msg(err.Error()) rw.WriteHeader(http.StatusInternalServerError) return } @@ -164,6 +176,8 @@ func (s *svc) Unprotected() []string { func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.Writer) error { w := tar.NewWriter(dst) + var filesCount, sizeFiles int64 + for _, root := range files { err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { @@ -171,6 +185,15 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io. return err } + filesCount += 1 + if filesCount > s.config.MaxNumFiles { + return errMaxFileCount + } + sizeFiles += int64(info.Size) + if sizeFiles > s.config.MaxSize { + return errMaxSize + } + fileName := strings.TrimPrefix(path, dir) header := tar.Header{ @@ -215,6 +238,8 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io. func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.Writer) error { w := zip.NewWriter(dst) + var filesCount, sizeFiles int64 + for _, root := range files { err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { @@ -222,6 +247,15 @@ func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io. return err } + filesCount += 1 + if filesCount > s.config.MaxNumFiles { + return errMaxFileCount + } + sizeFiles += int64(info.Size) + if sizeFiles > s.config.MaxSize { + return errMaxSize + } + fileName := strings.TrimPrefix(strings.Trim(path, dir), "/") if fileName == "" { From c8982947a1d7802cf5891123fd8dbbe74c899518 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 17:44:47 +0200 Subject: [PATCH 10/17] Add some comments --- internal/http/services/archiver/archiver.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 8138bb102d..864f4e7f72 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -173,6 +173,7 @@ func (s *svc) Unprotected() []string { return nil } +// create a new tar containing the files in the `files` list, which are in the directory `dir` func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.Writer) error { w := tar.NewWriter(dst) @@ -235,6 +236,7 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io. return w.Close() } +// create a new zip containing the files in the `files` list, which are in the directory `dir` func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.Writer) error { w := zip.NewWriter(dst) From 31d2ac74ba39b7058469216feac73298664b1d09 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 17:47:51 +0200 Subject: [PATCH 11/17] Add file extension to the generated archive file --- internal/http/services/archiver/archiver.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 864f4e7f72..6a4615d099 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -127,6 +127,8 @@ func (s *svc) Handler() http.Handler { files = append(files, strings.TrimSuffix(p, "/")) } + userAgent := ua.Parse(r.Header.Get("User-Agent")) + archiveName := "download" if len(files) == 0 { // we need to archive the whole dir @@ -134,9 +136,13 @@ func (s *svc) Handler() http.Handler { archiveName = path.Base(dir) } - s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files)) + if userAgent.OS == ua.Windows { + archiveName += ".zip" + } else { + archiveName += ".tar" + } - userAgent := ua.Parse(r.Header.Get("User-Agent")) + s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files)) rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archiveName)) rw.Header().Set("Content-Transfer-Encoding", "binary") From c5b05c3a80b80ed34c38d0e6627da1b0d006f873 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 17:50:34 +0200 Subject: [PATCH 12/17] Add copyright in walk.go --- pkg/storage/utils/walk/walk.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pkg/storage/utils/walk/walk.go b/pkg/storage/utils/walk/walk.go index 0ccaf3a7d1..88de81c5cd 100644 --- a/pkg/storage/utils/walk/walk.go +++ b/pkg/storage/utils/walk/walk.go @@ -1,3 +1,21 @@ +// Copyright 2018-2021 CERN +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// In applying this license, CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + package walk import ( From a5c5d42bf90db1cadcb70f9bedd406d2e7160c4f Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 18:08:43 +0200 Subject: [PATCH 13/17] Add comments in walk.go --- pkg/storage/utils/walk/walk.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/storage/utils/walk/walk.go b/pkg/storage/utils/walk/walk.go index 88de81c5cd..ad89c7a678 100644 --- a/pkg/storage/utils/walk/walk.go +++ b/pkg/storage/utils/walk/walk.go @@ -30,8 +30,17 @@ import ( "github.com/cs3org/reva/pkg/errtypes" ) +// WalkFunc is the type of function called by Walk to visit each file or directory +// +// Each time the Walk function meet a file/folder path is set to the full path of this. +// The err argument reports an error related to the path, and the function can decide the action to +// do with this. +// +// The error result returned by the function controls how Walk continues. If the function returns the special value SkipDir, Walk skips the current directory. +// Otherwise, if the function returns a non-nil error, Walk stops entirely and returns that error. type WalkFunc func(path string, info *provider.ResourceInfo, err error) error +// Walk walks the file tree rooted at root, calling fn for each file or folder in the tree, including the root. func Walk(ctx context.Context, root string, gtw gateway.GatewayAPIClient, fn WalkFunc) error { info, err := stat(ctx, root, gtw) From f89141fb3c81df7eef67fb007468404162352b74 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 18:09:46 +0200 Subject: [PATCH 14/17] Add comment and changed += 1 with ++ --- internal/http/services/archiver/archiver.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 6a4615d099..3f6353af33 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -71,6 +71,7 @@ func init() { global.Register("archiver", New) } +// New creates a new archiver service func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) { c := &Config{} err := mapstructure.Decode(conf, c) @@ -192,7 +193,7 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io. return err } - filesCount += 1 + filesCount++ if filesCount > s.config.MaxNumFiles { return errMaxFileCount } @@ -255,7 +256,7 @@ func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io. return err } - filesCount += 1 + filesCount++ if filesCount > s.config.MaxNumFiles { return errMaxFileCount } From 4cf27c5505312ed060c504234b149bc08d8f492e Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 18:11:48 +0200 Subject: [PATCH 15/17] Add changelog --- changelog/unreleased/archiver-service.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 changelog/unreleased/archiver-service.md diff --git a/changelog/unreleased/archiver-service.md b/changelog/unreleased/archiver-service.md new file mode 100644 index 0000000000..831c77a8f9 --- /dev/null +++ b/changelog/unreleased/archiver-service.md @@ -0,0 +1,8 @@ +Enhancement: Implement folder download as archive + +Adds a new http service which will create an archive +(platform dependent, zip in windows and tar in linux) given a list of file. + + +https://github.com/cs3org/reva/issues/1698 +https://github.com/cs3org/reva/pull/2066 From 0364d0108ed7e3189c50fa50de00c93c4627e40b Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte Date: Mon, 13 Sep 2021 18:15:17 +0200 Subject: [PATCH 16/17] Changed package name --- internal/http/services/archiver/archiver.go | 6 +++--- pkg/storage/utils/{walk/walk.go => walker/walker.go} | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename pkg/storage/utils/{walk/walk.go => walker/walker.go} (99%) diff --git a/internal/http/services/archiver/archiver.go b/internal/http/services/archiver/archiver.go index 3f6353af33..8cfeab83d7 100644 --- a/internal/http/services/archiver/archiver.go +++ b/internal/http/services/archiver/archiver.go @@ -38,7 +38,7 @@ import ( "github.com/cs3org/reva/pkg/rhttp" "github.com/cs3org/reva/pkg/rhttp/global" "github.com/cs3org/reva/pkg/sharedconf" - "github.com/cs3org/reva/pkg/storage/utils/walk" + "github.com/cs3org/reva/pkg/storage/utils/walker" "github.com/gdexlab/go-render/render" ua "github.com/mileusna/useragent" "github.com/mitchellh/mapstructure" @@ -188,7 +188,7 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io. for _, root := range files { - err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { + err := walker.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { if err != nil { return err } @@ -251,7 +251,7 @@ func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io. for _, root := range files { - err := walk.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { + err := walker.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error { if err != nil { return err } diff --git a/pkg/storage/utils/walk/walk.go b/pkg/storage/utils/walker/walker.go similarity index 99% rename from pkg/storage/utils/walk/walk.go rename to pkg/storage/utils/walker/walker.go index ad89c7a678..7a4d3b2d1d 100644 --- a/pkg/storage/utils/walk/walk.go +++ b/pkg/storage/utils/walker/walker.go @@ -16,7 +16,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -package walk +package walker import ( "context" From f6939a5ac489e3cc05cd19a4d707cc94d18b7c69 Mon Sep 17 00:00:00 2001 From: Gianmaria Del Monte <39946305+gmgigi96@users.noreply.github.com> Date: Mon, 13 Sep 2021 23:06:06 +0200 Subject: [PATCH 17/17] Update archiver-service.md --- changelog/unreleased/archiver-service.md | 1 - 1 file changed, 1 deletion(-) diff --git a/changelog/unreleased/archiver-service.md b/changelog/unreleased/archiver-service.md index 831c77a8f9..d65ab1c91c 100644 --- a/changelog/unreleased/archiver-service.md +++ b/changelog/unreleased/archiver-service.md @@ -3,6 +3,5 @@ Enhancement: Implement folder download as archive Adds a new http service which will create an archive (platform dependent, zip in windows and tar in linux) given a list of file. - https://github.com/cs3org/reva/issues/1698 https://github.com/cs3org/reva/pull/2066