Skip to content

Commit

Permalink
Implement folder download as archive (cs3org#2066)
Browse files Browse the repository at this point in the history
  • Loading branch information
gmgigi96 authored and glpatcern committed Sep 23, 2021
1 parent eb225ac commit fb4f1f5
Show file tree
Hide file tree
Showing 4 changed files with 475 additions and 0 deletions.
7 changes: 7 additions & 0 deletions changelog/unreleased/archiver-service.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Enhancement: Implement folder download as archive

Adds a new HTTP service which creates an archive (platform dependent:
zip on Windows, tar on Linux and other platforms) from a given list of files.

https://github.com/cs3org/reva/issues/1698
https://github.com/cs3org/reva/pull/2066
348 changes: 348 additions & 0 deletions internal/http/services/archiver/archiver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
// Copyright 2018-2021 CERN
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// In applying this license, CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

package archiver

import (
"archive/tar"
"archive/zip"
"context"
"fmt"
"io"
"net/http"
"path"
"strings"
"time"

gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1"
rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"github.com/cs3org/reva/internal/http/services/datagateway"
"github.com/cs3org/reva/pkg/errtypes"
"github.com/cs3org/reva/pkg/rgrpc/todo/pool"
"github.com/cs3org/reva/pkg/rhttp"
"github.com/cs3org/reva/pkg/rhttp/global"
"github.com/cs3org/reva/pkg/sharedconf"
"github.com/cs3org/reva/pkg/storage/utils/walker"
"github.com/gdexlab/go-render/render"
ua "github.com/mileusna/useragent"
"github.com/mitchellh/mapstructure"
"github.com/rs/zerolog"
)

// svc is the archiver HTTP service: it serves the requested files and
// folders as a single tar or zip archive.
type svc struct {
// config is the decoded service configuration (see New).
config *Config
// httpClient performs the HTTP GET requests issued by downloadFile.
httpClient *http.Client
// gtwClient is the gateway client used to walk the resource trees and to
// initiate file downloads.
gtwClient gateway.GatewayAPIClient
// log is the logger handed to New.
log *zerolog.Logger
}

// Config holds the configuration options of the archiver service.
type Config struct {
// Prefix is the URL prefix reported by svc.Prefix; init sets it to
// "download_archive" when empty.
Prefix string `mapstructure:"prefix"`
// GatewaySvc is the gateway service address; init resolves it through
// sharedconf.GetGatewaySVC.
GatewaySvc string `mapstructure:"gatewaysvc"`
// Timeout is the timeout of the HTTP client used to download files, in seconds.
Timeout int64 `mapstructure:"timeout"`
// Insecure is forwarded to rhttp.Insecure when the HTTP client is built.
Insecure bool `mapstructure:"insecure"`
// MaxNumFiles is the maximum number of resources (files and folders)
// that a single archive may contain.
MaxNumFiles int64 `mapstructure:"max_num_files"`
// MaxSize is the maximum cumulative size of the resources in a single
// archive, in the unit of ResourceInfo.Size (presumably bytes).
MaxSize int64 `mapstructure:"max_size"`
}

// Sentinel errors returned by createTar and createZip when one of the
// configured limits is exceeded. Handler compares against them to answer
// with 413 Request Entity Too Large instead of 500.
var (
errMaxFileCount = errtypes.InternalError("reached max files count")
errMaxSize = errtypes.InternalError("reached max total files size")
)

// init registers the constructor New in the global HTTP service registry
// under the name "archiver".
func init() {
global.Register("archiver", New)
}

// New builds the archiver service from its raw configuration map.
//
// The map is decoded into a Config, defaults are applied through
// Config.init, and a gateway client plus an HTTP client (honouring the
// configured timeout and insecure flag) are created. An error is returned
// when the configuration cannot be decoded or the gateway client cannot be
// obtained.
func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) {
	cfg := new(Config)
	if err := mapstructure.Decode(conf, cfg); err != nil {
		return nil, err
	}
	cfg.init()

	gatewayClient, err := pool.GetGatewayServiceClient(cfg.GatewaySvc)
	if err != nil {
		return nil, err
	}

	// Timeout is configured in seconds.
	timeout := time.Duration(cfg.Timeout) * time.Second
	client := rhttp.GetHTTPClient(
		rhttp.Timeout(timeout),
		rhttp.Insecure(cfg.Insecure),
	)

	service := &svc{
		config:     cfg,
		httpClient: client,
		gtwClient:  gatewayClient,
		log:        log,
	}
	return service, nil
}

// init fills in the defaults for the options left unset.
//
// The limits need a default: createTar and createZip reject an archive as
// soon as the resource count or cumulative size exceeds MaxNumFiles or
// MaxSize, so leaving them at their zero value would make every request
// fail on the very first resource.
func (c *Config) init() {
	const (
		defaultMaxNumFiles = 10000
		defaultMaxSize     = 1 << 30 // 1 GiB
	)

	if c.Prefix == "" {
		c.Prefix = "download_archive"
	}
	if c.MaxNumFiles <= 0 {
		c.MaxNumFiles = defaultMaxNumFiles
	}
	if c.MaxSize <= 0 {
		c.MaxSize = defaultMaxSize
	}

	c.GatewaySvc = sharedconf.GetGatewaySVC(c.GatewaySvc)
}

// Handler returns the HTTP handler that streams an archive of the requested
// resources to the client.
//
// Query parameters:
//   - dir:  required, the folder that contains the resources to archive.
//     A missing or empty value is answered with 400 Bad Request.
//   - file: optional and repeatable, names resolved relative to dir. When no
//     file is given the whole dir is archived and the archive is named after
//     it; otherwise the archive is named "download".
//
// The archive format is chosen from the User-Agent header: zip for Windows
// clients, tar for everything else.
func (s *svc) Handler() http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		query := r.URL.Query()

		// An empty dir would yield relative paths and an archive called "..tar".
		dir := query.Get("dir")
		if dir == "" {
			rw.WriteHeader(http.StatusBadRequest)
			return
		}

		// Resolve every requested name against dir.
		names := query["file"]
		files := make([]string, 0, len(names))
		for _, name := range names {
			files = append(files, strings.TrimSuffix(path.Join(dir, name), "/"))
		}

		archiveName := "download"
		if len(files) == 0 {
			// No explicit selection: archive the whole dir.
			files = append(files, dir)
			archiveName = path.Base(dir)
		}

		// Pick the archive format once, based on the client platform.
		create, ext := s.createTar, ".tar"
		if ua.Parse(r.Header.Get("User-Agent")).OS == ua.Windows {
			create, ext = s.createZip, ".zip"
		}
		archiveName += ext

		s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files))

		// %q escapes quotes and backslashes so that a folder name containing
		// them cannot produce a malformed header value.
		rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", archiveName))
		rw.Header().Set("Content-Transfer-Encoding", "binary")

		err := create(ctx, dir, files, rw)
		if err == nil {
			return
		}
		s.log.Error().Msg(err.Error())

		// NOTE: the status below only reaches the client when nothing has
		// been written to rw yet; once archive data was streamed the 200
		// status line is already on the wire and WriteHeader is a no-op.
		status := http.StatusInternalServerError
		if err == errMaxFileCount || err == errMaxSize {
			status = http.StatusRequestEntityTooLarge
		}
		rw.WriteHeader(status)
	})
}

// Prefix returns the URL prefix configured for the service.
func (s *svc) Prefix() string {
return s.config.Prefix
}

// Close is a no-op and always returns nil.
func (s *svc) Close() error {
return nil
}

// Unprotected always returns nil: the service lists no unprotected paths.
func (s *svc) Unprotected() []string {
return nil
}

// createTar streams into dst a tar archive containing every resource found
// by walking each entry of files; entry names are made relative to dir.
//
// The walk is aborted with errMaxFileCount or errMaxSize as soon as the
// number of visited resources or their cumulative size exceeds the
// configured limits. Any error reported by the walker, the tar writer or
// the file download is returned unchanged.
func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.Writer) error {
	w := tar.NewWriter(dst)

	// The limits apply to the archive as a whole, not to each root.
	var filesCount, sizeFiles int64

	for _, root := range files {
		err := walker.Walk(ctx, root, s.gtwClient, func(resPath string, info *provider.ResourceInfo, err error) error {
			if err != nil {
				return err
			}

			filesCount++
			if filesCount > s.config.MaxNumFiles {
				return errMaxFileCount
			}
			sizeFiles += int64(info.Size)
			if sizeFiles > s.config.MaxSize {
				return errMaxSize
			}

			// Entry names must be relative: strip dir and the separator that
			// follows it, otherwise the archive would contain absolute paths.
			fileName := strings.TrimPrefix(strings.TrimPrefix(resPath, dir), "/")
			if fileName == "" {
				// resPath is dir itself; an entry with an empty name is invalid.
				return nil
			}

			header := tar.Header{
				Name:    fileName,
				ModTime: time.Unix(int64(info.Mtime.Seconds), 0),
			}

			isDir := info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER
			if isDir {
				header.Mode = 0755
				header.Typeflag = tar.TypeDir
			} else {
				header.Mode = 0644
				header.Typeflag = tar.TypeReg
				// The tar writer expects exactly this many content bytes.
				header.Size = int64(info.Size)
			}

			if err := w.WriteHeader(&header); err != nil {
				return err
			}
			if isDir {
				return nil
			}
			return s.downloadFile(ctx, resPath, w)
		})
		if err != nil {
			return err
		}
	}

	// Close flushes the archive trailer.
	return w.Close()
}

// createZip streams into dst a zip archive containing every resource found
// by walking each entry of files; entry names are made relative to dir.
//
// The walk is aborted with errMaxFileCount or errMaxSize as soon as the
// number of visited resources or their cumulative size exceeds the
// configured limits. Any error reported by the walker, the zip writer or
// the file download is returned unchanged.
func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.Writer) error {
	w := zip.NewWriter(dst)

	// The limits apply to the archive as a whole, not to each root.
	var filesCount, sizeFiles int64

	for _, root := range files {
		err := walker.Walk(ctx, root, s.gtwClient, func(resPath string, info *provider.ResourceInfo, err error) error {
			if err != nil {
				return err
			}

			filesCount++
			if filesCount > s.config.MaxNumFiles {
				return errMaxFileCount
			}
			sizeFiles += int64(info.Size)
			if sizeFiles > s.config.MaxSize {
				return errMaxSize
			}

			// Strip dir as a *prefix*. strings.Trim must not be used here: it
			// takes a set of characters and would eat any leading or trailing
			// character of the name that also occurs somewhere in dir.
			fileName := strings.TrimPrefix(strings.TrimPrefix(resPath, dir), "/")
			if fileName == "" {
				// resPath is dir itself; there is nothing to add for it.
				return nil
			}

			header := zip.FileHeader{
				Name:     fileName,
				Modified: time.Unix(int64(info.Mtime.Seconds), 0),
			}

			isDir := info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER
			if isDir {
				// A trailing slash marks a directory entry in a zip archive.
				header.Name += "/"
			} else {
				header.UncompressedSize64 = info.Size
			}

			entry, err := w.CreateHeader(&header)
			if err != nil {
				return err
			}
			if isDir {
				return nil
			}
			return s.downloadFile(ctx, resPath, entry)
		})
		if err != nil {
			return err
		}
	}

	// Close writes the central directory.
	return w.Close()
}

// downloadFile copies the content of the file at path into dst.
//
// It asks the gateway to initiate a download, picks the endpoint and token
// of the "simple" protocol from the response, and streams the body of the
// resulting HTTP GET into dst. An error is returned when the gateway call
// fails or answers with a non-OK status, when no "simple" protocol is
// offered, when the HTTP request fails or answers with a status other than
// 200, or when copying the body fails.
func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) error {
	downResp, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{
		Ref: &provider.Reference{
			Path: path,
		},
	})
	if err != nil {
		return err
	}
	if downResp.Status.Code != rpc.Code_CODE_OK {
		return errtypes.InternalError(downResp.Status.Message)
	}

	var endpoint, token string
	for _, p := range downResp.Protocols {
		if p.Protocol == "simple" {
			endpoint, token = p.DownloadEndpoint, p.Token
		}
	}
	if endpoint == "" {
		// Fail with a clear message instead of issuing a GET to an empty URL.
		return errtypes.InternalError("no simple download protocol available for " + path)
	}

	httpReq, err := rhttp.NewRequest(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return err
	}
	// The transfer token authorises the request.
	httpReq.Header.Set(datagateway.TokenTransportHeader, token)

	httpRes, err := s.httpClient.Do(httpReq)
	if err != nil {
		return err
	}
	defer httpRes.Body.Close()

	if httpRes.StatusCode != http.StatusOK {
		return errtypes.InternalError(httpRes.Status)
	}

	_, err = io.Copy(dst, httpRes.Body)
	return err
}
1 change: 1 addition & 0 deletions internal/http/services/loader/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package loader
import (
// Load core HTTP services
_ "github.com/cs3org/reva/internal/http/services/appprovider"
_ "github.com/cs3org/reva/internal/http/services/archiver"
_ "github.com/cs3org/reva/internal/http/services/datagateway"
_ "github.com/cs3org/reva/internal/http/services/dataprovider"
_ "github.com/cs3org/reva/internal/http/services/helloworld"
Expand Down
Loading

0 comments on commit fb4f1f5

Please sign in to comment.