Skip to content

Commit

Permalink
add list utility to list local files not in iso
Browse files Browse the repository at this point in the history
Signed-off-by: Dweb Fan <[email protected]>
  • Loading branch information
dwebfan committed May 5, 2024
1 parent ba59924 commit edddae1
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 10 deletions.
36 changes: 34 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ Pre-requsition commands:

Features:

- :heavy_check_mark: pack all photos/videos into multiple ISOs and upload to Glacier
- :heavy_check_mark: pack all photos/videos into multiple ISOs and upload to S3
- :heavy_check_mark: metadata to track which file is in which iso
- [ ] backup files not in ISO to staging station, Google drive
- :heavy_check_mark: backup files not in ISO to staging station, Google drive
- [ ] pack all photos/videos into multiple ISOs and upload to Glacier
- [ ] encrypt iso files before upload to Glacier, Google drive
- [ ] metadata to track which files are in staging station
- [ ] daemon running mode to watch folder change only, avoid scanning all folder daily
Expand All @@ -71,6 +72,37 @@ Also welcome to try our free Photo backup applications. https://lomorage.com.
- Multipart upload to S3
- Resume upload if one part failed
- User-defined ISO size
- Encrypt all files before upload

# Security Model
The security model is taken from the [filecrypt](https://github.com/kisom/filecrypt) repository. Refer to the book [Practical Cryptography With Go](https://leanpub.com/gocrypto/read) for more detail.

This program assumes that an attacker does not currently have access
to either the machine the archive is generated on, or on the machine
it is unpacked on. It is intended for medium to long-term storage of
sensitive data at rest on removable media that may be used to load data
onto a variety of platforms (Windows, OS X, Linux, OpenBSD), where the
threat of losing the storage medium is considerably higher than losing a
secured laptop that the archive is generated on.

Key derivation is done by pairing a password with a randomly-chosen
256-bit salt using the scrypt parameters N=2^20, r=8, p=1. This makes
it astronomically unlikely that the same key will be derived from the
same passphrase. The key is used as a NaCl secretbox key; the nonce for
encryption is randomly generated. It is thought that this will be highly
unlikely to cause nonce reuse issues.

The primary weaknesses might come from an attack on the passphrase or
via cryptanalysis of the ciphertext. The ciphertext is produced using
NaCl appended to a random salt, so it is unlikely this will produce any
meaningful information. One exception might be if this program is used
to encrypt a known set of files, and the attacker compares the length of
the archive to a list of known file sizes.

An attack on the passphrase will most likely come via a successful
dictionary attack. The large salt and high scrypt parameters will
deter attackers without the large resources required to brute force
this. Dictionary attacks will also be expensive for these same reasons.

# Prerequisites
## AWS Glacier API Access ID and Access Secret
Expand Down
2 changes: 1 addition & 1 deletion cmd/lomob/iso.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func mkISO(ctx *cli.Context) error {
return err
}

files, err := db.ListFilesNotInISO()
files, err := db.ListFilesNotInISOOrCloud()
if err != nil {
return err
}
Expand Down
36 changes: 36 additions & 0 deletions cmd/lomob/list-local.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,39 @@ func printDirsByTable(dirs map[int]*types.DirInfo) {
common.FormatTime(d.ModTime), d.Path)
}
}

// listFilesNotInIso prints a tab-aligned table on stdout of the files that
// have not been packed into an ISO.
//
// By default the listing includes files already uploaded to the cloud; those
// rows carry "Y" in the "In Cloud" column. When the command's "no-cloud" flag
// is set, only files that are neither packed in an ISO nor uploaded are
// listed. A file whose scan root directory is unknown is logged and skipped.
//
// It returns any error from opening the database, querying it, or flushing
// the table to stdout.
func listFilesNotInIso(ctx *cli.Context) error {
	err := initDB(ctx.GlobalString("db"))
	if err != nil {
		return err
	}

	scanRootDirs, err := db.ListScanRootDirs()
	if err != nil {
		return err
	}

	// Honor the "no-cloud" flag declared on this command: it narrows the
	// listing to files that are in neither an ISO nor the cloud.
	var files []*types.FileInfo
	if ctx.Bool("no-cloud") {
		files, err = db.ListFilesNotInISOAndCloud()
	} else {
		files, err = db.ListFilesNotInISOOrCloud()
	}
	if err != nil {
		return err
	}

	writer := tabwriter.NewWriter(os.Stdout, 0, 0, 4, ' ', tabwriter.TabIndent)

	fmt.Fprint(writer, "In Cloud\tPath\n")

	for _, f := range files {
		scanRootDir, ok := scanRootDirs[f.DirID]
		if !ok {
			logrus.Warnf("%s not found root scan dir %d", f.Name, f.DirID)
			continue
		}

		inCloud := " "
		if f.IsoID == types.IsoIDCloud {
			inCloud = "Y"
		}
		fmt.Fprintf(writer, "%s\t%s\n", inCloud, filepath.Join(scanRootDir, f.Name))
	}

	// Flush explicitly so a write failure on stdout is reported to the caller.
	return writer.Flush()
}
11 changes: 11 additions & 0 deletions cmd/lomob/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,17 @@ func main() {
},
},
},
{
Name: "files",
Action: listFilesNotInIso,
Usage: "List all files not packed in ISO including the ones uploaded in google drive",
Flags: []cli.Flag{
cli.BoolFlag{
Name: "no-cloud",
Usage: "List all files not in google drive or packed in ISO",
},
},
},
},
},
{
Expand Down
13 changes: 12 additions & 1 deletion cmd/lomob/upload-files.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/lomorage/lomo-backup/common/gcloud"
"github.com/lomorage/lomo-backup/common/types"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
Expand Down Expand Up @@ -51,11 +52,15 @@ func uploadFiles(ctx *cli.Context) error {
return err
}

fileInfos, err := db.ListFilesNotInISO()
fileInfos, err := db.ListFilesNotInISOAndCloud()
if err != nil {
return err
}

if len(fileInfos) == 0 {
fmt.Println("No files need to be uploaded to google drive")
}

// root folder is
type dirInfoInCloud struct {
folderID string
Expand Down Expand Up @@ -147,8 +152,14 @@ func uploadFiles(ctx *cli.Context) error {
if err != nil {
logrus.Warnf("Close %s: %s", fullLocalPath, err)
}
err = db.UpdateFileIsoID(types.IsoIDCloud, f.ID)
if err != nil {
return err
}
}

fmt.Printf("%d files are uploaded to google drive\n", len(fileInfos))

return nil
}

Expand Down
52 changes: 46 additions & 6 deletions common/dbx/iso.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,17 @@ import (
//_ "github.com/mattn/go-sqlite3"
)

// listFilesNotInIsoOrCloudStmt selects files whose iso_id is either 0 or
// types.IsoIDCloud, joined with their directory row and ordered by directory
// ID and then file ID. It is declared as a var rather than a const because
// the strconv.Itoa call is not a constant expression.
var listFilesNotInIsoOrCloudStmt = "select d.scan_root_dir_id, d.path, f.name, f.id, f.iso_id, f.size, f.mod_time from files as f" +
" inner join dirs as d on f.dir_id=d.id where f.iso_id=0 or f.iso_id=" + strconv.Itoa(types.IsoIDCloud) +
" order by f.dir_id, f.id"

const (
listFilesNotInIsoStmt = "select d.scan_root_dir_id, d.path, f.name, f.id, f.size, f.mod_time from files as f" +
listFilesNotInIsoAndCloudStmt = "select d.scan_root_dir_id, d.path, f.name, f.id, f.size, f.mod_time from files as f" +
" inner join dirs as d on f.dir_id=d.id where f.iso_id=0 order by f.dir_id, f.id"
getTotalFileSizeNotInIsoStmt = "select sum(size) from files where iso_id=0"
getTotalFilesInIsoStmt = "select sum(size), count(size) from files where iso_id=?"
updateIsoIDStmt = "update files set iso_id=%d where id in (%s)"
updateBatchFilesIsoIDStmt = "update files set iso_id=%d where id in (%s)"
updateFileIsoIDStmt = "update files set iso_id=? where id=?"

getIsoByNameStmt = "select id, size, hash_hex, hash_base64, region, bucket, upload_id, upload_key," +
" create_time from isos where name=?"
Expand All @@ -37,12 +42,12 @@ const (
updatePartUploadEtagStatusStmt = "update parts set etag=?, status=? where iso_id=? and part_no=?"
)

func (db *DB) ListFilesNotInISO() ([]*types.FileInfo, error) {
func (db *DB) ListFilesNotInISOAndCloud() ([]*types.FileInfo, error) {
files := []*types.FileInfo{}

err := db.retryIfLocked("list files not in ISO",
err := db.retryIfLocked("list files not in ISO and cloud",
func(tx *sql.Tx) error {
rows, err := tx.Query(listFilesNotInIsoStmt)
rows, err := tx.Query(listFilesNotInIsoAndCloudStmt)
if err != nil {
return nil
}
Expand All @@ -63,6 +68,32 @@ func (db *DB) ListFilesNotInISO() ([]*types.FileInfo, error) {
return files, err
}

// ListFilesNotInISOOrCloud returns the files that are not packed in an ISO,
// i.e. rows whose iso_id is 0 or types.IsoIDCloud, ordered by directory ID and
// then file ID.
//
// For each returned FileInfo, DirID holds the scan root directory ID, Name
// holds the directory path joined with the file name, and IsoID is populated
// so callers can distinguish files already uploaded to the cloud.
//
// It returns the query, scan, or row-iteration error, if any.
func (db *DB) ListFilesNotInISOOrCloud() ([]*types.FileInfo, error) {
	files := []*types.FileInfo{}

	err := db.retryIfLocked("list files not in ISO or cloud",
		func(tx *sql.Tx) error {
			// NOTE(review): retryIfLocked presumably may invoke this closure
			// more than once; start from an empty result so a retry cannot
			// duplicate rows. Confirm against retryIfLocked.
			files = files[:0]

			rows, err := tx.Query(listFilesNotInIsoOrCloudStmt)
			if err != nil {
				// Propagate the failure; returning nil here would report an
				// empty listing as success.
				return err
			}
			// Release the result set even when Scan fails mid-iteration.
			defer rows.Close()

			for rows.Next() {
				var path, name string
				f := &types.FileInfo{}
				err = rows.Scan(&f.DirID, &path, &name, &f.ID, &f.IsoID, &f.Size, &f.ModTime)
				if err != nil {
					return err
				}
				f.Name = filepath.Join(path, name)

				files = append(files, f)
			}
			return rows.Err()
		},
	)
	return files, err
}

func (db *DB) TotalFileSizeNotInISO() (uint64, error) {
var totalSize uint64
err := db.retryIfLocked("total file size not in ISO",
Expand Down Expand Up @@ -155,7 +186,7 @@ func (db *DB) CreateIsoWithFileIDs(iso *types.ISOInfo, fileIDs string) (int, int
return err
}

res, err = tx.Exec(fmt.Sprintf(updateIsoIDStmt, isoID, fileIDs))
res, err = tx.Exec(fmt.Sprintf(updateBatchFilesIsoIDStmt, isoID, fileIDs))
if err != nil {
return err
}
Expand All @@ -166,6 +197,15 @@ func (db *DB) CreateIsoWithFileIDs(iso *types.ISOInfo, fileIDs string) (int, int
return int(isoID), int(updatedFiles), err
}

// UpdateFileIsoID sets the iso_id column of the single file row identified by
// fileID to isoID, running the update through retryIfLocked. It returns the
// error reported by that call, if any.
func (db *DB) UpdateFileIsoID(isoID, fileID int) error {
	desc := fmt.Sprintf("file %d's iso ID %d", fileID, isoID)

	setIsoID := func(tx *sql.Tx) error {
		if _, execErr := tx.Exec(updateFileIsoIDStmt, isoID, fileID); execErr != nil {
			return execErr
		}
		return nil
	}

	return db.retryIfLocked(desc, setIsoID)
}

func (db *DB) UpdateIsoBase64Hash(isoID int, hash string) error {
return db.retryIfLocked(fmt.Sprintf("update iso %d base 64 hash %s", isoID, hash),
func(tx *sql.Tx) error {
Expand Down
3 changes: 3 additions & 0 deletions common/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ package types

import "time"

// IsoIDCloud is the ISO ID value used to flag a file that has been uploaded
// to the cloud but has not been packed into an ISO yet.
const IsoIDCloud = -1

type IsoStatus int

const (
Expand Down

0 comments on commit edddae1

Please sign in to comment.