Skip to content

Commit

Permalink
externalconn,nodelocal: add external ExternalStorage provider
Browse files Browse the repository at this point in the history
In #84310 we added the ability to create an external connection
to represent a `nodelocal` endpoint. This diff is the second piece
of the puzzle that allows systems in CRDB to interact with the
external connection object.

We introduce an `external` URI scheme to the `ExternalStorage`
registry. URIs with an `external` scheme are required to contain a
host component referring to the unique name of an existing external
connection object. Optionally, the URI can also contain a path
component that will be appended to the endpoint that was specified
at the time the external connection object was created. This is necessary
for operations such as backup and restore that read/write to subdirectories
in the endpoint inputted by the user. A nice UX win is the ability to
have a single external connection object for the base bucket, and then
interact with all the subdirectories without having to create an object
for each directory. In the future we may want to clamp down on this, and
allow the user to specify which objects permit subdirectory access.

The `external://<object-name>/<optional-path>` URI is parsed and the
underlying object is fetched from the `system.external_connections` table.
The resource represented by the object is then `Dial()`ed to return an
`ExternalStorage` handle that can be used to read, write, list etc. With
this change all bulk operations and cdc are able to use external connections
to represent a `nodelocal` endpoint. For example, a backup can now be run as:

```
CREATE EXTERNAL CONNECTION foo AS 'nodelocal://1/foo';
BACKUP INTO 'external://foo';
RESTORE FROM LATEST IN 'external://foo';
```

Gradually, we will add support for all other external storage endpoints
as well.

Note, we do not register limiter settings for the `external` provider
`ExternalStorage` interface, nor do we wrap it with an ioRecorder. This is
because the underlying resource of the external connection object will already
have its registered limiters and recorder.

Informs: #84753

Release note (sql change): Bulk operations and CDC will accept an `external`
scheme URI that points to a previously created External Connection object.
These operations can then interact with the underlying resource represented by
the object as they did before.
  • Loading branch information
adityamaru committed Jul 27, 2022
1 parent c4e9819 commit cd0fb93
Show file tree
Hide file tree
Showing 20 changed files with 607 additions and 106 deletions.
6 changes: 6 additions & 0 deletions pkg/ccl/backupccl/datadriven_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,12 @@ func TestDataDriven(t *testing.T) {
}
return ""

case "switch-server":
var name string
d.ScanArgs(t, "name", &name)
lastCreatedServer = name
return ""

case "exec-sql":
server := lastCreatedServer
user := "root"
Expand Down
216 changes: 216 additions & 0 deletions pkg/ccl/backupccl/testdata/backup-restore/external-connections
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
new-server name=s1
----

subtest basic-backup-nodelocal

exec-sql
CREATE EXTERNAL CONNECTION 'conn-foo' AS 'nodelocal://1/foo';
----

exec-sql
CREATE DATABASE d;
CREATE SCHEMA d.schema;
CREATE TABLE d.schema.foo (id INT PRIMARY KEY);
INSERT INTO d.schema.foo VALUES (1), (2), (3);
----

# Cluster backup.
exec-sql
BACKUP INTO 'external://conn-foo/cluster';
----

query-sql
SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN
'external://conn-foo/cluster'] ORDER BY object_name;
----
bank table full
comments table full
d database full
data database full
database_role_settings table full
defaultdb database full
external_connections table full
foo table full
locations table full
postgres database full
public schema full
public schema full
public schema full
public schema full
role_members table full
role_options table full
scheduled_jobs table full
schema schema full
settings table full
system database full
tenant_settings table full
ui table full
users table full
zones table full

# Database backup.
exec-sql
BACKUP DATABASE d INTO 'external://conn-foo/database';
----

query-sql
SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN
'external://conn-foo/database'] ORDER BY object_name;
----
d database full
foo table full
public schema full
schema schema full

# Table backup.
exec-sql
BACKUP TABLE d.schema.foo INTO 'external://conn-foo/table';
----

exec-sql
INSERT INTO d.schema.foo VALUES (4), (5), (6);
----

# Incremental table backup.
exec-sql
BACKUP TABLE d.schema.foo INTO LATEST IN 'external://conn-foo/table';
----

query-sql
SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN
'external://conn-foo/table'] ORDER BY (object_name, backup_type);
----
d database full
d database incremental
foo table full
foo table incremental
schema schema full
schema schema incremental

subtest end

subtest basic-restore-nodelocal

new-server name=s2 share-io-dir=s1
----

# Cluster restore.
exec-sql
CREATE EXTERNAL CONNECTION 'conn-foo' AS 'nodelocal://1/foo';
----

exec-sql
RESTORE FROM LATEST IN 'external://conn-foo/cluster';
----

query-sql
SELECT * FROM d.schema.foo
----
1
2
3

exec-sql
DROP DATABASE d CASCADE
----

# Database restore.
exec-sql
RESTORE DATABASE d FROM LATEST IN 'external://conn-foo/database'
----

query-sql
SELECT * FROM d.schema.foo
----
1
2
3

exec-sql
DROP DATABASE d CASCADE
----

# Table restore.
exec-sql
RESTORE TABLE d.schema.foo FROM LATEST IN 'external://conn-foo/table' WITH into_db = 'defaultdb'
----

query-sql
SELECT * FROM defaultdb.schema.foo
----
1
2
3
4
5
6

exec-sql
DROP DATABASE d CASCADE
----
pq: database "d" does not exist

subtest end

subtest incremental-location-backup-restore-nodelocal

switch-server name=s1
----

exec-sql
CREATE EXTERNAL CONNECTION full AS 'nodelocal://1/full'
----

exec-sql
CREATE EXTERNAL CONNECTION inc AS 'nodelocal://1/inc'
----

# Take a full backup.
exec-sql
BACKUP DATABASE d INTO 'external://full';
----

# Take an incremental with an explicit location.
exec-sql
BACKUP DATABASE d INTO LATEST IN 'external://full' WITH incremental_location = 'external://inc';
----

query-sql
SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full' WITH
incremental_location = 'external://inc'] ORDER BY (object_name, backup_type);
----
d database full
d database incremental
foo table full
foo table incremental
public schema full
public schema incremental
schema schema full
schema schema incremental

# Ensure you can also specify an incremental location as a path to the same
# external connection URI.
exec-sql
BACKUP DATABASE d INTO 'external://full/nested';
----

# Take an incremental with an explicit location that is a subdir of the external
# connection endpoint.
exec-sql
BACKUP DATABASE d INTO LATEST IN 'external://full/nested' WITH incremental_location = 'external://inc/nested';
----

query-sql
SELECT object_name, object_type, backup_type FROM [SHOW BACKUP LATEST IN 'external://full/nested'
WITH incremental_location = 'external://inc/nested'] ORDER BY (object_name, backup_type);
----
d database full
d database incremental
foo table full
foo table incremental
public schema full
public schema incremental
schema schema full
schema schema incremental

subtest end
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ CREATE EXTERNAL CONNECTION foo AS 'nodelocal://1/foo/bar';

inspect-system-table
----
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

# Try to create another External Connection with the same name.
exec-sql
Expand All @@ -21,8 +21,8 @@ CREATE EXTERNAL CONNECTION bar123 AS 'nodelocal://1/baz';

inspect-system-table
----
bar123 STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/baz"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
bar123 STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/baz"}}, "provider": "nodelocal"}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

# Drop an External Connection that does not exist.
exec-sql
Expand All @@ -35,7 +35,7 @@ DROP EXTERNAL CONNECTION bar123;

inspect-system-table
----
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

exec-sql
DROP EXTERNAL CONNECTION foo;
Expand Down Expand Up @@ -67,7 +67,7 @@ CREATE EXTERNAL CONNECTION privileged AS 'nodelocal://1/foo'

inspect-system-table
----
privileged STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo"}}}
privileged STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo"}}, "provider": "nodelocal"}

exec-sql
REVOKE SYSTEM EXTERNALCONNECTION FROM testuser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE EXTERNAL CONNECTION foo AS 'nodelocal://1/foo/bar';

inspect-system-table
----
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

# Try to create another External Connection with the same name.
exec-sql
Expand All @@ -24,8 +24,8 @@ CREATE EXTERNAL CONNECTION bar123 AS 'nodelocal://1/baz';

inspect-system-table
----
bar123 STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/baz"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
bar123 STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/baz"}}, "provider": "nodelocal"}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

# Drop an External Connection that does not exist.
exec-sql
Expand All @@ -38,7 +38,7 @@ DROP EXTERNAL CONNECTION bar123;

inspect-system-table
----
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}}
foo STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo/bar"}}, "provider": "nodelocal"}

exec-sql
DROP EXTERNAL CONNECTION foo;
Expand Down Expand Up @@ -70,7 +70,7 @@ CREATE EXTERNAL CONNECTION privileged AS 'nodelocal://1/foo'

inspect-system-table
----
privileged STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo"}}}
privileged STORAGE {"nodelocal": {"cfg": {"nodeId": 1, "path": "/foo"}}, "provider": "nodelocal"}

exec-sql
REVOKE SYSTEM EXTERNALCONNECTION FROM testuser;
Expand Down
16 changes: 16 additions & 0 deletions pkg/cloud/cloudpb/external_storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum ExternalStorageProvider {
reserved 6;
userfile = 7;
null = 8;
external = 9;
}

message LocalFileConfig {
Expand All @@ -33,6 +34,20 @@ message LocalFileConfig {
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID"];
}

// ExternalConnectionConfig is the ExternalStorage configuration for the
// `external` provider. It refers to a previously created External Connection
// object by name rather than describing a storage endpoint directly; the
// commit description gives the corresponding URI form as
// `external://<object-name>/<optional-path>`.
message ExternalConnectionConfig {
// Name identifies the External Connection object.
// NOTE(review): presumably populated from the host component of the
// `external://` URI — confirm against the URI parser.
string name = 1;
// User interacting with the external storage. This is used to check access
// privileges of the external connection object.
string user = 2;
// Path will be appended to the endpoint of the resource represented by the
// external connection object. It is used to access subdirectories/buckets of
// the external resource. The path component of the URI is optional, so this
// may be empty.
string path = 3;
}

message ExternalStorage {
ExternalStorageProvider provider = 1;

Expand Down Expand Up @@ -120,5 +135,6 @@ message ExternalStorage {
Azure AzureConfig = 6;
reserved 7;
FileTable FileTableConfig = 8 [(gogoproto.nullable) = false];
ExternalConnectionConfig external_connection_config = 9 [(gogoproto.nullable) = false];
}

2 changes: 2 additions & 0 deletions pkg/cloud/external_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ type ExternalStorageContext struct {
BlobClientFactory blobs.BlobClientFactory
InternalExecutor sqlutil.InternalExecutor
DB *kv.DB
Options []ExternalStorageOption
Limiters Limiters
}

// ExternalStorageOptions holds dependencies and values that can be
Expand Down
3 changes: 3 additions & 0 deletions pkg/cloud/externalconn/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ go_library(
name = "externalconn",
srcs = [
"connection.go",
"connection_storage.go",
"impl_registry.go",
"record.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/cloud/externalconn",
visibility = ["//visibility:public"],
deps = [
"//pkg/cloud",
"//pkg/cloud/cloudpb",
"//pkg/cloud/externalconn/connectionpb",
"//pkg/kv",
"//pkg/security/username",
Expand Down
Loading

0 comments on commit cd0fb93

Please sign in to comment.