Skip to content

Commit

Permalink
[core]: Add helper to rewrite blob storage urls
Browse files Browse the repository at this point in the history
In the low-latency branch, EventGrid gives us URLs like
`https://<account>.blob.core.windows.net/<container>/<path>`. The create
items function expects `blob://<account>/<container>/<path>` URLs. This
helper function performs that rewrite.
  • Loading branch information
Tom Augspurger committed Apr 7, 2023
1 parent 51d67bf commit 8abc9ae
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/reference/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Helper functions and classes for dealing with local or remote file systems.
pctasks.core.storage.StorageFactory
pctasks.core.storage.blob.BlobUri
pctasks.core.storage.blob.BlobStorage
pctasks.core.storage.blob.maybe_rewrite_blob_storage_url
```

## `pctasks.task`
Expand Down
60 changes: 60 additions & 0 deletions pctasks/core/pctasks/core/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,3 +661,63 @@ def from_connection_string(
return cls.from_account_key(
f"blob://{credential.account_name}/{container_name}", credential.account_key
)


def maybe_rewrite_blob_storage_url(url: str) -> str:
    """
    Rewrite HTTP blob-storage URLs to blob:// URLs.

    If `url` isn't a blob-storage style URL, it's returned unmodified.

    Parameters
    ----------
    url: str
        The URL (or path) to a file.

    Returns
    -------
    str
        The rewritten URL. Blob Storage URLs are modified to use the `blob://`
        style. Non-blob storage URLs, and URLs with nothing after the host,
        are returned unmodified.

    Examples
    --------
    Blob storage URLs are rewritten

    >>> maybe_rewrite_blob_storage_url(
    ...     "https://example.blob.core.windows.net/container/path/file.txt"
    ... )
    'blob://example/container/path/file.txt'

    URLs that point at just a container are rewritten too

    >>> maybe_rewrite_blob_storage_url(
    ...     "https://example.blob.core.windows.net/container"
    ... )
    'blob://example/container'

    Azurite-style URLs *are* rewritten. The host (and port) is dropped and
    the first path segment is used as the account name.

    >>> maybe_rewrite_blob_storage_url(
    ...     "https://azurite:10000/devstoreaccount1/container/path/file.txt"
    ... )
    'blob://devstoreaccount1/container/path/file.txt'
    >>> maybe_rewrite_blob_storage_url(
    ...     "https://localhost:10000/devstoreaccount1/container/path/file.txt"
    ... )
    'blob://devstoreaccount1/container/path/file.txt'

    Local paths are not affected

    >>> maybe_rewrite_blob_storage_url("path/file.txt")
    'path/file.txt'
    """
    parsed = urlparse(url)
    # Everything after the host, without leading or trailing slashes.
    blob_path = parsed.path.strip("/")

    if not blob_path:
        # No container (or account, for Azurite) to put in a blob:// URL,
        # so there is nothing sensible to rewrite.
        return url

    if parsed.netloc.endswith(".blob.core.windows.net"):
        # The account name is the first label of the host name.
        account = parsed.netloc.split(".", 1)[0]
        # Using the whole stripped path (rather than unpacking it into
        # container and blob name) keeps container-only URLs from raising.
        return f"blob://{account}/{blob_path}"

    if parsed.netloc.startswith(("azurite", "localhost", "127.0.0.1")):
        # NOTE: should we *just* do port 10000?
        # Look at BlobStorage.__init__ maybe
        return f"blob://{blob_path}"

    return url
26 changes: 26 additions & 0 deletions pctasks/core/tests/storage/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from pathlib import Path
from typing import Dict, List, Tuple

import pytest

from pctasks.core.storage.blob import maybe_rewrite_blob_storage_url
from pctasks.dev.blob import temp_azurite_blob_storage
from pctasks.dev.constants import AZURITE_ACCOUNT_NAME, TEST_DATA_CONTAINER

Expand Down Expand Up @@ -100,3 +103,26 @@ def test_blob_download_timeout():
storage_stream_downloader._request_options.pop("timeout", None)
is None
)


@pytest.mark.parametrize(
    "url, expected",
    [
        # Public Azure Blob Storage endpoint: the account comes from the host.
        (
            "https://example.blob.core.windows.net/container/path/file.txt",
            "blob://example/container/path/file.txt",
        ),
        # Azurite endpoints: the account is the first path segment.
        (
            "https://azurite:10000/devstoreaccount1/container/path/file.txt",
            "blob://devstoreaccount1/container/path/file.txt",
        ),
        (
            "https://localhost:10000/devstoreaccount1/container/path/file.txt",
            "blob://devstoreaccount1/container/path/file.txt",
        ),
        # Anything that is not a blob-storage URL passes through untouched.
        ("path/file.txt", "path/file.txt"),
    ],
)
def test_maybe_rewrite_blob_storage_url(url, expected):
    """Check that each input URL is rewritten to the expected value."""
    assert maybe_rewrite_blob_storage_url(url) == expected

0 comments on commit 8abc9ae

Please sign in to comment.