Skip to content

Commit

Permalink
Bitbucket API access (#151)
Browse files Browse the repository at this point in the history
* Get bitbucket's files using their API (#71)

* Retire the old bitbucket auth method

* Update schema docs due bitbucket access changes

* [Bitbucket API Access] Update changelog
  • Loading branch information
tcurvelo authored and manycoding committed Sep 12, 2019
1 parent f46937c commit 67a24b8
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 51 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Note that the top-most release is changes in the unreleased master branch on Git
## [0.3.7dev] (Work In Progress)
### Added
- **Anomalies** to see significant deviations in fields coverage across multiple jobs, #138
- Support for the **Bitbucket API**, in order to access files from private repositories, #71


## [0.3.6] (2019-07-12)
Expand Down Expand Up @@ -64,7 +65,7 @@ Note that the top-most release is changes in the unreleased master branch on Git
- Passed rules marked with green PASSED.
### Fixed
- Online documentation now renders graphs https://arche.readthedocs.io/en/latest/, #41
- Error colours are back in `report_all()`.
- Error colours are back in `report_all()`.
### Removed
- Deprecated `Arche.basic_json_schema()`, use `basic_json_schema()`
- Removed Quickstart.md as redundant - documentation lives in notebooks
Expand Down
49 changes: 39 additions & 10 deletions docs/source/nbs/Schema.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Github\n",
"For github, you just specify the raw link which will contain a token on the end. The token expires after 5 minutes.\n",
"\n",
"```a.schema = \"https://raw.githubusercontent.com/manycoding/repo/master/schema.json?token=AJ6jjTtZtWZr5zyw7DuWduieMJ2ms1ks5ctRC6wA%3%3D\"```"
Expand All @@ -166,6 +167,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Bitbucket\n",
"For bitbucket, you have to set up `BITBUCKET_USER` and `BITBUCKET_PASSWORD` environment variables.\n",
"For example, in Jupyter it looks like:"
]
Expand All @@ -184,8 +186,42 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"And then you can use raw links\n",
"```a.schema = \"https://bitbucket.org/user/repo/raw/HEAD/schema.json\"```"
"Besides the user's username and password, you can use [Bitbucket's app passwords](https://confluence.atlassian.com/bitbucket/app-passwords-828781300.html).\n",
"\n",
"It supports both regular URLs and raw links:\n",
"```\n",
"a.schema = \"https://bitbucket.org/user/repo/raw/HEAD/schema.json\"\n",
"```\n",
"or\n",
"```\n",
"a.schema = \"https://bitbucket.org/user/repo/src/HEAD/schema.json\"\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, you can set `BITBUCKET_NETLOC` and `BITBUCKET_API_NETLOC` when you wish to access files from a self-hosted Bitbucket server. E.g.:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: BITBUCKET_NETLOC=bitbucket.org\n",
"env: BITBUCKET_API_NETLOC=api.bitbucket.org\n"
]
}
],
"source": [
"%env BITBUCKET_NETLOC=bitbucket.org\n",
"%env BITBUCKET_API_NETLOC=api.bitbucket.org"
]
},
{
Expand Down Expand Up @@ -254,13 +290,6 @@
"source": [
"a.schema.raw"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -279,7 +308,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
"version": "3.7.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
18 changes: 0 additions & 18 deletions src/arche/readers/schema.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from collections import defaultdict
from enum import Enum
import json
import os
import pprint
from typing import Dict, List, Union
import urllib.request

from arche.tools import s3
import perfect_jsonschema
Expand All @@ -17,19 +15,6 @@
TaggedFields = Dict[str, List[str]]


def set_auth() -> None:
    """Install a global urllib opener carrying Bitbucket basic-auth credentials.

    Nothing is installed unless both ``BITBUCKET_USER`` and
    ``BITBUCKET_PASSWORD`` are present in the environment.
    """
    required = ("BITBUCKET_USER", "BITBUCKET_PASSWORD")
    if not all(name in os.environ for name in required):
        return

    handler = urllib.request.HTTPBasicAuthHandler()
    handler.add_password(
        realm="Bitbucket.org HTTP",
        uri="https://bitbucket.org",
        user=os.getenv("BITBUCKET_USER"),
        passwd=os.getenv("BITBUCKET_PASSWORD"),
    )
    # install_opener makes this opener the default for urllib.request.urlopen.
    urllib.request.install_opener(urllib.request.build_opener(handler))


class Tag(Enum):
unique = (0,)
category = (1,)
Expand Down Expand Up @@ -117,6 +102,3 @@ def read(schema_source: SchemaSource) -> RawSchema:
@staticmethod
def from_url(path: str) -> RawSchema:
    """Read the contents at `path` via `s3.get_contents` and parse them as JSON."""
    contents = s3.get_contents(path)
    return json.loads(contents)


set_auth()
36 changes: 36 additions & 0 deletions src/arche/tools/bitbucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import base64
import os
import re
from typing import Dict
import urllib
import urllib.request


# Hosts: an unset or empty variable falls back to the default host name.
NETLOC = os.environ.get("BITBUCKET_NETLOC") or "bitbucket.org"
API_NETLOC = os.environ.get("BITBUCKET_API_NETLOC") or "api.bitbucket.org"
# Credentials: read once at import time; None when the variable is not set.
USER = os.environ.get("BITBUCKET_USER")
PASS = os.environ.get("BITBUCKET_PASSWORD")


def prepare_request(url: str) -> urllib.request.Request:
    """Build an authenticated request for the API counterpart of `url`.

    The URL is rewritten with `convert_to_api_url` using the module-level
    `NETLOC` and `API_NETLOC`, and the `Authorization` header is built from
    the module-level `USER` and `PASS`.

    Raises:
        ValueError: if `USER` or `PASS` is empty or unset, or if `url` cannot
            be converted to an API URL.
    """
    if not (USER and PASS):
        raise ValueError(
            "Credentials not found: `BITBUCKET_USER` or `BITBUCKET_PASSWORD` not set."
        )

    target = convert_to_api_url(url, NETLOC, API_NETLOC)
    auth_header = get_auth_header(USER, PASS)
    return urllib.request.Request(target, headers=auth_header)


def convert_to_api_url(url: str, netloc: str, api_netloc: str) -> str:
    """Translate a Bitbucket file URL into its API 2.0 ``src`` endpoint URL.

    Both regular (``/src/``) and raw (``/raw/``) links are supported.

    Args:
        url: link of the form ``https://<netloc>/<user>/<repo>/(raw|src)/<path>``.
        netloc: host name expected in `url`; matched literally.
        api_netloc: host name of the API server used in the result.

    Returns:
        ``https://<api_netloc>/2.0/repositories/<user>/<repo>/src/<path>``.

    Raises:
        ValueError: if `url` does not have the expected shape.
    """
    # Escape the host so that "." only matches a literal dot.
    match = re.search(
        rf"https://{re.escape(netloc)}/(.*?)/(.*?)/(?:raw|src)/(.*)", url
    )
    if match is None:
        raise ValueError(f"Not a valid bitbucket URL: {url}")

    user, repo, path = match.groups()
    return f"https://{api_netloc}/2.0/repositories/{user}/{repo}/src/{path}"


def get_auth_header(username: str, password: str) -> Dict[str, str]:
    """Return the HTTP Basic ``Authorization`` header for the given credentials."""
    credentials = f"{username}:{password}".encode()
    token = base64.b64encode(credentials).decode()
    return {"Authorization": f"Basic {token}"}
8 changes: 7 additions & 1 deletion src/arche/tools/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from urllib.parse import quote, urlparse
import urllib.request

from arche.tools import bitbucket
import boto3


Expand Down Expand Up @@ -57,6 +58,11 @@ def get_contents(url: str) -> str:
if o.scheme == "s3":
return get_contents_from_bucket(netloc, relative_path)
if o.scheme == "https":
with urllib.request.urlopen(url) as f:
if o.netloc == bitbucket.NETLOC:
request = bitbucket.prepare_request(url)
else:
request = urllib.request.Request(url)

with urllib.request.urlopen(request) as f:
return f.read().decode("utf-8")
raise ValueError(f"'{o.scheme}://' scheme is not allowed")
22 changes: 1 addition & 21 deletions tests/readers/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import defaultdict

from arche.readers.schema import Schema, set_auth
from arche.readers.schema import Schema
from jsonschema.exceptions import SchemaError
import pytest

Expand Down Expand Up @@ -116,26 +116,6 @@ def test_schema_repr():
)


def test_set_auth_skipped(mocker):
    """`set_auth` must not install an opener when credentials are absent."""
    # Empty the patched environment for the duration of the test, so credentials
    # exported in the developer's shell cannot make the test fail.
    mocker.patch.dict("arche.readers.schema.os.environ", {}, clear=True)
    mocked_install = mocker.patch(
        "arche.readers.schema.urllib.request.install_opener", autospec=True
    )
    set_auth()
    mocked_install.assert_not_called()


def test_set_auth(mocker):
    """`set_auth` installs an opener once both credentials are in the environment."""
    mocker.patch.dict(
        "arche.readers.schema.os.environ",
        {"BITBUCKET_USER": "user", "BITBUCKET_PASSWORD": "pass"},
    )
    install_opener = mocker.patch(
        "arche.readers.schema.urllib.request.install_opener", autospec=True
    )

    set_auth()

    install_opener.assert_called_once()


def test_schema(get_schema):
s = Schema(get_schema)
assert s.allowed_tags == {
Expand Down
86 changes: 86 additions & 0 deletions tests/test_bitbucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from arche.tools import bitbucket
import pytest


# Pairs of (Bitbucket web URL, API URL).
urls = [
    (
        "https://bitbucket.org/scrapinghub/customer/src/master"
        "/customer/schemas/ecommerce.json",
        "https://api.bitbucket.org/2.0/repositories/scrapinghub/customer"
        "/src/master/customer/schemas/ecommerce.json",
    ),
    (
        "https://bitbucket.org/scrapinghub/customer"
        "/raw/9c4b0bf46f2012ab38bc066e1ebe774d72856013"
        "/customer/schemas/ecommerce.json",
        "https://api.bitbucket.org/2.0/repositories/scrapinghub/customer"
        "/src/9c4b0bf46f2012ab38bc066e1ebe774d72856013"
        "/customer/schemas/ecommerce.json",
    ),
]


@pytest.mark.parametrize(
    "url,expected",
    [
        (
            "https://bitbucket.org/scrapinghub/customer/src/master"
            "/customer/schemas/ecommerce.json",
            "https://api.bitbucket.org/2.0/repositories/scrapinghub/customer"
            "/src/master/customer/schemas/ecommerce.json",
        ),
        (
            "https://bitbucket.org/scrapinghub/customer"
            "/raw/9c4b0bf46f2012ab38bc066e1ebe774d72856013"
            "/customer/schemas/ecommerce.json",
            "https://api.bitbucket.org/2.0/repositories/scrapinghub/customer"
            "/src/9c4b0bf46f2012ab38bc066e1ebe774d72856013"
            "/customer/schemas/ecommerce.json",
        ),
    ],
)
def test_convert_to_api_url(url, expected):
    """Regular (`src`) and raw links both map to the API `src` endpoint."""
    netloc, api_netloc = bitbucket.NETLOC, bitbucket.API_NETLOC
    assert bitbucket.convert_to_api_url(url, netloc, api_netloc) == expected


@pytest.mark.parametrize(
    "url",
    [
        "https://bitbucket.org/ecommerce.json",
        "https://bitbucket.org/user/ecommerce.json",
        "https://bitbucket.org/user/repo/ecommerce.json",
        "https://bitbucket.org/user/repo/foobar/ecommerce.json",
    ],
)
def test_convert_to_api_url_using_an_invalid_url(url):
    """URLs lacking the `<user>/<repo>/(raw|src)/<path>` shape are rejected."""
    netloc, api_netloc = bitbucket.NETLOC, bitbucket.API_NETLOC
    with pytest.raises(ValueError):
        bitbucket.convert_to_api_url(url, netloc, api_netloc)


@pytest.mark.parametrize(
    "credentials,expected",
    [
        (("foo", "bar"), "Zm9vOmJhcg=="),
        (("alice", "secret"), "YWxpY2U6c2VjcmV0"),
    ],
)
def test_get_auth_header(credentials, expected):
    """The header carries the base64 form of `username:password`."""
    username, password = credentials
    header = bitbucket.get_auth_header(username, password)
    assert header == {"Authorization": f"Basic {expected}"}


def test_prepare_request(monkeypatch):
    """A valid URL yields a request aimed at the API host with an auth header."""
    # monkeypatch restores the module-level credentials after the test, so the
    # fake values cannot leak into tests that run later.
    monkeypatch.setattr(bitbucket, "USER", "foo")
    monkeypatch.setattr(bitbucket, "PASS", "bar")

    url = (
        "https://bitbucket.org/scrapinghub/customer/src/master/customer/schemas/"
        "ecommerce.json"
    )
    req = bitbucket.prepare_request(url)

    assert "api.bitbucket.org" == req.host
    assert "Authorization" in req.headers


def test_prepare_request_raises_an_error_when_no_credentials_found(monkeypatch):
    """`prepare_request` raises `ValueError` when the credentials are unset."""
    # monkeypatch undoes these assignments after the test; assigning directly
    # would leave the module without credentials for every later test.
    monkeypatch.setattr(bitbucket, "USER", None)
    monkeypatch.setattr(bitbucket, "PASS", None)

    with pytest.raises(ValueError):
        bitbucket.prepare_request("foo")
18 changes: 18 additions & 0 deletions tests/tools/test_s3.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import arche.tools.bitbucket as bitbucket
import arche.tools.s3 as s3
import pytest

Expand All @@ -10,6 +11,23 @@ def test_get_contents(mocker):
assert s3.get_contents("s3://bucket/file") == contents


@pytest.mark.parametrize(
    "path",
    [
        "https://bitbucket.org/user/repo/src/branch/file",
        "https://bitbucket.org/user/repo/src/branch/dir1/file.ext",
    ],
)
def test_get_contents_from_bitbucket(mocker, path):
    """Bitbucket URLs are fetched with exactly one `urlopen` call."""
    mocked_urlopen = mocker.patch(
        "arche.tools.s3.urllib.request.urlopen", autospec=True
    )
    # patch.object restores the module-level credentials when the test ends,
    # so the fake values do not leak into other tests.
    mocker.patch.object(bitbucket, "USER", "X")
    mocker.patch.object(bitbucket, "PASS", "Y")

    s3.get_contents(path)

    mocked_urlopen.assert_called_once()


def test_get_contents_fails_on_bad_file(mocker):
cm = mocker.MagicMock()
cm.__enter__.return_value = cm
Expand Down

0 comments on commit 67a24b8

Please sign in to comment.