Skip to content

Commit

Permalink
use is_based_on from dataset api GET dataset call
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidSubiros committed Mar 8, 2023
1 parent 3cf2c9a commit 6ec5b21
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 39 deletions.
1 change: 1 addition & 0 deletions clients/datasetapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ import (
// DatasetClient defines the dataset API client used by this service:
// a health checker plus the two GET calls the handler needs
// (the dataset document with current/next, and version metadata).
type DatasetClient interface {
	Checker(context.Context, *healthcheck.CheckState) error
	GetDatasetCurrentAndNext(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID string) (m datasetclient.Dataset, err error)
	GetVersionMetadata(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID, edition, version string) (metadata datasetclient.Metadata, err error)
}
72 changes: 70 additions & 2 deletions clients/mock/datasetapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 17 additions & 4 deletions features/publish_data_dataset.feature
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Feature: Data extractor should listen to the relevant topic and publish extracte
Scenario: When searching for the extracted dataset generic metadata I get the expected result
Given dp-dataset-api is healthy
And zebedee is healthy
And the following dataset with dataset-id "cphi01" is available in dp-dataset-api
"""
{
"id": "cphi01"
}
"""
And the following metadata with dataset-id "cphi01", edition "timeseries" and version "version" is available in dp-dataset-api
"""
{
Expand Down Expand Up @@ -39,13 +45,20 @@ Feature: Data extractor should listen to the relevant topic and publish extracte
Scenario: "When searching for the extracted dataset cantabular-type metadata I get the expected result"
Given dp-dataset-api is healthy
And zebedee is healthy
And the following dataset with dataset-id "my-cantabular-dataset" is available in dp-dataset-api
"""
{
"current": {
"is_based_on": {
"@type": "cantabular_flexible_table",
"@id": "UR_HH"
}
}
}
"""
And the following metadata with dataset-id "my-cantabular-dataset", edition "my-edition" and version "my-version" is available in dp-dataset-api
"""
{
"is_based_on": {
"@type": "cantabular_flexible_table",
"@id": "UR_HH"
},
"release_date": "releasedate",
"title": "title",
"description": "description",
Expand Down
15 changes: 14 additions & 1 deletion features/steps/steps.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ func (c *Component) RegisterSteps(ctx *godog.ScenarioContext) {
ctx.Step(`^zebedee is healthy`, c.zebedeeIsHealthy)
ctx.Step(`^zebedee is unhealthy`, c.zebedeeIsUnhealthy)
ctx.Step(`^the following published data for uri "([^"]*)" is available in zebedee$`, c.theFollowingZebedeeResponseIsAvailable)
ctx.Step(`^the following dataset with dataset-id "([^"]*)" is available in dp-dataset-api$`, c.theFollowingDatasetResponseIsAvailable)
ctx.Step(`^the following metadata with dataset-id "([^"]*)", edition "([^"]*)" and version "([^"]*)" is available in dp-dataset-api$`, c.theFollowingDatasetMetadataResponseIsAvailable)
ctx.Step(`^this content-updated event is queued, to be consumed$`, c.thisContentUpdatedEventIsQueued)
ctx.Step(`^no search-data-import events are produced`, c.noEventsAreProduced)
Expand Down Expand Up @@ -85,8 +86,20 @@ func (c *Component) theFollowingZebedeeResponseIsAvailable(uriString string, zeb
return nil
}

// theFollowingDatasetResponseIsAvailable registers a mocked dataset API
// handler for GET /datasets/{id} that replies 200 OK with the provided
// document body and the component's test ETag header.
func (c *Component) theFollowingDatasetResponseIsAvailable(id string, ds *godog.DocString) error {
	path := fmt.Sprintf("/datasets/%s", id)

	handler := c.DatasetAPI.NewHandler()
	handler.Get(path).
		Reply(http.StatusOK).
		BodyString(ds.Content).
		AddHeader("Etag", c.testETag)

	return nil
}

// theFollowingDatasetMetadataResponseIsAvailable generate a mocked response for dataset API
// GET /dataset/{id}/editions/{edition}/versions/{version}/metadata with the provided metadata response
// GET /datasets/{id}/editions/{edition}/versions/{version}/metadata with the provided metadata response
func (c *Component) theFollowingDatasetMetadataResponseIsAvailable(id, edition, version string, metadata *godog.DocString) error {
c.DatasetAPI.NewHandler().
Get(fmt.Sprintf("/datasets/%s/editions/%s/versions/%s/metadata", id, edition, version)).
Expand Down
12 changes: 10 additions & 2 deletions handler/datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,20 @@ func (h *ContentPublished) handleDatasetDataType(ctx context.Context, cpEvent *m
DataType: DefaultType,
}

// Make a call to DatasetAPI
// Call DatasetAPI to get dataset, with is_based_on
dataset, err := h.DatasetCli.GetDatasetCurrentAndNext(ctx, "", h.Cfg.ServiceAuthToken, cpEvent.CollectionID, datasetID)
if err != nil {
log.Error(ctx, "cannot get dataset from dataset api: %s", err)
return err
}

// Call DatasetAPI to obtain metadata
datasetMetadataPublished, err := h.DatasetCli.GetVersionMetadata(ctx, "", h.Cfg.ServiceAuthToken, cpEvent.CollectionID, datasetID, edition, version)
if err != nil {
log.Error(ctx, "cannot get dataset published contents version %s from api", err)
return err
}

log.Info(ctx, "successfully obtained metadata from dataset api", log.Data{
"collection_id": cpEvent.CollectionID,
"dataset_id": datasetID,
Expand All @@ -50,7 +58,7 @@ func (h *ContentPublished) handleDatasetDataType(ctx context.Context, cpEvent *m
})

// Map data returned by Dataset to the kafka Event structure, including Cantabular fields
if err := searchDataImport.MapDatasetMetadataValues(ctx, &datasetMetadataPublished); err != nil {
if err := searchDataImport.MapDatasetMetadataValues(ctx, &dataset, &datasetMetadataPublished); err != nil {
return fmt.Errorf("failed to map dataset metadata values :%w", err)
}

Expand Down
29 changes: 26 additions & 3 deletions handler/datasets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func TestHandleDatasetDataTypeErrors(t *testing.T) {
Convey("Given an empty handler and a ContentPbulished event with a malformed URI", t, func() {
Convey("Given an empty handler and a ContentPublished event with a malformed URI", t, func() {
h := &ContentPublished{}
cpEvent := models.ContentPublished{
URI: "wrong%%uri",
Expand All @@ -28,8 +28,29 @@ func TestHandleDatasetDataTypeErrors(t *testing.T) {
})
})

Convey("Given a handler with a dataset api mock that fails to return metadata", t, func() {
Convey("Given a handler with a dataset api mock that fails to return a dataset", t, func() {
datasetMock := &clientMock.DatasetClientMock{
GetDatasetCurrentAndNextFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string) (dataset.Dataset, error) {
return dataset.Dataset{}, errors.New("dataset api error")
},
}
h := &ContentPublished{
DatasetCli: datasetMock,
Cfg: &config.Config{
ServiceAuthToken: "testToken",
},
}

Convey("Then the dataset handler fails with the expected error when a valid event is handled", func() {
err := h.handleDatasetDataType(ctx, &testDatasetEvent)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldEqual, "dataset api error")
})
})

Convey("Given a handler with a dataset api mock that successfully returns a dataset but fails to return metadata", t, func() {
datasetMock := &clientMock.DatasetClientMock{
GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc,
GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) {
return dataset.Metadata{}, errors.New("dataset api error")
},
Expand All @@ -50,6 +71,7 @@ func TestHandleDatasetDataTypeErrors(t *testing.T) {

Convey("Given a handler with a dataset api mock that returns metadata with a malformed latest version link", t, func() {
datasetMock := &clientMock.DatasetClientMock{
GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc,
GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) {
return dataset.Metadata{
DatasetLinks: dataset.Links{
Expand All @@ -76,7 +98,8 @@ func TestHandleDatasetDataTypeErrors(t *testing.T) {

Convey("Given a handler with a valid dataset api mock and a producer that fails to send a message", t, func() {
datasetMock := &clientMock.DatasetClientMock{
GetVersionMetadataFunc: getVersionMetadataFunc,
GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc,
GetVersionMetadataFunc: getVersionMetadataFunc,
}
producerMock := &kafkatest.IProducerMock{
SendFunc: func(schema *avro.Schema, event interface{}) error {
Expand Down
7 changes: 6 additions & 1 deletion handler/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ var (
data := mockDatasetAPIJSONResponse
return data, nil
}
	// getDatasetCurrentAndNextFunc is a happy-path stub for the mock
	// DatasetClient's GetDatasetCurrentAndNext: it ignores its arguments
	// and returns an empty dataset with no error.
	getDatasetCurrentAndNextFunc = func(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID string) (dataset.Dataset, error) {
		return dataset.Dataset{}, nil
	}

cfg, _ = config.Get()
)
Expand Down Expand Up @@ -131,7 +134,8 @@ func TestHandle(t *testing.T) {

Convey("Given an event handler with a working dataset api client and kafka producer", t, func() {
var datasetMock = &clientMock.DatasetClientMock{
GetVersionMetadataFunc: getVersionMetadataFunc,
GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc,
GetVersionMetadataFunc: getVersionMetadataFunc,
}
var producerMock = &kafkatest.IProducerMock{
SendFunc: func(schema *avro.Schema, event interface{}) error {
Expand Down Expand Up @@ -165,6 +169,7 @@ func TestHandle(t *testing.T) {

Convey("Given an event handler with a failing dataset api client", t, func() {
var datasetMock = &clientMock.DatasetClientMock{
GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc,
GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) {
return dataset.Metadata{}, errors.New("dataset api error")
},
Expand Down
18 changes: 9 additions & 9 deletions models/mapper_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ var PopulationTypes = map[string]string{
"UR": "All usual residents",
}

func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, metadata *dataset.Metadata) error {
func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, ds *dataset.Dataset, metadata *dataset.Metadata) error {
if metadata == nil {
return fmt.Errorf("nil metadata cannot be mapped")
}
Expand All @@ -63,20 +63,20 @@ func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, metadat
s.Keywords = *metadata.Keywords
}

s.PopulateCantabularFields(ctx, metadata)
s.PopulateCantabularFields(ctx, ds, metadata)

return nil
}

// PopulateCantabularFields checks if the provided dataset metadata corresponds to a Cantabular Data type,
// if it does, it populates the dimensions array of SearchDataImport with the dimension names, labels and processed labels,
// and assigns the population type corresponding to the 'IsBasedOn' id value.
func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, metadata *dataset.Metadata) {
if metadata.DatasetDetails.IsBasedOn == nil {
return // is_based_on not present in Dataset
func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, ds *dataset.Dataset, metadata *dataset.Metadata) {
if metadata == nil || ds == nil || ds.Current == nil || ds.Current.IsBasedOn == nil {
return // nil input or is_based_on not present in Dataset
}

t := metadata.DatasetDetails.IsBasedOn.Type
t := ds.Current.IsBasedOn.Type
if _, isCantabular := CantabularTypes[t]; !isCantabular {
return // Dataset type is not Cantabular
}
Expand All @@ -102,17 +102,17 @@ func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, metadat
})
}

popTypeLabel, ok := PopulationTypes[metadata.DatasetDetails.IsBasedOn.ID]
popTypeLabel, ok := PopulationTypes[ds.Current.IsBasedOn.ID]
if !ok {
log.Warn(ctx, "population type not identified",
log.Data{
"pop_type": metadata.DatasetDetails.IsBasedOn.ID,
"pop_type": ds.Current.IsBasedOn.ID,
"valid_types": PopulationTypes,
},
)
}
s.PopulationType = PopulationType{
Name: metadata.DatasetDetails.IsBasedOn.ID,
Name: ds.Current.IsBasedOn.ID,
Label: popTypeLabel,
}
}
Expand Down
Loading

0 comments on commit 6ec5b21

Please sign in to comment.