diff --git a/clients/datasetapi.go b/clients/datasetapi.go index e3863a8..9f78630 100644 --- a/clients/datasetapi.go +++ b/clients/datasetapi.go @@ -12,5 +12,6 @@ import ( // DatasetApiClient defines the zebedee client type DatasetClient interface { Checker(context.Context, *healthcheck.CheckState) error + GetDatasetCurrentAndNext(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID string) (m datasetclient.Dataset, err error) GetVersionMetadata(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID, edition, version string) (metadata datasetclient.Metadata, err error) } diff --git a/clients/mock/datasetapi.go b/clients/mock/datasetapi.go index fc2c4d3..7f656a2 100644 --- a/clients/mock/datasetapi.go +++ b/clients/mock/datasetapi.go @@ -24,6 +24,9 @@ var _ clients.DatasetClient = &DatasetClientMock{} // CheckerFunc: func(contextMoqParam context.Context, checkState *healthcheck.CheckState) error { // panic("mock out the Checker method") // }, +// GetDatasetCurrentAndNextFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string) (datasetclient.Dataset, error) { +// panic("mock out the GetDatasetCurrentAndNext method") +// }, // GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (datasetclient.Metadata, error) { // panic("mock out the GetVersionMetadata method") // }, @@ -37,6 +40,9 @@ type DatasetClientMock struct { // CheckerFunc mocks the Checker method. CheckerFunc func(contextMoqParam context.Context, checkState *healthcheck.CheckState) error + // GetDatasetCurrentAndNextFunc mocks the GetDatasetCurrentAndNext method. 
+ GetDatasetCurrentAndNextFunc func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string) (datasetclient.Dataset, error) + // GetVersionMetadataFunc mocks the GetVersionMetadata method. GetVersionMetadataFunc func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (datasetclient.Metadata, error) @@ -49,6 +55,19 @@ type DatasetClientMock struct { // CheckState is the checkState argument value. CheckState *healthcheck.CheckState } + // GetDatasetCurrentAndNext holds details about calls to the GetDatasetCurrentAndNext method. + GetDatasetCurrentAndNext []struct { + // Ctx is the ctx argument value. + Ctx context.Context + // UserAuthToken is the userAuthToken argument value. + UserAuthToken string + // ServiceAuthToken is the serviceAuthToken argument value. + ServiceAuthToken string + // CollectionID is the collectionID argument value. + CollectionID string + // DatasetID is the datasetID argument value. + DatasetID string + } // GetVersionMetadata holds details about calls to the GetVersionMetadata method. GetVersionMetadata []struct { // Ctx is the ctx argument value. @@ -67,8 +86,9 @@ type DatasetClientMock struct { Version string } } - lockChecker sync.RWMutex - lockGetVersionMetadata sync.RWMutex + lockChecker sync.RWMutex + lockGetDatasetCurrentAndNext sync.RWMutex + lockGetVersionMetadata sync.RWMutex } // Checker calls CheckerFunc. @@ -107,6 +127,54 @@ func (mock *DatasetClientMock) CheckerCalls() []struct { return calls } +// GetDatasetCurrentAndNext calls GetDatasetCurrentAndNextFunc. 
+func (mock *DatasetClientMock) GetDatasetCurrentAndNext(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string) (datasetclient.Dataset, error) { + if mock.GetDatasetCurrentAndNextFunc == nil { + panic("DatasetClientMock.GetDatasetCurrentAndNextFunc: method is nil but DatasetClient.GetDatasetCurrentAndNext was just called") + } + callInfo := struct { + Ctx context.Context + UserAuthToken string + ServiceAuthToken string + CollectionID string + DatasetID string + }{ + Ctx: ctx, + UserAuthToken: userAuthToken, + ServiceAuthToken: serviceAuthToken, + CollectionID: collectionID, + DatasetID: datasetID, + } + mock.lockGetDatasetCurrentAndNext.Lock() + mock.calls.GetDatasetCurrentAndNext = append(mock.calls.GetDatasetCurrentAndNext, callInfo) + mock.lockGetDatasetCurrentAndNext.Unlock() + return mock.GetDatasetCurrentAndNextFunc(ctx, userAuthToken, serviceAuthToken, collectionID, datasetID) +} + +// GetDatasetCurrentAndNextCalls gets all the calls that were made to GetDatasetCurrentAndNext. +// Check the length with: +// +// len(mockedDatasetClient.GetDatasetCurrentAndNextCalls()) +func (mock *DatasetClientMock) GetDatasetCurrentAndNextCalls() []struct { + Ctx context.Context + UserAuthToken string + ServiceAuthToken string + CollectionID string + DatasetID string +} { + var calls []struct { + Ctx context.Context + UserAuthToken string + ServiceAuthToken string + CollectionID string + DatasetID string + } + mock.lockGetDatasetCurrentAndNext.RLock() + calls = mock.calls.GetDatasetCurrentAndNext + mock.lockGetDatasetCurrentAndNext.RUnlock() + return calls +} + // GetVersionMetadata calls GetVersionMetadataFunc. 
func (mock *DatasetClientMock) GetVersionMetadata(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (datasetclient.Metadata, error) { if mock.GetVersionMetadataFunc == nil { diff --git a/features/publish_data_dataset.feature b/features/publish_data_dataset.feature index 340a6f9..a0528f0 100644 --- a/features/publish_data_dataset.feature +++ b/features/publish_data_dataset.feature @@ -4,6 +4,12 @@ Feature: Data extractor should listen to the relevant topic and publish extracte Scenario: When searching for the extracted dataset generic metadata I get the expected result Given dp-dataset-api is healthy And zebedee is healthy + And the following dataset with dataset-id "cphi01" is available in dp-dataset-api + """ + { + "id": "cphi01" + } + """ And the following metadata with dataset-id "cphi01", edition "timeseries" and version "version" is available in dp-dataset-api """ { @@ -39,13 +45,20 @@ Feature: Data extractor should listen to the relevant topic and publish extracte Scenario: "When searching for the extracted dataset cantabular-type metadata I get the expected result" Given dp-dataset-api is healthy And zebedee is healthy + And the following dataset with dataset-id "my-cantabular-dataset" is available in dp-dataset-api + """ + { + "current": { + "is_based_on": { + "@type": "cantabular_flexible_table", + "@id": "UR_HH" + } + } + } + """ And the following metadata with dataset-id "my-cantabular-dataset", edition "my-edition" and version "my-version" is available in dp-dataset-api """ { - "is_based_on": { - "@type": "cantabular_flexible_table", - "@id": "UR_HH" - }, "release_date": "releasedate", "title": "title", "description": "description", diff --git a/features/steps/steps.go b/features/steps/steps.go index ab311ee..6ad04ac 100644 --- a/features/steps/steps.go +++ b/features/steps/steps.go @@ -21,6 +21,7 @@ func (c *Component) RegisterSteps(ctx *godog.ScenarioContext) { 
ctx.Step(`^zebedee is healthy`, c.zebedeeIsHealthy) ctx.Step(`^zebedee is unhealthy`, c.zebedeeIsUnhealthy) ctx.Step(`^the following published data for uri "([^"]*)" is available in zebedee$`, c.theFollowingZebedeeResponseIsAvailable) + ctx.Step(`^the following dataset with dataset-id "([^"]*)" is available in dp-dataset-api$`, c.theFollowingDatasetResponseIsAvailable) ctx.Step(`^the following metadata with dataset-id "([^"]*)", edition "([^"]*)" and version "([^"]*)" is available in dp-dataset-api$`, c.theFollowingDatasetMetadataResponseIsAvailable) ctx.Step(`^this content-updated event is queued, to be consumed$`, c.thisContentUpdatedEventIsQueued) ctx.Step(`^no search-data-import events are produced`, c.noEventsAreProduced) @@ -85,8 +86,20 @@ func (c *Component) theFollowingZebedeeResponseIsAvailable(uriString string, zeb return nil } +// theFollowingDatasetResponseIsAvailable generates a mocked response for dataset API +// GET /datasets/{id} with the provided dataset response +func (c *Component) theFollowingDatasetResponseIsAvailable(id string, ds *godog.DocString) error { + c.DatasetAPI.NewHandler(). + Get(fmt.Sprintf("/datasets/%s", id)). + Reply(http.StatusOK). + BodyString(ds.Content). + AddHeader("Etag", c.testETag) + + return nil +} + // theFollowingDatasetMetadataResponseIsAvailable generate a mocked response for dataset API -// GET /dataset/{id}/editions/{edition}/versions/{version}/metadata with the provided metadata response +// GET /datasets/{id}/editions/{edition}/versions/{version}/metadata with the provided metadata response func (c *Component) theFollowingDatasetMetadataResponseIsAvailable(id, edition, version string, metadata *godog.DocString) error { c.DatasetAPI.NewHandler(). Get(fmt.Sprintf("/datasets/%s/editions/%s/versions/%s/metadata", id, edition, version)). 
diff --git a/handler/datasets.go b/handler/datasets.go index f76174d..1b5a181 100644 --- a/handler/datasets.go +++ b/handler/datasets.go @@ -36,12 +36,20 @@ func (h *ContentPublished) handleDatasetDataType(ctx context.Context, cpEvent *m DataType: DefaultType, } - // Make a call to DatasetAPI + // Call DatasetAPI to get dataset, with is_based_on + dataset, err := h.DatasetCli.GetDatasetCurrentAndNext(ctx, "", h.Cfg.ServiceAuthToken, cpEvent.CollectionID, datasetID) + if err != nil { + log.Error(ctx, "cannot get dataset from dataset api", err) + return err + } + + // Call DatasetAPI to obtain metadata datasetMetadataPublished, err := h.DatasetCli.GetVersionMetadata(ctx, "", h.Cfg.ServiceAuthToken, cpEvent.CollectionID, datasetID, edition, version) if err != nil { log.Error(ctx, "cannot get dataset published contents version %s from api", err) return err } + log.Info(ctx, "successfully obtained metadata from dataset api", log.Data{ "collection_id": cpEvent.CollectionID, "dataset_id": datasetID, @@ -50,7 +58,7 @@ func (h *ContentPublished) handleDatasetDataType(ctx context.Context, cpEvent *m }) // Map data returned by Dataset to the kafka Event structure, including Cantabular fields - if err := searchDataImport.MapDatasetMetadataValues(ctx, &datasetMetadataPublished); err != nil { + if err := searchDataImport.MapDatasetMetadataValues(ctx, &dataset, &datasetMetadataPublished); err != nil { return fmt.Errorf("failed to map dataset metadata values :%w", err) } diff --git a/handler/datasets_test.go b/handler/datasets_test.go index f08d496..df6485e 100644 --- a/handler/datasets_test.go +++ b/handler/datasets_test.go @@ -15,7 +15,7 @@ import ( ) func TestHandleDatasetDataTypeErrors(t *testing.T) { - Convey("Given an empty handler and a ContentPbulished event with a malformed URI", t, func() { + Convey("Given an empty handler and a ContentPublished event with a malformed URI", t, func() { h := &ContentPublished{} cpEvent := models.ContentPublished{ URI: "wrong%%uri", @@ 
-28,8 +28,29 @@ func TestHandleDatasetDataTypeErrors(t *testing.T) { }) }) - Convey("Given a handler with a dataset api mock that fails to return metadata", t, func() { + Convey("Given a handler with a dataset api mock that fails to return a dataset", t, func() { datasetMock := &clientMock.DatasetClientMock{ + GetDatasetCurrentAndNextFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string) (dataset.Dataset, error) { + return dataset.Dataset{}, errors.New("dataset api error") + }, + } + h := &ContentPublished{ + DatasetCli: datasetMock, + Cfg: &config.Config{ + ServiceAuthToken: "testToken", + }, + } + + Convey("Then the dataset handler fails with the expected error when a valid event is handled", func() { + err := h.handleDatasetDataType(ctx, &testDatasetEvent) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldEqual, "dataset api error") + }) + }) + + Convey("Given a handler with a dataset api mock that successfully returns a dataset but fails to return metadata", t, func() { + datasetMock := &clientMock.DatasetClientMock{ + GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc, GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) { return dataset.Metadata{}, errors.New("dataset api error") }, @@ -50,6 +71,7 @@ func TestHandleDatasetDataTypeErrors(t *testing.T) { Convey("Given a handler with a dataset api mock that returns metadata with a malformed latest version link", t, func() { datasetMock := &clientMock.DatasetClientMock{ + GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc, GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) { return dataset.Metadata{ DatasetLinks: dataset.Links{ @@ -76,7 +98,8 @@ func 
TestHandleDatasetDataTypeErrors(t *testing.T) { Convey("Given a handler with a valid dataset api mock and a producer that fails to send a message", t, func() { datasetMock := &clientMock.DatasetClientMock{ - GetVersionMetadataFunc: getVersionMetadataFunc, + GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc, + GetVersionMetadataFunc: getVersionMetadataFunc, } producerMock := &kafkatest.IProducerMock{ SendFunc: func(schema *avro.Schema, event interface{}) error { diff --git a/handler/handler_test.go b/handler/handler_test.go index 538c1a3..56bfc0b 100644 --- a/handler/handler_test.go +++ b/handler/handler_test.go @@ -50,6 +50,9 @@ var ( data := mockDatasetAPIJSONResponse return data, nil } + getDatasetCurrentAndNextFunc = func(ctx context.Context, userAuthToken, serviceAuthToken, collectionID, datasetID string) (dataset.Dataset, error) { + return dataset.Dataset{}, nil + } cfg, _ = config.Get() ) @@ -131,7 +134,8 @@ func TestHandle(t *testing.T) { Convey("Given an event handler with a working dataset api client and kafka producer", t, func() { var datasetMock = &clientMock.DatasetClientMock{ - GetVersionMetadataFunc: getVersionMetadataFunc, + GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc, + GetVersionMetadataFunc: getVersionMetadataFunc, } var producerMock = &kafkatest.IProducerMock{ SendFunc: func(schema *avro.Schema, event interface{}) error { @@ -165,6 +169,7 @@ func TestHandle(t *testing.T) { Convey("Given an event handler with a failing dataset api client", t, func() { var datasetMock = &clientMock.DatasetClientMock{ + GetDatasetCurrentAndNextFunc: getDatasetCurrentAndNextFunc, GetVersionMetadataFunc: func(ctx context.Context, userAuthToken string, serviceAuthToken string, collectionID string, datasetID string, edition string, version string) (dataset.Metadata, error) { return dataset.Metadata{}, errors.New("dataset api error") }, diff --git a/models/mapper_dataset.go b/models/mapper_dataset.go index 1d283cb..931a1d7 100644 --- 
a/models/mapper_dataset.go +++ b/models/mapper_dataset.go @@ -41,7 +41,7 @@ var PopulationTypes = map[string]string{ "UR": "All usual residents", } -func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, metadata *dataset.Metadata) error { +func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, ds *dataset.Dataset, metadata *dataset.Metadata) error { if metadata == nil { return fmt.Errorf("nil metadata cannot be mapped") } @@ -63,7 +63,7 @@ func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, metadat s.Keywords = *metadata.Keywords } - s.PopulateCantabularFields(ctx, metadata) + s.PopulateCantabularFields(ctx, ds, metadata) return nil } @@ -71,12 +71,12 @@ func (s *SearchDataImport) MapDatasetMetadataValues(ctx context.Context, metadat // PopulateCantabularFields checks if the provided dataset metadata corresponds to a Cantabular Data type, // if it does, it populates the dimensions array of SearchDataImport with the dimension names, labels and processed labels, // and assigns the population type corresponding to the 'IsBasedOn' id value. 
-func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, metadata *dataset.Metadata) { - if metadata.DatasetDetails.IsBasedOn == nil { - return // is_based_on not present in Dataset +func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, ds *dataset.Dataset, metadata *dataset.Metadata) { + if metadata == nil || ds == nil || ds.Current == nil || ds.Current.IsBasedOn == nil { + return // nil input or is_based_on not present in Dataset } - t := metadata.DatasetDetails.IsBasedOn.Type + t := ds.Current.IsBasedOn.Type if _, isCantabular := CantabularTypes[t]; !isCantabular { return // Dataset type is not Cantabular } @@ -102,17 +102,17 @@ func (s *SearchDataImport) PopulateCantabularFields(ctx context.Context, metadat }) } - popTypeLabel, ok := PopulationTypes[metadata.DatasetDetails.IsBasedOn.ID] + popTypeLabel, ok := PopulationTypes[ds.Current.IsBasedOn.ID] if !ok { log.Warn(ctx, "population type not identified", log.Data{ - "pop_type": metadata.DatasetDetails.IsBasedOn.ID, + "pop_type": ds.Current.IsBasedOn.ID, "valid_types": PopulationTypes, }, ) } s.PopulationType = PopulationType{ - Name: metadata.DatasetDetails.IsBasedOn.ID, + Name: ds.Current.IsBasedOn.ID, Label: popTypeLabel, } } diff --git a/models/mapper_dataset_test.go b/models/mapper_dataset_test.go index 8f0430d..fba0743 100644 --- a/models/mapper_dataset_test.go +++ b/models/mapper_dataset_test.go @@ -42,10 +42,11 @@ func TestMapDatasetMetadataValues(t *testing.T) { }, }, } + ds := &dataset.Dataset{} Convey("Then all expected fields are mapped to a SearchDataImport model", func() { s := models.SearchDataImport{} - err := s.MapDatasetMetadataValues(ctx, metadata) + err := s.MapDatasetMetadataValues(ctx, ds, metadata) So(err, ShouldBeNil) So(s, ShouldResemble, models.SearchDataImport{ ReleaseDate: testReleaseDate, @@ -61,7 +62,7 @@ func TestMapDatasetMetadataValues(t *testing.T) { Convey("trying to map a nil metadata value returns the expected error", t, func() { s := 
models.SearchDataImport{} - err := s.MapDatasetMetadataValues(ctx, nil) + err := s.MapDatasetMetadataValues(ctx, nil, nil) So(err, ShouldNotBeNil) So(err.Error(), ShouldEqual, "nil metadata cannot be mapped") @@ -81,7 +82,7 @@ func TestMapDatasetMetadataValues(t *testing.T) { Convey("Then trying to map the values to a search data import event fails with the expected error", func() { s := models.SearchDataImport{} - err := s.MapDatasetMetadataValues(ctx, metadata) + err := s.MapDatasetMetadataValues(ctx, nil, metadata) So(err, ShouldNotBeNil) So(err.Error(), ShouldEqual, "error parsing the metadata uri: parse \"wrong£%$@\": invalid URL escape \"%$@\"") @@ -95,12 +96,13 @@ func TestMapDatasetMetadataValues(t *testing.T) { func TestPopulateCantabularFields(t *testing.T) { Convey("Given a dataset Metadata without is_based_on field", t, func() { metadata := &dataset.Metadata{} + ds := &dataset.Dataset{} Convey("When PopulateCantabularFields is called on a valid search data import struct", func() { s := &models.SearchDataImport{ Summary: testSummary, } - s.PopulateCantabularFields(ctx, metadata) + s.PopulateCantabularFields(ctx, ds, metadata) Convey("Then the search data import is not modified", func() { So(*s, ShouldResemble, models.SearchDataImport{ @@ -111,7 +113,8 @@ func TestPopulateCantabularFields(t *testing.T) { }) Convey("Given a dataset Metadata with is_based_on field, but a non-cantabular type", t, func() { - metadata := &dataset.Metadata{ - DatasetDetails: dataset.DatasetDetails{ + metadata := &dataset.Metadata{} + ds := &dataset.Dataset{ + Current: &dataset.DatasetDetails{ IsBasedOn: &dataset.IsBasedOn{ Type: "non-cantabular", @@ -123,7 +126,7 @@ func TestPopulateCantabularFields(t *testing.T) { s := &models.SearchDataImport{ Summary: testSummary, } - s.PopulateCantabularFields(ctx, metadata) + s.PopulateCantabularFields(ctx, ds, metadata) Convey("Then the search data import is not modified", func() { So(*s, ShouldResemble, models.SearchDataImport{ @@ -137,11 +140,6 @@ func 
TestPopulateCantabularFields(t *testing.T) { areaTypeTrue := true areaTypeFalse := false metadata := &dataset.Metadata{ - DatasetDetails: dataset.DatasetDetails{ - IsBasedOn: &dataset.IsBasedOn{ - Type: "cantabular_flexible_table", - }, - }, Version: dataset.Version{ Dimensions: []dataset.VersionDimension{ {ID: "dim1", Label: "label 1 (10 categories)"}, @@ -151,15 +149,22 @@ func TestPopulateCantabularFields(t *testing.T) { }, }, } + ds := &dataset.Dataset{ + Current: &dataset.DatasetDetails{ + IsBasedOn: &dataset.IsBasedOn{ + Type: "cantabular_flexible_table", + }, + }, + } Convey("When PopulateCantabularFields is called on a valid search data import struct", func() { - s := &models.SearchDataImport{ + s := models.SearchDataImport{ Summary: testSummary, } - s.PopulateCantabularFields(ctx, metadata) + s.PopulateCantabularFields(ctx, ds, metadata) Convey("Then only the non-area-type dimensions are populated, with the expected values", func() { - So(*s, ShouldResemble, models.SearchDataImport{ + So(s, ShouldResemble, models.SearchDataImport{ Summary: testSummary, DataType: "cantabular_flexible_table", Dimensions: []models.Dimension{ @@ -167,14 +172,16 @@ func TestPopulateCantabularFields(t *testing.T) { {Name: "dim2", RawLabel: "label 2 (12 Categories)", Label: "label 2"}, {Name: "dim4", RawLabel: "label 4 (1 category)", Label: "label 4"}, }, + PopulationType: models.PopulationType{}, }) }) }) }) Convey("Given a dataset metadata with is_based_on field with a cantabular type and a valid population type", t, func() { - metadata := &dataset.Metadata{ - DatasetDetails: dataset.DatasetDetails{ + metadata := &dataset.Metadata{} + ds := &dataset.Dataset{ + Current: &dataset.DatasetDetails{ IsBasedOn: &dataset.IsBasedOn{ ID: "UR_HH", Type: "cantabular_flexible_table", @@ -186,7 +193,7 @@ func TestPopulateCantabularFields(t *testing.T) { s := &models.SearchDataImport{ Summary: testSummary, } - s.PopulateCantabularFields(ctx, metadata) + s.PopulateCantabularFields(ctx, ds, 
metadata) Convey("Then the expected population type fields are populated", func() { So(*s, ShouldResemble, models.SearchDataImport{