diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44cf8ff..ef4b23b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,18 @@
 All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
+
+
+## [1.1.0] - 2021-06-22
 ### Added
-- Query ORCID and filter for current employees.
+- query ORCID for person and current employees at ROR organization
+- refactor all queries to follow three steps: source, extraction, mapping to vivo-rdf
+- check GraphQL queries for response code 200 and additionally if response body contains error messages (no data)
+- make all controller use HTTP-Get requests
+- centralize input validation
+- export to VIVO in chunks
+- simplify error handling
+- document methods in swagger-UI and remove response section
 
 ## [Renamed Project] - 2021-05-25
 As new datsources were integrated and the name datacitecommons2vivo was not reflecting
diff --git a/DockerfileBuild b/DockerfileBuild
index 75f2c97..6b63768 100644
--- a/DockerfileBuild
+++ b/DockerfileBuild
@@ -3,7 +3,7 @@
 FROM maven:3.6.3-jdk-11-slim AS build-env
 WORKDIR /app
 COPY ./pom.xml ./pom.xml
 COPY ./src ./src
-RUN mvn clean install -Dmaven.test.skip=true && cp target/*.jar app.jar
+RUN mvn clean install -Dmaven.test.skip=true && cp target/generate2vivo-*.jar app.jar
 
 #-------- NEXT STAGE -----------
diff --git a/README.md b/README.md
index 394f3dd..b53af80 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,43 @@
 [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
 
 ## generate2vivo
-generate2vivo is a Data Ingest Tool for the Open-Source-Software VIVO.
-It queries metadata from the Datacite Commons PID-Graph and the ROR API,
-maps it to the VIVO ontology using [sparql-generate](https://ci.mines-stetienne.fr/sparql-generate/index.html) and optionally imports it into a VIVO instance.
+generate2vivo is an extensible Data Ingest Tool for the Open-Source-Software VIVO.
+It currently queries metadata from Datacite Commons, ROR and ORCID
+and maps them to the VIVO ontology using [sparql-generate](https://ci.mines-stetienne.fr/sparql-generate/index.html).
+The resulting RDF data can be exported to a VIVO instance directly or returned in an HTTP response.
 
+- [Available queries](#available-queries)
+  + [Datacite Commons](#datacite-commons)
+  + [ROR](#ror)
+  + [ORCID](#orcid)
 - [Installation](#installation)
-- [Usage](#usage)
-  + [Datacite Commons](#datacite-commons)
-  + [ROR](#ror)
 - [Run in Command Line](#run-in-command-line)
 - [Extensible](#extensible)
 
+### Available queries
+The datasources and queries that are currently available are listed below.
+
+##### Datacite Commons
+For Datacite Commons the following queries are available:
+* `organization`: This method gets data about an organization by passing a ROR id.
+* `organizationPlusPeople`: This method gets data about an organization and its affiliated people by passing a ROR id.
+* `organizationPlusPeoplePlusPublications`: This method gets data about an organization and its affiliated people and their respective publications by passing a ROR id.
+* `person`: This method gets data about a person by passing an ORCID id.
+* `personPlusPublications`: This method gets data about a person and their publications by passing an ORCID id.
+* `work`: This method gets data about a work by passing a DOI.
+
+##### ROR
+For ROR there are 2 queries available:
+* `organization`: This method gets data about an organization by passing a ROR id.
+* `organizationPlusChildren`: This method gets data about an organization and all of its sub-organizations by passing a ROR id.
+
+##### ORCID
+For ORCID the following queries are available:
+* `personPlusWorks`: This method gets data about a person and their works by passing an ORCID id.
+* `currentEmployeesPlusWorks`: This method gets data about an organization's current employees and their works by passing a ROR id.
+
+
+
 ### Installation
 1. Clone the repository to a local folder using `git clone https://github.com/vivo-community/generate2vivo.git`
 2. Change into the folder where the repository has been cloned.
@@ -33,32 +59,16 @@ maps it to the VIVO ontology using [sparql-generate](https://ci.mines-stetienne.
 5. A minimal swagger-ui will be available at `http://localhost:9000/swagger-ui/`.
 
-### Usage
-Go to `http://localhost:9000/swagger-ui/` in your browser and choose Datacite Commons or ROR as a data source.
-
-##### Datacite Commons
-For Datacite Commons there are 2 queries available:
-* `getOrganizationPlusPersons`: Queries Datacite Commons for the organization and its affiliated people.
-* `getPersonPlusPublications`: Queries Datacite Commons for the person and its affiliated publications.
-
-##### ROR
-For ROR there are 2 queries available:
-* `getOrganization`: Queries ROR for the organization.
-* `getOrganizationPlusChildren`: Queries ROR for the organization and all of its sub-organizations recursively.
-
-The program will return a 200 Status, if the data was imported to VIVO or if you chose not to provide your VIVO details,
-it will return the RDF-data as a result in format JSON-LD.
-
 ### Run in Command Line
 Alternatively you can run the queries from the command line using the sparql-generate executable JAR-file.
 All queries are placed in folder `src/main/resources/sparqlg` and come with a `sparql-generate-conf.json`.
 Its structure and use are explained in detail on the [sparql-generate website](https://ci.mines-stetienne.fr/sparql-generate/language-cli.html).
 
 ### Extensible
-The software is easily extensible, meaning you can add and remove datasources without touching the code.
+The software is easily extensible, meaning you can add and remove datasources.
 For example, if you are not interested in using Datacite Commons, just remove the folder from `src/main/resources/sparqlg`
-and the respective controller in the package `eu.tib.controller` and it's gone.
+and the respective controller in the package `eu.tib.controller`.
 
 On the other hand, if you would like to add a datasource:
 * add a folder with your queries under `src/main/resources/sparqlg` and include a `sparql-generate-conf.json`
diff --git a/pom.xml b/pom.xml
index ff8e0a0..5120aa2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -13,8 +13,8 @@
 eu.tib
 generate2vivo
- 1.0.0
- Data Ingest from different datasources like Datacite Commons or ROR to VIVO
+ 1.1.0
+ Extensible Data Ingest Tool for VIVO. Contains data sources like Datacite Commons, ORCID and ROR.
UTF-8 diff --git a/src/main/java/eu/tib/controller/DataciteCommonsController.java b/src/main/java/eu/tib/controller/DataciteCommonsController.java index f15a256..901bd54 100644 --- a/src/main/java/eu/tib/controller/DataciteCommonsController.java +++ b/src/main/java/eu/tib/controller/DataciteCommonsController.java @@ -1,17 +1,16 @@ package eu.tib.controller; +import eu.tib.controller.validation.InputValidator; import eu.tib.service.ResponseService; import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; import org.springframework.util.StopWatch; import org.springframework.validation.annotation.Validated; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.*; import javax.validation.Valid; import javax.validation.constraints.Pattern; @@ -27,9 +26,29 @@ public class DataciteCommonsController { @Autowired private ResponseService responseService; - @PostMapping(value = "/getOrganizationPlusPersons", produces = "application/json") - public ResponseEntity getOrganizationPlusPersons( - @Valid @Pattern(regexp = "^https://ror.org/\\d{2}[a-z0-9]{5}\\d{2}") + @ApiOperation(value = "Retrieve organization data from Datacite Commons", notes = "This method gets data about an organization from Datacite Commons by passing a ROR id.") + @GetMapping(value = "/organization", produces = "application/json") + public ResponseEntity getOrganization( + @Valid @Pattern(regexp = InputValidator.ror) + @ApiParam("Complete ROR URL consisting of https://ror.org/ plus id") + @RequestParam String ror) { + + final String id = "sparqlg/datacitecommons/organization"; + log.info("Incoming Request for " + id + " with ror: " + ror); + StopWatch stopWatch = new StopWatch(id); + stopWatch.start(id); + + ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("ror", ror)); + + stopWatch.stop(); + log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); + return result; + } + + @ApiOperation(value = "Retrieve data about an organization and its affiliated people from Datacite Commons", notes = "This method gets data about an organization and its affiliated people from Datacite Commons by passing a ROR id.") + @GetMapping(value = "/organizationPlusPeople", produces = "application/json") + public ResponseEntity getOrganizationPlusPeople( + @Valid @Pattern(regexp = InputValidator.ror) @ApiParam("Complete ROR URL consisting of https://ror.org/ plus id") @RequestParam String ror) { @@ -44,10 +63,48 @@ public ResponseEntity getOrganizationPlusPersons( log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); return result; } + @ApiOperation(value = "Retrieve data about an organization and its affiliated people and their respective publications from Datacite Commons", notes = "This method gets data about an organization and its affiliated people and their respective publications from Datacite Commons by passing a ROR id.") + @GetMapping(value = "/organizationPlusPeoplePlusPublications", produces = "application/json") + public ResponseEntity getOrganizationPlusPeoplePlusPublications( + @Valid @Pattern(regexp = InputValidator.ror) + @ApiParam("Complete 
ROR URL consisting of https://ror.org/ plus id") + @RequestParam String ror) { + + final String id = "sparqlg/datacitecommons/orga2person2publication"; + log.info("Incoming Request for " + id + " with ror: " + ror); + StopWatch stopWatch = new StopWatch(id); + stopWatch.start(id); - @PostMapping(value = "/getPersonPlusPublications", produces = "application/json") + ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("ror", ror)); + + stopWatch.stop(); + log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); + return result; + } + + @ApiOperation(value = "Retrieve data about a person from Datacite Commons", notes = "This method gets data about a person from Datacite Commons by passing an ORCID id.") + @GetMapping(value = "/person", produces = "application/json") + public ResponseEntity getPerson( + @Valid @Pattern(regexp = InputValidator.orcid) + @ApiParam("Complete Orcid URL consisting of https://orcid.org/ plus id") + @RequestParam String orcid) { + + final String id = "sparqlg/datacitecommons/person"; + log.info("Incoming Request for " + id + " with orcid: " + orcid); + StopWatch stopWatch = new StopWatch(id); + stopWatch.start(id); + + ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("orcid", orcid)); + + stopWatch.stop(); + log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); + return result; + } + + @ApiOperation(value = "Retrieve data about a person and their publications from Datacite Commons", notes = "This method gets data about a person and their publications from Datacite Commons by passing an ORCID id.") + @GetMapping(value = "/personPlusPublications", produces = "application/json") public ResponseEntity getPersonPlusPublications( - @Valid @Pattern(regexp = "^https://orcid.org/\\d{4}-\\d{4}-\\d{4}-\\d{4}") + @Valid @Pattern(regexp = InputValidator.orcid) @ApiParam("Complete Orcid URL consisting of https://orcid.org/ plus id") @RequestParam String orcid) { @@ -62,4 +119,23 @@ public ResponseEntity getPersonPlusPublications( log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); return result; } + + @ApiOperation(value = "Retrieve data about a work from Datacite Commons", notes = "This method gets data about a work from Datacite Commons by passing an DOI.") + @GetMapping(value = "/work", produces = "application/json") + public ResponseEntity getWork( + @Valid @Pattern(regexp = InputValidator.doi) + @ApiParam("DOI of the publication") + @RequestParam String doi) { + + final String id = "sparqlg/datacitecommons/work"; + log.info("Incoming Request for " + id + " with doi: " + doi); + StopWatch stopWatch = new StopWatch(id); + stopWatch.start(id); + + ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("doi", doi)); + + stopWatch.stop(); + log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); + return result; + } } diff --git a/src/main/java/eu/tib/controller/OrcidController.java b/src/main/java/eu/tib/controller/OrcidController.java index edfc149..6d9bc0f 100644 --- a/src/main/java/eu/tib/controller/OrcidController.java +++ b/src/main/java/eu/tib/controller/OrcidController.java @@ -1,17 +1,16 @@ package eu.tib.controller; +import eu.tib.controller.validation.InputValidator; import eu.tib.service.ResponseService; import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.http.ResponseEntity; import org.springframework.util.StopWatch; import org.springframework.validation.annotation.Validated; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.*; import javax.validation.Valid; import javax.validation.constraints.Pattern; @@ -27,18 +26,41 @@ public class OrcidController { @Autowired private ResponseService responseService; - @PostMapping(value = "/getCurrentEmployees", produces = "application/json") - public ResponseEntity getCurrentEmployees( - @Valid @Pattern(regexp = "^Q[1-9]\\d*$") - @ApiParam("Wikidata id for a research organization starting with Q") - @RequestParam String wikidata) { + @ApiOperation(value = "Retrieve data about a person and their works from ORCID", notes = "This method gets data about a person and their works from ORCID by passing an ORCID id.") + @GetMapping(value = "/personPlusWorks", produces = "application/json") + public ResponseEntity getPersonPlusWorks( + @Valid @Pattern(regexp = InputValidator.orcid) + @ApiParam("Complete Orcid URL consisting of https://orcid.org/ plus id") + @RequestParam String orcid) { + + final String id = "sparqlg/orcid/person"; + log.info("Incoming Request for " + id + " with orcid: " + orcid); + StopWatch stopWatch = new StopWatch(id); + stopWatch.start(id); + + String orcid_id = orcid.replaceFirst("https://orcid.org/",""); + ResponseEntity result = responseService.buildResponse(id, + Collections.singletonMap("orcid", orcid_id)); + + stopWatch.stop(); + log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); + return result; + } + + + @ApiOperation(value = "Retrieve data about an organization's current employees and their works from ORCID", notes = "This method gets data about an organization's current employees and their works from ORCID by passing a ROR id.") + @GetMapping(value = "/currentEmployeesPlusWorks", produces = "application/json") + public ResponseEntity getCurrentEmployeesPlusWorks( + @Valid @Pattern(regexp = InputValidator.ror) + @ApiParam("Complete ROR URL consisting of https://ror.org/ plus id") + @RequestParam String ror) { final String id = "sparqlg/orcid/employees"; - log.info("Incoming Request for " + id + " with wikidata: " + wikidata); + log.info("Incoming Request for " + id + " with ror: " + ror); StopWatch stopWatch = new StopWatch(id); stopWatch.start(id); - ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("wikidata", wikidata)); + ResponseEntity result = responseService.buildResponse(id, Collections.singletonMap("ror", ror)); stopWatch.stop(); log.info(id + " took " + stopWatch.getTotalTimeSeconds() + "s"); diff --git a/src/main/java/eu/tib/controller/RORController.java b/src/main/java/eu/tib/controller/RORController.java index 4cb9fbe..844f469 100644 --- a/src/main/java/eu/tib/controller/RORController.java +++ b/src/main/java/eu/tib/controller/RORController.java @@ -1,17 +1,16 @@ package eu.tib.controller; +import eu.tib.controller.validation.InputValidator; import eu.tib.service.ResponseService; import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; import 
org.springframework.util.StopWatch; import org.springframework.validation.annotation.Validated; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.*; import javax.validation.Valid; import javax.validation.constraints.Pattern; @@ -27,13 +26,14 @@ public class RORController { @Autowired private ResponseService responseService; - @PostMapping(value = "/getOrganizationPlusChildren", produces = "application/json") - public ResponseEntity getOrganizationPlusChildren( - @Valid @Pattern(regexp = "^https://ror.org/\\d{2}[a-z0-9]{5}\\d{2}") + @ApiOperation(value = "Retrieve data about an organization from ROR", notes = "This method gets data about an organization from ROR by passing a ROR id.") + @GetMapping(value = "/organization", produces = "application/json") + public ResponseEntity getOrganization( + @Valid @Pattern(regexp = InputValidator.ror) @ApiParam("Complete ROR URL consisting of https://ror.org/ plus id") @RequestParam String ror) { - final String id = "sparqlg/ror/orga2children"; + final String id = "sparqlg/ror/organization"; log.info("Incoming Request for " + id + " with ror: " + ror); StopWatch stopWatch = new StopWatch(id); stopWatch.start(id); @@ -45,13 +45,14 @@ public ResponseEntity getOrganizationPlusChildren( return result; } - @PostMapping(value = "/getOrganization", produces = "application/json") - public ResponseEntity getOrganization( - @Valid @Pattern(regexp = "^https://ror.org/\\d{2}[a-z0-9]{5}\\d{2}") + @ApiOperation(value = "Retrieve data about an organization and all their sub-organizations from ROR", notes = "This method gets data about an organization and all their sub-organizations from ROR by passing a ROR id.") + @GetMapping(value = "/organizationPlusChildren", produces = "application/json") + public ResponseEntity getOrganizationPlusChildren( + @Valid @Pattern(regexp = InputValidator.ror) @ApiParam("Complete ROR URL consisting of https://ror.org/ plus id") @RequestParam String ror) { - final String id = "sparqlg/ror/organization"; + final String id = "sparqlg/ror/orga2children"; log.info("Incoming Request for " + id + " with ror: " + ror); StopWatch stopWatch = new StopWatch(id); stopWatch.start(id); diff --git a/src/main/java/eu/tib/controller/RestExceptionHandler.java b/src/main/java/eu/tib/controller/RestExceptionHandler.java deleted file mode 100644 index bed691c..0000000 --- a/src/main/java/eu/tib/controller/RestExceptionHandler.java +++ /dev/null @@ -1,141 +0,0 @@ -package eu.tib.controller; - -import eu.tib.exception.*; -import lombok.extern.slf4j.Slf4j; -import org.springframework.core.Ordered; -import org.springframework.core.annotation.Order; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.http.converter.HttpMessageNotReadableException; -import org.springframework.http.converter.HttpMessageNotWritableException; -import org.springframework.web.HttpMediaTypeNotSupportedException; -import org.springframework.web.bind.MethodArgumentNotValidException; -import org.springframework.web.bind.MissingServletRequestParameterException; -import org.springframework.web.bind.annotation.ControllerAdvice; -import org.springframework.web.bind.annotation.ExceptionHandler; -import 
org.springframework.web.context.request.ServletWebRequest; -import org.springframework.web.context.request.WebRequest; -import org.springframework.web.method.annotation.MethodArgumentTypeMismatchException; -import org.springframework.web.servlet.NoHandlerFoundException; -import org.springframework.web.servlet.mvc.method.annotation.ResponseEntityExceptionHandler; - -import static org.springframework.http.HttpStatus.*; - -@Order(Ordered.HIGHEST_PRECEDENCE) -@ControllerAdvice -@Slf4j -public class RestExceptionHandler extends ResponseEntityExceptionHandler { - - @Override - protected ResponseEntity handleMissingServletRequestParameter( - MissingServletRequestParameterException ex, HttpHeaders headers, - HttpStatus status, WebRequest request) { - String error = ex.getParameterName() + " parameter is missing"; - return buildResponseEntity(new ApiError(BAD_REQUEST, error, ex)); - } - - @Override - protected ResponseEntity handleMethodArgumentNotValid( - MethodArgumentNotValidException ex, - HttpHeaders headers, - HttpStatus status, - WebRequest request) { - ApiError apiError = new ApiError(BAD_REQUEST); - apiError.setMessage("Validation error"); - apiError.addValidationErrors(ex.getBindingResult().getFieldErrors()); - apiError.addValidationError(ex.getBindingResult().getGlobalErrors()); - return buildResponseEntity(apiError); - } - - @Override - protected ResponseEntity handleHttpMediaTypeNotSupported( - HttpMediaTypeNotSupportedException ex, - HttpHeaders headers, - HttpStatus status, - WebRequest request) { - StringBuilder builder = new StringBuilder(); - builder.append(ex.getContentType()); - builder.append(" media type is not supported. Supported media types are "); - ex.getSupportedMediaTypes().forEach(t -> builder.append(t).append(", ")); - return buildResponseEntity(new ApiError(HttpStatus.UNSUPPORTED_MEDIA_TYPE, builder.substring(0, builder.length() - 2), ex)); - } - - @ExceptionHandler(VIVOExportException.class) - protected ResponseEntity handleEntityNotFound( - VIVOExportException ex) { - ApiError apiError = new ApiError(NOT_FOUND); - apiError.setMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @ExceptionHandler(SparqlExecutionException.class) - protected ResponseEntity handleEntityNotFound( - SparqlExecutionException ex) { - ApiError apiError = new ApiError(INTERNAL_SERVER_ERROR); - apiError.setMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @ExceptionHandler(SparqlParsingException.class) - protected ResponseEntity handleEntityNotFound( - SparqlParsingException ex) { - ApiError apiError = new ApiError(INTERNAL_SERVER_ERROR); - apiError.setMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @ExceptionHandler(ConfigLoadingException.class) - protected ResponseEntity handleEntityNotFound( - ConfigLoadingException ex) { - ApiError apiError = new ApiError(INTERNAL_SERVER_ERROR); - apiError.setMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @ExceptionHandler(StreamManagerException.class) - protected ResponseEntity handleEntityNotFound( - StreamManagerException ex) { - ApiError apiError = new ApiError(INTERNAL_SERVER_ERROR); - apiError.setMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @Override - protected ResponseEntity handleHttpMessageNotReadable(HttpMessageNotReadableException ex, HttpHeaders headers, HttpStatus status, WebRequest request) { - ServletWebRequest servletWebRequest = (ServletWebRequest) request; - log.info("{} to {}", 
servletWebRequest.getHttpMethod(), servletWebRequest.getRequest().getServletPath()); - String error = "Malformed JSON request"; - return buildResponseEntity(new ApiError(HttpStatus.BAD_REQUEST, error, ex)); - } - - @Override - protected ResponseEntity handleHttpMessageNotWritable(HttpMessageNotWritableException ex, HttpHeaders headers, HttpStatus status, WebRequest request) { - String error = "Error writing JSON output"; - return buildResponseEntity(new ApiError(HttpStatus.INTERNAL_SERVER_ERROR, error, ex)); - } - - @Override - protected ResponseEntity handleNoHandlerFoundException( - NoHandlerFoundException ex, HttpHeaders headers, HttpStatus status, WebRequest request) { - ApiError apiError = new ApiError(BAD_REQUEST); - apiError.setMessage(String.format("Could not find the %s method for URL %s", ex.getHttpMethod(), ex.getRequestURL())); - apiError.setDebugMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - @ExceptionHandler(MethodArgumentTypeMismatchException.class) - protected ResponseEntity handleMethodArgumentTypeMismatch(MethodArgumentTypeMismatchException ex, - WebRequest request) { - ApiError apiError = new ApiError(BAD_REQUEST); - apiError.setMessage(String.format("The parameter '%s' of value '%s' could not be converted to type '%s'", ex.getName(), ex.getValue(), ex.getRequiredType().getSimpleName())); - apiError.setDebugMessage(ex.getMessage()); - return buildResponseEntity(apiError); - } - - - private ResponseEntity buildResponseEntity(ApiError apiError) { - return new ResponseEntity<>(apiError, apiError.getStatus()); - } - -} diff --git a/src/main/java/eu/tib/controller/validation/InputValidator.java b/src/main/java/eu/tib/controller/validation/InputValidator.java new file mode 100644 index 0000000..3b6c334 --- /dev/null +++ b/src/main/java/eu/tib/controller/validation/InputValidator.java @@ -0,0 +1,7 @@ +package eu.tib.controller.validation; + +public class InputValidator { + public static final String ror = "^https://ror.org/\\d{2}[a-z0-9]{5}\\d{2}$"; + public static final String orcid = "^https://orcid.org/\\d{4}-\\d{4}-\\d{4}-\\d{3}(\\d|X)$"; + public static final String doi = "^10\\.[0-9]{4}.*"; +} diff --git a/src/main/java/eu/tib/controller/validation/Validator.java b/src/main/java/eu/tib/controller/validation/Validator.java new file mode 100644 index 0000000..050ad84 --- /dev/null +++ b/src/main/java/eu/tib/controller/validation/Validator.java @@ -0,0 +1,6 @@ +package eu.tib.controller.validation; + +public class Validator { + public static final String ror = "^https://ror.org/\\d{2}[a-z0-9]{5}\\d{2}$"; + public static final String orcid = "^https://orcid.org/\\d{4}-\\d{4}-\\d{4}-\\d{3}(\\d|X)$"; +} diff --git a/src/main/java/eu/tib/exception/ApiError.java b/src/main/java/eu/tib/exception/ApiError.java deleted file mode 100644 index d1ad140..0000000 --- a/src/main/java/eu/tib/exception/ApiError.java +++ /dev/null @@ -1,88 +0,0 @@ -package eu.tib.exception; - -import com.fasterxml.jackson.annotation.JsonFormat; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; -import lombok.Data; -import org.springframework.http.HttpStatus; -import org.springframework.validation.FieldError; -import org.springframework.validation.ObjectError; - -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.List; - -@Data -@JsonTypeInfo(include = JsonTypeInfo.As.WRAPPER_OBJECT, use = JsonTypeInfo.Id.CUSTOM, property = "error", visible = true) 
-@JsonTypeIdResolver(LowerCaseClassNameResolver.class) -public -class ApiError { - - private HttpStatus status; - @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "dd-MM-yyyy hh:mm:ss") - private LocalDateTime timestamp; - private String message; - private String debugMessage; - private List subErrors; - - private ApiError() { - timestamp = LocalDateTime.now(); - } - - public ApiError(HttpStatus status) { - this(); - this.status = status; - } - - public ApiError(HttpStatus status, Throwable ex) { - this(); - this.status = status; - this.message = "Unexpected error"; - this.debugMessage = ex.getLocalizedMessage(); - } - - public ApiError(HttpStatus status, String message, Throwable ex) { - this(); - this.status = status; - this.message = message; - this.debugMessage = ex.getLocalizedMessage(); - } - - private void addSubError(ApiSubError subError) { - if (subErrors == null) { - subErrors = new ArrayList<>(); - } - subErrors.add(subError); - } - - private void addValidationError(String object, String field, Object rejectedValue, String message) { - addSubError(new ApiValidationError(object, field, rejectedValue, message)); - } - - private void addValidationError(String object, String message) { - addSubError(new ApiValidationError(object, message)); - } - - private void addValidationError(FieldError fieldError) { - this.addValidationError( - fieldError.getObjectName(), - fieldError.getField(), - fieldError.getRejectedValue(), - fieldError.getDefaultMessage()); - } - - public void addValidationErrors(List fieldErrors) { - fieldErrors.forEach(this::addValidationError); - } - - private void addValidationError(ObjectError objectError) { - this.addValidationError( - objectError.getObjectName(), - objectError.getDefaultMessage()); - } - - public void addValidationError(List globalErrors) { - globalErrors.forEach(this::addValidationError); - } -} - diff --git a/src/main/java/eu/tib/exception/ApiSubError.java b/src/main/java/eu/tib/exception/ApiSubError.java deleted file mode 100644 index 0fa9a5c..0000000 --- a/src/main/java/eu/tib/exception/ApiSubError.java +++ /dev/null @@ -1,5 +0,0 @@ -package eu.tib.exception; - -public abstract class ApiSubError { - -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/ApiValidationError.java b/src/main/java/eu/tib/exception/ApiValidationError.java deleted file mode 100644 index b4a61fa..0000000 --- a/src/main/java/eu/tib/exception/ApiValidationError.java +++ /dev/null @@ -1,20 +0,0 @@ -package eu.tib.exception; - -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.EqualsAndHashCode; - -@Data -@EqualsAndHashCode(callSuper = false) -@AllArgsConstructor -public class ApiValidationError extends ApiSubError { - private String object; - private String field; - private Object rejectedValue; - private String message; - - ApiValidationError(String object, String message) { - this.object = object; - this.message = message; - } -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/ConfigLoadingException.java b/src/main/java/eu/tib/exception/ConfigLoadingException.java deleted file mode 100644 index 90215b1..0000000 --- a/src/main/java/eu/tib/exception/ConfigLoadingException.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.tib.exception; - -import org.apache.commons.lang3.StringUtils; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.IntStream; - -public class ConfigLoadingException extends RuntimeException { - - public ConfigLoadingException(Class clazz, String... 
searchParamsMap) { - super(ConfigLoadingException.generateMessage(clazz.getSimpleName(), toMap(String.class, String.class, searchParamsMap))); - } - - private static String generateMessage(String entity, Map searchParams) { - return StringUtils.capitalize(entity) + - " could not read FileConfigurations for parameters " + - searchParams; - } - - private static Map toMap( - Class keyType, Class valueType, Object... entries) { - if (entries.length % 2 == 1) - throw new IllegalArgumentException("Invalid entries"); - return IntStream.range(0, entries.length / 2).map(i -> i * 2) - .collect(HashMap::new, - (m, i) -> m.put(keyType.cast(entries[i]), valueType.cast(entries[i + 1])), - Map::putAll); - } - -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/ErrorResponse.java b/src/main/java/eu/tib/exception/ErrorResponse.java new file mode 100644 index 0000000..6dd77a3 --- /dev/null +++ b/src/main/java/eu/tib/exception/ErrorResponse.java @@ -0,0 +1,39 @@ +package eu.tib.exception; + +import com.fasterxml.jackson.annotation.JsonFormat; +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; + +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +@Getter +@Setter +@RequiredArgsConstructor +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ErrorResponse { + private final int status; + private final String message; + @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "dd-MM-yyyy hh:mm:ss") + private LocalDateTime timestamp = LocalDateTime.now(); + private List errors; + + public void addValidationError(String field, String message) { + if (Objects.isNull(errors)) { + errors = new ArrayList<>(); + } + errors.add(new ValidationError(field, message)); + } + + @Getter + @Setter + @RequiredArgsConstructor + private static class ValidationError { + private final String field; + private final String message; + } +} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/GeneralExceptionHandler.java b/src/main/java/eu/tib/exception/GeneralExceptionHandler.java new file mode 100644 index 0000000..0c105d0 --- /dev/null +++ b/src/main/java/eu/tib/exception/GeneralExceptionHandler.java @@ -0,0 +1,69 @@ +package eu.tib.exception; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.validation.FieldError; +import org.springframework.web.bind.MethodArgumentNotValidException; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.context.request.WebRequest; +import org.springframework.web.servlet.mvc.method.annotation.ResponseEntityExceptionHandler; + +@Slf4j +@RestControllerAdvice +public class GeneralExceptionHandler extends ResponseEntityExceptionHandler { + + @ExceptionHandler(SparqlExecutionException.class) + public ResponseEntity handleSparqlExecutionException(SparqlExecutionException ex, WebRequest request) { + return buildErrorResponse(ex, HttpStatus.INTERNAL_SERVER_ERROR, request); + } + + @ExceptionHandler(VIVOExportException.class) + public ResponseEntity handleVIVOExportException(VIVOExportException ex, WebRequest request) { + return buildErrorResponse(ex, HttpStatus.SERVICE_UNAVAILABLE, request); + } + + @ExceptionHandler(Exception.class) + public 
ResponseEntity handleAllUncaughtException(Exception exception, WebRequest request) { + return buildErrorResponse(exception, "Unknown error occurred", HttpStatus.INTERNAL_SERVER_ERROR, request); + } + + private ResponseEntity buildErrorResponse(Exception exception, + HttpStatus httpStatus, + WebRequest request) { + return buildErrorResponse(exception, exception.getMessage(), httpStatus, request); + } + + private ResponseEntity buildErrorResponse(Exception exception, + String message, + HttpStatus httpStatus, + WebRequest request) { + ErrorResponse errorResponse = new ErrorResponse(httpStatus.value(), message); + return ResponseEntity.status(httpStatus).body(errorResponse); + } + + @Override + public ResponseEntity handleExceptionInternal( + Exception ex, + Object body, + HttpHeaders headers, + HttpStatus status, + WebRequest request) { + + return buildErrorResponse(ex, status, request); + } + + @Override + protected ResponseEntity handleMethodArgumentNotValid(MethodArgumentNotValidException ex, + HttpHeaders headers, + HttpStatus status, + WebRequest request) { + ErrorResponse errorResponse = new ErrorResponse(HttpStatus.UNPROCESSABLE_ENTITY.value(), "Validation error. Check 'errors' field for details."); + for (FieldError fieldError : ex.getBindingResult().getFieldErrors()) { + errorResponse.addValidationError(fieldError.getField(), fieldError.getDefaultMessage()); + } + return ResponseEntity.unprocessableEntity().body(errorResponse); + } +} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/LowerCaseClassNameResolver.java b/src/main/java/eu/tib/exception/LowerCaseClassNameResolver.java deleted file mode 100644 index 4dcc1f4..0000000 --- a/src/main/java/eu/tib/exception/LowerCaseClassNameResolver.java +++ /dev/null @@ -1,22 +0,0 @@ -package eu.tib.exception; - -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase; - -public class LowerCaseClassNameResolver extends TypeIdResolverBase { - - @Override - public String idFromValue(Object value) { - return value.getClass().getSimpleName().toLowerCase(); - } - - @Override - public String idFromValueAndType(Object value, Class suggestedType) { - return idFromValue(value); - } - - @Override - public JsonTypeInfo.Id getMechanism() { - return JsonTypeInfo.Id.CUSTOM; - } -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/SparqlExecutionException.java b/src/main/java/eu/tib/exception/SparqlExecutionException.java index b7192b5..aa2ead0 100644 --- a/src/main/java/eu/tib/exception/SparqlExecutionException.java +++ b/src/main/java/eu/tib/exception/SparqlExecutionException.java @@ -1,31 +1,7 @@ package eu.tib.exception; -import org.apache.commons.lang3.StringUtils; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.IntStream; - public class SparqlExecutionException extends RuntimeException { - - public SparqlExecutionException(Class clazz, String... searchParamsMap) { - super(SparqlExecutionException.generateMessage(clazz.getSimpleName(), toMap(String.class, String.class, searchParamsMap))); - } - - private static String generateMessage(String entity, Map searchParams) { - return StringUtils.capitalize(entity) + - " could not execute SparQL-Generate for parameters " + - searchParams; - } - - private static Map toMap( - Class keyType, Class valueType, Object... 
entries) { - if (entries.length % 2 == 1) - throw new IllegalArgumentException("Invalid entries"); - return IntStream.range(0, entries.length / 2).map(i -> i * 2) - .collect(HashMap::new, - (m, i) -> m.put(keyType.cast(entries[i]), valueType.cast(entries[i + 1])), - Map::putAll); + public SparqlExecutionException(String message) { + super(message); } - } \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/SparqlParsingException.java b/src/main/java/eu/tib/exception/SparqlParsingException.java deleted file mode 100644 index 02a07f2..0000000 --- a/src/main/java/eu/tib/exception/SparqlParsingException.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.tib.exception; - -import org.apache.commons.lang3.StringUtils; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.IntStream; - -public class SparqlParsingException extends RuntimeException { - - public SparqlParsingException(Class clazz, String... searchParamsMap) { - super(SparqlParsingException.generateMessage(clazz.getSimpleName(), toMap(String.class, String.class, searchParamsMap))); - } - - private static String generateMessage(String entity, Map searchParams) { - return StringUtils.capitalize(entity) + - " could not execute SparQL-Generate for parameters " + - searchParams; - } - - private static Map toMap( - Class keyType, Class valueType, Object... entries) { - if (entries.length % 2 == 1) - throw new IllegalArgumentException("Invalid entries"); - return IntStream.range(0, entries.length / 2).map(i -> i * 2) - .collect(HashMap::new, - (m, i) -> m.put(keyType.cast(entries[i]), valueType.cast(entries[i + 1])), - Map::putAll); - } - -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/StreamManagerException.java b/src/main/java/eu/tib/exception/StreamManagerException.java deleted file mode 100644 index 787657e..0000000 --- a/src/main/java/eu/tib/exception/StreamManagerException.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.tib.exception; - -import org.apache.commons.lang3.StringUtils; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.IntStream; - -public class StreamManagerException extends RuntimeException { - - public StreamManagerException(Class clazz, String... searchParamsMap) { - super(StreamManagerException.generateMessage(clazz.getSimpleName(), toMap(String.class, String.class, searchParamsMap))); - } - - private static String generateMessage(String entity, Map searchParams) { - return StringUtils.capitalize(entity) + - " could not create StreamManager properly for parameters " + - searchParams; - } - - private static Map toMap( - Class keyType, Class valueType, Object... 
entries) { - if (entries.length % 2 == 1) - throw new IllegalArgumentException("Invalid entries"); - return IntStream.range(0, entries.length / 2).map(i -> i * 2) - .collect(HashMap::new, - (m, i) -> m.put(keyType.cast(entries[i]), valueType.cast(entries[i + 1])), - Map::putAll); - } - -} \ No newline at end of file diff --git a/src/main/java/eu/tib/exception/VIVOExportException.java b/src/main/java/eu/tib/exception/VIVOExportException.java index da1c254..3a49753 100644 --- a/src/main/java/eu/tib/exception/VIVOExportException.java +++ b/src/main/java/eu/tib/exception/VIVOExportException.java @@ -1,31 +1,7 @@ package eu.tib.exception; -import org.apache.commons.lang3.StringUtils; - -import java.util.HashMap; -import java.util.Map; -import java.util.stream.IntStream; - public class VIVOExportException extends RuntimeException { - - public VIVOExportException(Class clazz, String... searchParamsMap) { - super(VIVOExportException.generateMessage(clazz.getSimpleName(), toMap(String.class, String.class, searchParamsMap))); - } - - private static String generateMessage(String entity, Map searchParams) { - return StringUtils.capitalize(entity) + - " could not export data to VIVO for parameters " + - searchParams; - } - - private static Map toMap( - Class keyType, Class valueType, Object... entries) { - if (entries.length % 2 == 1) - throw new IllegalArgumentException("Invalid entries"); - return IntStream.range(0, entries.length / 2).map(i -> i * 2) - .collect(HashMap::new, - (m, i) -> m.put(keyType.cast(entries[i]), valueType.cast(entries[i + 1])), - Map::putAll); + public VIVOExportException(String message) { + super(message); } - } \ No newline at end of file diff --git a/src/main/java/eu/tib/service/GeneratePipeline.java b/src/main/java/eu/tib/service/GeneratePipeline.java index cdfdc21..2954009 100644 --- a/src/main/java/eu/tib/service/GeneratePipeline.java +++ b/src/main/java/eu/tib/service/GeneratePipeline.java @@ -2,10 +2,7 @@ import com.google.gson.Gson; import com.google.gson.JsonSyntaxException; -import eu.tib.exception.ConfigLoadingException; import eu.tib.exception.SparqlExecutionException; -import eu.tib.exception.SparqlParsingException; -import eu.tib.exception.StreamManagerException; import eu.tib.utils.ResourceUtils; import fr.mines_stetienne.ci.sparql_generate.FileConfigurations; import fr.mines_stetienne.ci.sparql_generate.SPARQLExt; @@ -69,7 +66,7 @@ public Model run(String confPath, Map input) { return output; } catch (Exception e) { log.error("Error while executing SPARQL-Generate.", e); - throw new SparqlExecutionException(GeneratePipeline.class); + throw new SparqlExecutionException("Error while executing SPARQL-Generate."); } } @@ -79,8 +76,8 @@ public FileConfigurations readConfig(String confFilePath) { FileConfigurations config = (new Gson()).fromJson(conf, FileConfigurations.class); return config; } catch (IOException | NullPointerException | JsonSyntaxException e) { - log.error("Error while reading the config file.", e); - throw new ConfigLoadingException(GeneratePipeline.class, "configFile", confFilePath); + log.error("Error while reading config file " + confFilePath, e); + throw new SparqlExecutionException("Error while reading config file " + confFilePath); } } @@ -92,10 +89,10 @@ public SPARQLExtQuery parseSparqlGenerateQuery(String queryPath, FileConfigurati return q; } catch (IOException | NullPointerException e) { log.error(String.format("No query file %s was found.", queryPath), e); - throw new SparqlParsingException(GeneratePipeline.class, "queryName", 
queryPath); + throw new SparqlExecutionException(String.format("No query file %s was found.", queryPath)); } catch (QueryException e) { log.error(String.format("Query %s could not be parsed.", queryPath), e); - throw new SparqlParsingException(GeneratePipeline.class, "queryName", queryPath); + throw new SparqlExecutionException(String.format("Query %s could not be parsed.", queryPath)); } } @@ -144,7 +141,7 @@ public SPARQLExtStreamManager prepareStreamManager(String confPath, FileConfigur mapper.addAltEntry(doc.uri, docpath); } catch (Exception e) { log.error(String.format("No named query was found at %s.", docpath), e); - throw new StreamManagerException(GeneratePipeline.class, "namedQuery", docpath); + throw new SparqlExecutionException(String.format("No named query was found at %s.", docpath)); } }); } @@ -156,7 +153,7 @@ public SPARQLExtStreamManager prepareStreamManager(String confPath, FileConfigur mapper.addAltEntry(doc.uri, docpath); } catch (Exception e) { log.error(String.format("No documentset was found at %s.", docpath), e); - throw new StreamManagerException(GeneratePipeline.class, "documentset", docpath); + throw new SparqlExecutionException(String.format("No documentset was found at %s.", docpath)); } }); } @@ -167,8 +164,8 @@ public SPARQLExtStreamManager prepareStreamManager(String confPath, FileConfigur try { mapper.addAltEntry(doc.uri, docpath); } catch (Exception e) { - log.error(String.format("No named graph was found at %s.", docpath), e); - throw new StreamManagerException(GeneratePipeline.class, "namedGraph", docpath); + log.error(String.format("No named graph was found at %s.", docpath),e); + throw new SparqlExecutionException(String.format("No named graph was found at %s.", docpath)); } }); } diff --git a/src/main/java/eu/tib/service/ResponseService.java b/src/main/java/eu/tib/service/ResponseService.java index dbe7ccb..fe7115b 100644 --- a/src/main/java/eu/tib/service/ResponseService.java +++ b/src/main/java/eu/tib/service/ResponseService.java @@ -22,23 +22,28 @@ public class ResponseService { @Autowired private VIVOProperties vivoProperties; - public ResponseEntity buildResponse(String queryName, Map input) { + public ResponseEntity buildResponse(String queryName, Map input) { log.info("Starting pipeline for " + queryName); GeneratePipeline pipeline = new GeneratePipeline(); Model result = pipeline.run(queryName, input); log.info("Finished pipeline for " + queryName); + if (result.isEmpty()) log.info("No data was generated."); + if (vivoProperties.isValid()) { log.info("Found VIVO properties"); vivoExport.exportData(result, vivoProperties); - return ResponseEntity.status(HttpStatus.OK).body("{\"status\":\"SPARQL update accepted.\"}"); + + String msg = (result.isEmpty()) ? "No data was generated." 
: "SPARQL update accepted."; + String statusJSON = String.format("{\"status\":\"%s\"}", msg); + return ResponseEntity.status(HttpStatus.OK).body(statusJSON); } else { log.info("Returning JSON-LD"); StringWriter stringWriter = new StringWriter(); result.write(stringWriter, "JSON-LD"); - String dataJson = stringWriter.toString(); - return ResponseEntity.status(HttpStatus.OK).body(dataJson); + String dataJSON = stringWriter.toString(); + return ResponseEntity.status(HttpStatus.OK).body(dataJSON); } } } diff --git a/src/main/java/eu/tib/storage/VIVOExport.java b/src/main/java/eu/tib/storage/VIVOExport.java index f93906e..e839b4d 100644 --- a/src/main/java/eu/tib/storage/VIVOExport.java +++ b/src/main/java/eu/tib/storage/VIVOExport.java @@ -3,6 +3,8 @@ import eu.tib.exception.VIVOExportException; import lombok.extern.slf4j.Slf4j; import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.StmtIterator; import org.springframework.stereotype.Repository; import java.io.ByteArrayOutputStream; @@ -18,7 +20,36 @@ @Repository public class VIVOExport { + private static final int CHUNK_SIZE = 2500; // triples per 'chunk' + public void exportData(Model data, VIVOProperties vivo) { + if (!data.isEmpty()) exportInChunks(data, vivo); + } + + /** + * method taken from https://github.com/WheatVIVO/datasources/blob/master/datasources/src/main/java/org/wheatinitiative/vivo/datasource/util/sparql/SparqlEndpoint.java + * and modified to send chunk and free it for garbage collection + **/ + public void exportInChunks(Model data, VIVOProperties vivo) { + StmtIterator sit = data.listStatements(); + int i = 0; + Model currentChunk = ModelFactory.createDefaultModel(); + + while (sit.hasNext()) { + currentChunk.add(sit.nextStatement()); + i++; + + if (i >= CHUNK_SIZE || !sit.hasNext()) { + send2VIVO(currentChunk, vivo); + //reset variables + currentChunk = ModelFactory.createDefaultModel(); + i = 0; + } + } + } + + public void send2VIVO(Model data, VIVOProperties vivo) { + log.info("Writing " + data.size() + " new statements to VIVO"); String sparqlInsertQuery = buildInsertQuery(data, vivo.getGraph()); log.debug("Sparql Insert Query: \n" + sparqlInsertQuery); @@ -59,15 +90,12 @@ public void send(HttpRequest request) { HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); if (response.statusCode() != 200) { - log.info("Error while exporting data to VIVO: " + response.statusCode()); - log.info(response.body()); - throw new VIVOExportException(VIVOExport.class, "errorCode", Integer.toString(response.statusCode()), - "errorMessage", response.body()); + log.error(response.body()); + throw new VIVOExportException("Error while exporting data to VIVO: " + response.statusCode()); } } catch (IOException | InterruptedException e) { - log.error("Error while exporting data to VIVO", e); - throw new VIVOExportException(VIVOExport.class, "error", e.getClass().getName()); + throw new VIVOExportException("Error while exporting data to VIVO"); } } } \ No newline at end of file diff --git a/src/main/java/eu/tib/swagger/SwaggerConfig.java b/src/main/java/eu/tib/swagger/SwaggerConfig.java index 34d6b91..9e4ddb2 100644 --- a/src/main/java/eu/tib/swagger/SwaggerConfig.java +++ b/src/main/java/eu/tib/swagger/SwaggerConfig.java @@ -19,6 +19,7 @@ public Docket productApi() { .apis( RequestHandlerSelectors.basePackage( "eu.tib" ) ) .paths(PathSelectors.any()) .build() + .useDefaultResponseMessages(false) .pathMapping("/") .apiInfo(apiInfo()); } @@ 
-26,8 +27,8 @@ public Docket productApi() { private ApiInfo apiInfo() { return new ApiInfoBuilder() .title("generate2vivo") - .description("Data Ingest from different datasources like Datacite Commons or ROR to VIVO") - .version("1.0.0") + .description("Extensible Data Ingest Tool for VIVO. Contains data sources like Datacite Commons, ORCID and ROR.") + .version("1.1.0") .build(); } } diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person/documentset/orga2person.graphql b/src/main/resources/sparqlg/datacitecommons/orga2person/documentset/orga2person.graphql index 0a01754..3292bba 100644 --- a/src/main/resources/sparqlg/datacitecommons/orga2person/documentset/orga2person.graphql +++ b/src/main/resources/sparqlg/datacitecommons/orga2person/documentset/orga2person.graphql @@ -1,9 +1,6 @@ query orga2person($ror: ID!, $after: String) { organization(id: $ror) { - id - name - - people(first:100, after: $after) { + people(first:1000, after: $after) { pageInfo { endCursor hasNextPage diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person/orga2person.rqg b/src/main/resources/sparqlg/datacitecommons/orga2person/orga2person.rqg index bc40b9a..f88e743 100644 --- a/src/main/resources/sparqlg/datacitecommons/orga2person/orga2person.rqg +++ b/src/main/resources/sparqlg/datacitecommons/orga2person/orga2person.rqg @@ -1,56 +1,26 @@ PREFIX iter: PREFIX fun: -PREFIX rdfs: -PREFIX foaf: -PREFIX obo: -PREFIX vcard: -PREFIX vivo: -PREFIX owl: -GENERATE (?ror, ?after) { - - <{?rorid}> a foaf:Organization ; - rdfs:label ?name . +GENERATE (?ror, ?orga_id, ?after) { +### map person data +### and for every person: create a connection to organization GENERATE { - <{?orcidid}-to-{?rorid}> a vivo:Position ; - rdfs:label "Unknown" ; - vivo:relates <{?orcidid}> ; - vivo:relates <{?rorid}> . - - <{?orcidid}> a foaf:Person ; - rdfs:label ?completeName ; - vivo:orcidId ; - obo:ARG_2000028 <{?orcid}-vcard-dc> . - - <{?orcid}> a owl:Thing ; - vivo:confirmedOrcidId <{?orcidid}> . - - <{?orcidid}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?orcidid}> ; - vcard:hasName <{?orcidid}-vcard-name> . + GENERATE ( ?person_data ) . - <{?orcidid}-vcard-name> a vcard:Name ; - vcard:familyName ?familyName ; - vcard:givenName ?givenName . + GENERATE (?person_id, ?orga_id, ?role, ?start_date, ?end_date) . } - ITERATOR iter:JSONPath(?orga, '$.people.nodes[*]') AS ?person + ITERATOR iter:JSONPath(?orga_data, '$.people.nodes[*]') AS ?person_data WHERE{ - # basic properties of a person in json - BIND(fun:JSONPath(?person, '$.id' ) AS ?orcid) - BIND(REPLACE(?orcid, "https://orcid.org/" , "" ) AS ?orcidid) - BIND(fun:JSONPath(?person, '$.givenName' ) AS ?givenName) - BIND(fun:JSONPath(?person, '$.familyName' ) AS ?familyName) - # processed properties - BIND(CONCAT(?familyName, ", ", ?givenName) AS ?completeName) - }. + BIND((?person_data) AS ?person_id) + } . - GENERATE{ - GENERATE ( ?ror, ?cursor ) . - } + +############################# call this query again if hasNextPage =true ############################################# + GENERATE ( ?ror, ?orga_id, ?cursor ) WHERE{ - BIND(fun:JSONPath(?orga, '$.people.pageInfo.hasNextPage' ) AS ?hasNextPage) - BIND(fun:JSONPath(?orga, '$.people.pageInfo.endCursor' ) AS ?endCursor) + BIND(fun:JSONPath(?orga_data, '$.people.pageInfo.hasNextPage' ) AS ?hasNextPage) + BIND(fun:JSONPath(?orga_data, '$.people.pageInfo.endCursor' ) AS ?endCursor) BIND(IF(STR(?hasNextPage)="true", ?endCursor, ?undef) AS ?cursor ) FILTER( BOUND(?cursor) ). }. 
@@ -66,10 +36,9 @@ WHERE { BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) BIND( fun:HTTPExtractBody( ?response ) AS ?body ) - BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga ) - # basic properties of organization in json - BIND(fun:JSONPath(?orga, '$.id' ) AS ?ror) - BIND(REPLACE(?ror, "https://ror.org/" , "" ) AS ?rorid) - BIND(fun:JSONPath(?orga, '$.name' ) AS ?name) + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga_data ) } diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person/orga_source.rqg b/src/main/resources/sparqlg/datacitecommons/orga2person/orga_source.rqg new file mode 100644 index 0000000..048abbe --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/orga2person/orga_source.rqg @@ -0,0 +1,26 @@ +PREFIX fun: + +GENERATE (?ror) { + # create organization once + GENERATE ( ?orga_data ) . + + # go into loop + GENERATE ( ?ror, ?orga_id, ?undef ) . +} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"ror":"{?ror}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga_data ) + BIND((?orga_data) AS ?orga_id) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/orga2person/sparql-generate-conf.json index 7d844dc..8f575cd 100644 --- a/src/main/resources/sparqlg/datacitecommons/orga2person/sparql-generate-conf.json +++ b/src/main/resources/sparqlg/datacitecommons/orga2person/sparql-generate-conf.json @@ -1,14 +1,56 @@ { "base": "http://vivo.mydomain.edu/individual/", - "query": "orga2person.rqg", - "documentset": [ + "query": "orga_source.rqg", + "namedqueries": [ { "uri": "https://projects.tib.eu/tapir/datacitecommons/orga2person/orga2person.rqg", "path": "orga2person.rqg" }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_source.rqg", + "path": "../organization/orga_source.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_mapping.rqg", + "path": "../organization/orga_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_id.rqg", + "path": "../organization/orga_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/organization.rqg", + "path": "../../vivo-rdf/organization.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_mapping.rqg", + "path": "../person/person_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_id.rqg", + "path": "../person/person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/position.rqg", + "path": 
"../../vivo-rdf/position.rqg" + } + ], + "documentset": [ { "uri": "https://projects.tib.eu/tapir/datacitecommons/orga2person/documentset/orga2person.graphql", "path": "documentset/orga2person.graphql" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/documentset/organization.graphql", + "path": "../organization/documentset/organization.graphql" } ] } \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person2publication/documentset/orga2person.graphql b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/documentset/orga2person.graphql new file mode 100644 index 0000000..3292bba --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/documentset/orga2person.graphql @@ -0,0 +1,15 @@ +query orga2person($ror: ID!, $after: String) { + organization(id: $ror) { + people(first:1000, after: $after) { + pageInfo { + endCursor + hasNextPage + } + nodes { + id + givenName + familyName + } + } + } +} diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga2person2publication.rqg b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga2person2publication.rqg new file mode 100644 index 0000000..d46f8fb --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga2person2publication.rqg @@ -0,0 +1,45 @@ +PREFIX iter: +PREFIX fun: + +GENERATE (?ror, ?orga_id, ?after) { + +### map person data +### and for every person: create a connection to organization + GENERATE { + GENERATE ( ?orcid, ?person_id, ?undef ) . + + GENERATE (?person_id, ?orga_id, ?role, ?start_date, ?end_date) . + } + ITERATOR iter:JSONPath(?orga_data, '$.people.nodes[*]') AS ?person_data + WHERE{ + BIND((?person_data) AS ?person_id) + BIND(CONCAT("https://orcid.org/", ?person_id) AS ?orcid) + } . + + +############################# call this query again if hasNextPage =true ############################################# + GENERATE ( ?ror, ?orga_id,?cursor ) + WHERE{ + BIND(fun:JSONPath(?orga_data, '$.people.pageInfo.hasNextPage' ) AS ?hasNextPage) + BIND(fun:JSONPath(?orga_data, '$.people.pageInfo.endCursor' ) AS ?endCursor) + BIND(IF(STR(?hasNextPage)="true", ?endCursor, ?undef) AS ?cursor ) + FILTER( BOUND(?cursor) ). + }. +} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND(IF(BOUND(?after), ?after, "") AS ?after) + BIND ("""\{"variables":\{"ror":"{?ror}", "after":"{?after}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga_source.rqg b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga_source.rqg new file mode 100644 index 0000000..b378fe8 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/orga_source.rqg @@ -0,0 +1,26 @@ +PREFIX fun: + +GENERATE (?ror) { + # create organization once + GENERATE ( ?orga_data ) . + + # go into loop + GENERATE ( ?ror, ?orga_id, ?undef ) . 
+} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"ror":"{?ror}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga_data ) + BIND((?orga_data) AS ?orga_id) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/orga2person2publication/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/sparql-generate-conf.json new file mode 100644 index 0000000..5e132c3 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/orga2person2publication/sparql-generate-conf.json @@ -0,0 +1,76 @@ +{ + "base": "http://vivo.mydomain.edu/individual/", + "query": "orga_source.rqg", + "namedqueries": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/orga2person2publication/orga2person2publication.rqg", + "path": "orga2person2publication.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_mapping.rqg", + "path": "../organization/orga_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_id.rqg", + "path": "../organization/orga_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/organization.rqg", + "path": "../../vivo-rdf/organization.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_mapping.rqg", + "path": "../person/person_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_id.rqg", + "path": "../person/person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person2publication/person2publication.rqg", + "path": "../person2publication/person2publication.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/position.rqg", + "path": "../../vivo-rdf/position.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/authorship.rqg", + "path": "../../vivo-rdf/authorship.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/research.rqg", + "path": "../../vivo-rdf/research.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_mapping.rqg", + "path": "../work/work_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_id.rqg", + "path": "../work/work_id.rqg" + } + ], + "documentset": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/orga2person2publication/documentset/orga2person.graphql", + "path": "documentset/orga2person.graphql" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person2publication/documentset/person2publication.graphql", + "path": "../person2publication/documentset/person2publication.graphql" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/documentset/organization.graphql", + "path": "../organization/documentset/organization.graphql" + } + ] +} \ No newline at end of file diff --git 
a/src/main/resources/sparqlg/datacitecommons/organization/documentset/organization.graphql b/src/main/resources/sparqlg/datacitecommons/organization/documentset/organization.graphql new file mode 100644 index 0000000..2126463 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/organization/documentset/organization.graphql @@ -0,0 +1,10 @@ +query organization($ror: ID!) { + organization(id: $ror) { + id + name + geolocation { + pointLatitude + pointLongitude + } + } +} diff --git a/src/main/resources/sparqlg/datacitecommons/organization/orga_id.rqg b/src/main/resources/sparqlg/datacitecommons/organization/orga_id.rqg new file mode 100644 index 0000000..8e489cc --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/organization/orga_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?orga_data) { + REPLACE(fun:JSONPath(?orga_data, '$.id' ), "https://ror.org/" , "" ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/organization/orga_mapping.rqg b/src/main/resources/sparqlg/datacitecommons/organization/orga_mapping.rqg new file mode 100644 index 0000000..6a20c7d --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/organization/orga_mapping.rqg @@ -0,0 +1,18 @@ +PREFIX fun: +PREFIX xsd: + +GENERATE (?orga_data) { + ## fill in blanks in organization definition in vivo ontology + GENERATE ( ?id, ?name, ?established_dtv, ?email, ?website, ?city, ?state, ?country, ?lat, ?lng) . + + ## add ROR as PID + GENERATE (?id, "https://ror.org/", ?id) . +} +WHERE { + ### basic properties of organization + BIND((?orga_data) AS ?id) + BIND(fun:JSONPath(?orga_data, '$.name' ) AS ?name) + + BIND(xsd:decimal(fun:JSONPath(?orga_data, '$.geolocation.pointLatitude' )) AS ?lat) + BIND(xsd:decimal(fun:JSONPath(?orga_data, '$.geolocation.pointLongitude' )) AS ?lng) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/organization/orga_source.rqg b/src/main/resources/sparqlg/datacitecommons/organization/orga_source.rqg new file mode 100644 index 0000000..32e4e98 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/organization/orga_source.rqg @@ -0,0 +1,21 @@ +PREFIX fun: + +GENERATE (?ror) { + GENERATE ( ?orga_data ) . 
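`orga_id.rqg` above derives the VIVO-local identifier by stripping the `https://ror.org/` prefix from the organization's `id` field, and `orga_mapping.rqg` reuses that value both as the individual's id and as the account name of the ROR PID. A one-function sketch of the same derivation, with a hypothetical ROR id:

```python
import re

def ror_local_id(orga_data: dict) -> str:
    """Strip the ROR URL prefix from $.id, mirroring orga_id.rqg."""
    return re.sub(r"https://ror\.org/", "", orga_data["id"])

# Hypothetical input and result:
# ror_local_id({"id": "https://ror.org/01abcd234"})  ->  "01abcd234"
```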
+} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"ror":"{?ror}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.organization" ) AS ?orga_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/organization/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/organization/sparql-generate-conf.json new file mode 100644 index 0000000..2b5ce9f --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/organization/sparql-generate-conf.json @@ -0,0 +1,28 @@ +{ + "base": "http://vivo.mydomain.edu/individual/", + "query": "orga_source.rqg", + "namedqueries": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_mapping.rqg", + "path": "orga_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/orga_id.rqg", + "path": "orga_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/organization.rqg", + "path": "../../vivo-rdf/organization.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + } + ], + "documentset": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/organization/documentset/organization.graphql", + "path": "documentset/organization.graphql" + } + ] +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person/documentset/person.graphql b/src/main/resources/sparqlg/datacitecommons/person/documentset/person.graphql new file mode 100644 index 0000000..21fb92b --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person/documentset/person.graphql @@ -0,0 +1,7 @@ +query person($orcid :ID!){ + person(id: $orcid) { + familyName + givenName + id + } +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person/person_id.rqg b/src/main/resources/sparqlg/datacitecommons/person/person_id.rqg new file mode 100644 index 0000000..e0ff92d --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person/person_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?person_data) { + REPLACE(fun:JSONPath(?person_data, '$.id' ), "https://orcid.org/" , "" ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person/person_mapping.rqg b/src/main/resources/sparqlg/datacitecommons/person/person_mapping.rqg new file mode 100644 index 0000000..10174d7 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person/person_mapping.rqg @@ -0,0 +1,18 @@ +PREFIX fun: + +GENERATE (?person_data) { + + ## fill in blanks in person definition in vivo ontology + GENERATE (?id, ?orcid, ?familyName, ?givenName) . + + ## add ORCID as PID + GENERATE (?id, "https://orcid.org/", ?id) . 
+} +WHERE { + ### basic properties of organization + BIND((?person_data) AS ?id) + BIND(?id AS ?orcid) + + BIND(fun:JSONPath(?person_data, '$.givenName' ) AS ?givenName) + BIND(fun:JSONPath(?person_data, '$.familyName' ) AS ?familyName) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person/person_source.rqg b/src/main/resources/sparqlg/datacitecommons/person/person_source.rqg new file mode 100644 index 0000000..e0c5cee --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person/person_source.rqg @@ -0,0 +1,21 @@ +PREFIX fun: + +GENERATE (?orcid) { + GENERATE ( ?person_data ) . +} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"orcid":"{?orcid}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.person" ) AS ?person_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/person/sparql-generate-conf.json new file mode 100644 index 0000000..d78aacf --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person/sparql-generate-conf.json @@ -0,0 +1,28 @@ +{ + "base": "http://vivo.mydomain.edu/individual/", + "query": "person_source.rqg", + "namedqueries": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_mapping.rqg", + "path": "person_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_id.rqg", + "path": "person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + } + ], + "documentset": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/documentset/person.graphql", + "path": "documentset/person.graphql" + } + ] +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person2publication/documentset/person2publication.graphql b/src/main/resources/sparqlg/datacitecommons/person2publication/documentset/person2publication.graphql index a6215b9..ba7fa1b 100644 --- a/src/main/resources/sparqlg/datacitecommons/person2publication/documentset/person2publication.graphql +++ b/src/main/resources/sparqlg/datacitecommons/person2publication/documentset/person2publication.graphql @@ -1,10 +1,6 @@ query person($orcid :ID!, $after:String){ person(id: $orcid) { - familyName - givenName - id - - publications(first:100, after: $after) { + publications(first:1000, after: $after) { pageInfo { endCursor hasNextPage diff --git a/src/main/resources/sparqlg/datacitecommons/person2publication/person2publication.rqg b/src/main/resources/sparqlg/datacitecommons/person2publication/person2publication.rqg index a974f60..c83b1fb 100644 --- a/src/main/resources/sparqlg/datacitecommons/person2publication/person2publication.rqg +++ b/src/main/resources/sparqlg/datacitecommons/person2publication/person2publication.rqg @@ -1,105 +1,23 @@ PREFIX iter: PREFIX fun: -PREFIX rdfs: -PREFIX foaf: -PREFIX obo: -PREFIX 
vcard: -PREFIX vivo: -PREFIX owl: -PREFIX bibo: -PREFIX xsd: - -GENERATE (?orcid, ?after) { - -## triples for person - <{?orcidid}> a foaf:Person ; - rdfs:label ?completeName ; - vivo:orcidId <{?orcid}> ; - obo:ARG_2000028 <{?orcidid}-vcard> . - - <{?orcid}> a owl:Thing ; - vivo:confirmedOrcidId <{?orcidid}> . - - <{?orcidid}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?orcidid}> ; - vcard:hasName <{?orcidid}-vcard-name> . - - <{?orcidid}-vcard-name> a vcard:Name ; - vcard:familyName ?familyName ; - vcard:givenName ?givenName . +GENERATE (?orcid, ?person_id, ?after) { +### map work data +### and for every work: create a connection to person GENERATE { - ## connection person -> publication - <{?orcidid}-to-{?doi_clean}> a vivo:Authorship ; - vivo:relates <{?orcidid}> ; - vivo:relates <{?doi_clean}> . - - ## triples for publication - <{?doi_clean}> a ?pubtype ; - rdfs:label ?title ; - bibo:doi ?doi ; - vivo:dateTimeValue <{?doi_clean}-dtv> . + GENERATE ( ?work_data ) . - <{?doi_clean}-dtv> a vivo:DateTimeValue ; - vivo:dateTime ?pubyear_dtv ; - vivo:dateTimePrecision vivo:yearPrecision . + GENERATE (?person_id, ?work_id) . } - ITERATOR iter:JSONPath(?person, '$.publications.nodes[*][?(!@.versions.nodes[0])]') AS ?pub + ITERATOR iter:JSONPath(?person_data, '$.publications.nodes[*][?(!@.versions.nodes[0])]') AS ?work_data WHERE{ - # basic properties of a publication in json - BIND(fun:JSONPath(?pub, '$.doi' ) AS ?doi) - BIND(REPLACE(?doi, "\\W", "", "i") AS ?doi_clean) - BIND(fun:JSONPath(?pub, '$.titles[0].title' ) AS ?title) + BIND((?work_data) AS ?work_id) + } . - BIND(fun:JSONPath(?pub, '$.publicationYear' ) AS ?pubyear) - BIND(xsd:dateTime(CONCAT(STR(?pubyear), "-01-01T00:00:00")) AS ?pubyear_dtv) - BIND(fun:JSONPath(?pub, '$.types.citeproc' ) AS ?citeproc) - # mapping taken from VIVO (claim DOI) - BIND ( - IRI(COALESCE( - IF(?citeproc = "article-journal", bibo:AcademicArticle, 1/0), - IF(?citeproc = "article", bibo:Article, 1/0), - IF(?citeproc = "book", bibo:Book, 1/0), - IF(?citeproc = "chapter", bibo:Chapter, 1/0), - IF(?citeproc = "dataset", vivo:Dataset, 1/0), - IF(?citeproc = "figure", bibo:Image, 1/0), - IF(?citeproc = "graphic", bibo:Image, 1/0), - IF(?citeproc = "legal_case", bibo:LegalCaseDocument, 1/0), - IF(?citeproc = "legislation", bibo:Legislation, 1/0), - IF(?citeproc = "manuscript", bibo:Manuscript, 1/0), - IF(?citeproc = "map", bibo:Map, 1/0), - IF(?citeproc = "musical_score", vivo:Score, 1/0), - IF(?citeproc = "paper-conference", vivo:ConferencePaper, 1/0), - IF(?citeproc = "patent", bibo:Patent, 1/0), - IF(?citeproc = "personal_communication", bibo:PersonalCommunicationDocument, 1/0), - IF(?citeproc = "post-weblog", vivo:BlogPosting, 1/0), - IF(?citeproc = "report", bibo:Report, 1/0), - IF(?citeproc = "review", vivo:Review, 1/0), - IF(?citeproc = "speech", vivo:Speech, 1/0), - IF(?citeproc = "thesis", bibo:Thesis, 1/0), - IF(?citeproc = "webpage", bibo:Webpage, 1/0), - - # mapping leftover types - IF(STRSTARTS(?citeproc, "article"), bibo:Article, 1/0), - IF(?citeproc = "bill", bibo:Bill, 1/0), - IF(STRSTARTS(?citeproc, "entry"), bibo:DocumentPart, 1/0), - IF(?citeproc = "interview", bibo:Interview, 1/0), - IF(?citeproc = "motion_picture", bibo:Film, 1/0), - IF(?citeproc = "post", vivo:BlogPosting, 1/0), - IF(?citeproc = "review-book", vivo:Review, 1/0), - IF(?citeproc = "song", bibo:AudioDocument, 1/0), - IF(?citeproc = "treaty", bibo:LegalDocument, 1/0), - - bibo:Document #default if nothing else fits - )) AS ?pubtype - ) - }. 
- - GENERATE{ - GENERATE ( ?orcid, ?cursor ) . - } +############################# call this query again if hasNextPage =true ############################################# + GENERATE ( ?orcid, ?person_id, ?cursor ) WHERE{ BIND(fun:JSONPath(?person, '$.publications.pageInfo.hasNextPage' ) AS ?hasNextPage) BIND(fun:JSONPath(?person, '$.publications.pageInfo.endCursor' ) AS ?endCursor) @@ -120,11 +38,8 @@ WHERE { FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) BIND( fun:HTTPExtractBody( ?response ) AS ?body ) - BIND( fun:JSONPath( ?body, "$.data.person" ) AS ?person ) - # basic properties of a person in json - BIND(fun:JSONPath(?person, '$.id' ) AS ?orcid) - BIND(REPLACE(?orcid, "https://orcid.org/" , "" ) AS ?orcidid) - BIND(fun:JSONPath(?person, '$.givenName' ) AS ?givenName) - BIND(fun:JSONPath(?person, '$.familyName' ) AS ?familyName) - BIND(CONCAT(?familyName, ", ", ?givenName) AS ?completeName) -} + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.person" ) AS ?person_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person2publication/person_source.rqg b/src/main/resources/sparqlg/datacitecommons/person2publication/person_source.rqg new file mode 100644 index 0000000..decfcfa --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/person2publication/person_source.rqg @@ -0,0 +1,26 @@ +PREFIX fun: + +GENERATE (?orcid) { + # create person once + GENERATE ( ?person_data ) . + + # go into loop + GENERATE ( ?orcid, ?person_id, ?undef ) . +} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"orcid":"{?orcid}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.person" ) AS ?person_data ) + BIND((?person_data) AS ?person_id) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/person2publication/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/person2publication/sparql-generate-conf.json index eb1e635..6f4ab92 100644 --- a/src/main/resources/sparqlg/datacitecommons/person2publication/sparql-generate-conf.json +++ b/src/main/resources/sparqlg/datacitecommons/person2publication/sparql-generate-conf.json @@ -1,14 +1,52 @@ { "base": "http://vivo.mydomain.edu/individual/", - "query": "person2publication.rqg", - "documentset": [ + "query": "person_source.rqg", + "namedqueries": [ { "uri": "https://projects.tib.eu/tapir/datacitecommons/person2publication/person2publication.rqg", "path": "person2publication.rqg" }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_mapping.rqg", + "path": "../work/work_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_id.rqg", + "path": "../work/work_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/research.rqg", + "path": "../../vivo-rdf/research.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/person_mapping.rqg", + "path": "../person/person_mapping.rqg" + }, + { + "uri": 
"https://projects.tib.eu/tapir/datacitecommons/person/person_id.rqg", + "path": "../person/person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/authorship.rqg", + "path": "../../vivo-rdf/authorship.rqg" + } + ], + "documentset": [ { "uri": "https://projects.tib.eu/tapir/datacitecommons/person2publication/documentset/person2publication.graphql", "path": "documentset/person2publication.graphql" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/person/documentset/person.graphql", + "path": "../person/documentset/person.graphql" } ] } \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/work/documentset/work.graphql b/src/main/resources/sparqlg/datacitecommons/work/documentset/work.graphql new file mode 100644 index 0000000..80ff97e --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/work/documentset/work.graphql @@ -0,0 +1,12 @@ +query work($doi :ID!){ + work(id: $doi) { + doi + titles { + title + } + publicationYear + types { + citeproc + } + } +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/work/sparql-generate-conf.json b/src/main/resources/sparqlg/datacitecommons/work/sparql-generate-conf.json new file mode 100644 index 0000000..05446a2 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/work/sparql-generate-conf.json @@ -0,0 +1,24 @@ +{ + "base": "http://vivo.mydomain.edu/individual/", + "query": "work_source.rqg", + "namedqueries": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_mapping.rqg", + "path": "work_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/work_id.rqg", + "path": "work_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/research.rqg", + "path": "../../vivo-rdf/research.rqg" + } + ], + "documentset": [ + { + "uri": "https://projects.tib.eu/tapir/datacitecommons/work/documentset/work.graphql", + "path": "documentset/work.graphql" + } + ] +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/work/work_id.rqg b/src/main/resources/sparqlg/datacitecommons/work/work_id.rqg new file mode 100644 index 0000000..9641838 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/work/work_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?work_data) { + REPLACE(fun:JSONPath(?work_data, '$.doi' ), "\\W", "", "i") +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/work/work_mapping.rqg b/src/main/resources/sparqlg/datacitecommons/work/work_mapping.rqg new file mode 100644 index 0000000..f8cf9f5 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/work/work_mapping.rqg @@ -0,0 +1,62 @@ +PREFIX fun: +PREFIX bibo: +PREFIX vivo: +PREFIX xsd: + +GENERATE (?work_data) { + + ## fill in blanks in work definition in vivo ontology + GENERATE (?id, ?pubtype, ?title, ?doi, ?pubyear_dtv) . 
+} +WHERE { + ### basic properties of organization + BIND((?work_data) AS ?id) + + # basic properties of a publication in json + BIND(fun:JSONPath(?work_data, '$.doi' ) AS ?doi) + BIND(fun:JSONPath(?work_data, '$.titles[0].title' ) AS ?title) + + BIND(fun:JSONPath(?work_data, '$.publicationYear' ) AS ?pubyear) + BIND(xsd:dateTime(CONCAT(STR(?pubyear), "-01-01T00:00:00")) AS ?pubyear_dtv) + + BIND(fun:JSONPath(?work_data, '$.types.citeproc' ) AS ?citeproc) + # mapping taken from VIVO (claim DOI functionality) + BIND ( + IRI(COALESCE( + IF(?citeproc = "article-journal", bibo:AcademicArticle, 1/0), + IF(?citeproc = "article", bibo:Article, 1/0), + IF(?citeproc = "book", bibo:Book, 1/0), + IF(?citeproc = "chapter", bibo:Chapter, 1/0), + IF(?citeproc = "dataset", vivo:Dataset, 1/0), + IF(?citeproc = "figure", bibo:Image, 1/0), + IF(?citeproc = "graphic", bibo:Image, 1/0), + IF(?citeproc = "legal_case", bibo:LegalCaseDocument, 1/0), + IF(?citeproc = "legislation", bibo:Legislation, 1/0), + IF(?citeproc = "manuscript", bibo:Manuscript, 1/0), + IF(?citeproc = "map", bibo:Map, 1/0), + IF(?citeproc = "musical_score", vivo:Score, 1/0), + IF(?citeproc = "paper-conference", vivo:ConferencePaper, 1/0), + IF(?citeproc = "patent", bibo:Patent, 1/0), + IF(?citeproc = "personal_communication", bibo:PersonalCommunicationDocument, 1/0), + IF(?citeproc = "post-weblog", vivo:BlogPosting, 1/0), + IF(?citeproc = "report", bibo:Report, 1/0), + IF(?citeproc = "review", vivo:Review, 1/0), + IF(?citeproc = "speech", vivo:Speech, 1/0), + IF(?citeproc = "thesis", bibo:Thesis, 1/0), + IF(?citeproc = "webpage", bibo:Webpage, 1/0), + + # mapping leftover types + IF(STRSTARTS(?citeproc, "article"), bibo:Article, 1/0), + IF(?citeproc = "bill", bibo:Bill, 1/0), + IF(STRSTARTS(?citeproc, "entry"), bibo:DocumentPart, 1/0), + IF(?citeproc = "interview", bibo:Interview, 1/0), + IF(?citeproc = "motion_picture", bibo:Film, 1/0), + IF(?citeproc = "post", vivo:BlogPosting, 1/0), + IF(?citeproc = "review-book", vivo:Review, 1/0), + IF(?citeproc = "song", bibo:AudioDocument, 1/0), + IF(?citeproc = "treaty", bibo:LegalDocument, 1/0), + + bibo:Document #default if nothing else fits + )) AS ?pubtype + ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/datacitecommons/work/work_source.rqg b/src/main/resources/sparqlg/datacitecommons/work/work_source.rqg new file mode 100644 index 0000000..0689ea3 --- /dev/null +++ b/src/main/resources/sparqlg/datacitecommons/work/work_source.rqg @@ -0,0 +1,21 @@ +PREFIX fun: + +GENERATE (?doi) { + GENERATE ( ?work_data ) . 
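The COALESCE/IF chain in `work_mapping.rqg` above is an ordered lookup from the Datacite `citeproc` type to a BIBO or VIVO class, followed by a prefix rule for the remaining `article*` values and `bibo:Document` as the final fallback. A condensed sketch of that dispatch, repeating only a few of the exact matches for brevity:

```python
CITEPROC_TO_CLASS = {
    "article-journal": "bibo:AcademicArticle",
    "article": "bibo:Article",
    "book": "bibo:Book",
    "chapter": "bibo:Chapter",
    "dataset": "vivo:Dataset",
    "paper-conference": "vivo:ConferencePaper",
    "thesis": "bibo:Thesis",
    # ... remaining exact matches as listed in work_mapping.rqg ...
}

def publication_class(citeproc: str) -> str:
    """Exact match first, then the article* prefix rule, then the default."""
    if citeproc in CITEPROC_TO_CLASS:
        return CITEPROC_TO_CLASS[citeproc]
    if citeproc.startswith("article"):
        return "bibo:Article"
    # further leftover rules (bill, entry*, interview, ...) omitted here
    return "bibo:Document"
```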
+} +SOURCE AS ?graphqlquery +WHERE { + # replace whitespace in graphql query (compress message) + BIND(REPLACE(?graphqlquery, "\\s+", " ") AS ?graphql) + BIND ("""\{"variables":\{"doi":"{?doi}"},"query":"{?graphql}"}""" AS ?postbody) + + # build http post request + BIND( fun:HTTPPost(, "Content-type: application/json" , ?postbody) AS ?response ) + FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) + BIND( fun:HTTPExtractBody( ?response ) AS ?body ) + + BIND( fun:JSONPath( ?body, "$.errors" ) AS ?errors ) + FILTER( !BOUND(?errors) ) + + BIND( fun:JSONPath( ?body, "$.data.work" ) AS ?work_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/README.md b/src/main/resources/sparqlg/orcid/employees/README.md index 072615a..6a0b69c 100644 --- a/src/main/resources/sparqlg/orcid/employees/README.md +++ b/src/main/resources/sparqlg/orcid/employees/README.md @@ -2,8 +2,7 @@ This query reengineers the DataciteCommons organization -> people connection, described in https://www.pidforum.org/t/employment-field-always-empty-when-using-connection/1571/12 , -skipping the first step ROR->Wikidata and adding a filter at the end to only return ORCIDs -affiliated with the organization via Ringgold or Grid-id in their ORCID profile and with an empty end-date. +adding a filter at the end to only return ORCIDs affiliated with the organization via Ringgold or Grid-id in their ORCID profile and with an empty end-date. This query is very costly and takes quite some time as you need to call the ORCID API x times (x being the number of affiliated employees). \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/currentEmployees.rqg b/src/main/resources/sparqlg/orcid/employees/currentEmployees.rqg deleted file mode 100644 index bd32b73..0000000 --- a/src/main/resources/sparqlg/orcid/employees/currentEmployees.rqg +++ /dev/null @@ -1,29 +0,0 @@ -PREFIX fun: -PREFIX iter: -PREFIX foaf: -PREFIX rdfs: - -GENERATE ( ?grid_id, ?ringgold_id, ?orcid ) { - - GENERATE { - <{?orcid}> a foaf:Person ; - rdfs:label ?name. - } - ITERATOR iter:JSONPath(?body, '$.affiliation-group[*]') AS ?employment - WHERE{ - # filter only employees where orga_id is same as ringgold or grid AND no end-date (=current) - BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.organization.disambiguated-organization.disambiguated-organization-identifier' ) AS ?orga_id) - BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.end-date' ) AS ?end_date) - FILTER( !BOUND(?end_date) && (?orga_id = ?grid_id || ?orga_id = ?ringgold_id) ). - - BIND(fun:JSONPath(?body, '$.affiliation-group[0].summaries[0].employment-summary.source.source-name.value' ) AS ?name) - }. 
- -} -WHERE{ - # query employments of orcid for every person - BIND( as ?geturl) - BIND( fun:HTTPGet(?geturl, "Accept: application/json") AS ?response ) - FILTER( fun:HTTPExtractResponseCode( ?response ) = 200 ) - BIND( fun:HTTPExtractBody( ?response ) AS ?body ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/employees_filter.rqg b/src/main/resources/sparqlg/orcid/employees/employees_filter.rqg new file mode 100644 index 0000000..1e0090b --- /dev/null +++ b/src/main/resources/sparqlg/orcid/employees/employees_filter.rqg @@ -0,0 +1,50 @@ +PREFIX fun: +PREFIX iter: + +## get all people affiliated with the organization +GENERATE (?orga_id, ?ringgold_id, ?grid_id){ + + ## get orcid data for each person + GENERATE{ + + ### for every person check if current employment and if yes, create triples + ## current = employees where orga_id is same as ringgold or grid AND no end-date + GENERATE { + ## create person in vivo ontology, if current employee + GENERATE (?orcid_data) . + + ## connect person to organization via current employment + GENERATE (?person_id, ?orga_id, ?employment) . + } + ITERATOR iter:JSONPath(?orcid_data, '$.activities-summary.employments.affiliation-group[*]') AS ?employment + WHERE{ + BIND((?orcid_data) AS ?person_id) + + BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.organization.disambiguated-organization.disambiguated-organization-identifier' ) AS ?disamb_orga) + BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.end-date' ) AS ?end_date) + FILTER( !BOUND(?end_date) && (?disamb_orga = ?grid_id || ?disamb_orga = ?ringgold_id) ). + }. + + } + ITERATOR iter:JSONPath(?orcid_employees, '$.expanded-result[*]') AS ?person + WHERE{ + BIND(fun:JSONPath(?person, '$.orcid-id') AS ?orcid) + + # query person data from ORCID as json + BIND( as ?orcid_url) + BIND( fun:HTTPGet(?orcid_url, "Accept: application/json" ) AS ?orcid_response ) + FILTER( fun:HTTPExtractResponseCode( ?orcid_response ) = 200 ) + BIND( fun:HTTPExtractBody( ?orcid_response ) AS ?orcid_data ) + } . +} +WHERE{ + ### call orcid with ringgold and grid-id for affiliated employees + BIND(IF(BOUND(?ringgold_id) && BOUND(?grid_id), "%20OR%20", "") AS ?connector) + BIND(IF(BOUND(?ringgold_id), "ringgold-org-id:{?ringgold_id}", "") AS ?ringgold_search) + BIND(IF(BOUND(?grid_id), "grid-org-id:{?grid_id}", "") AS ?grid_search) + + BIND("https://pub.orcid.org/v3.0/expanded-search/?q={?ringgold_search}{?connector}{?grid_search}" as ?orcid_employees_url) + BIND(fun:HTTPGet(IRI(?orcid_employees_url), "Accept: application/json") AS ?orcid_employees_response) + FILTER(fun:HTTPExtractResponseCode( ?orcid_employees_response ) = 200) + BIND(fun:HTTPExtractBody( ?orcid_employees_response ) AS ?orcid_employees) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/employees_source.rqg b/src/main/resources/sparqlg/orcid/employees/employees_source.rqg new file mode 100644 index 0000000..a329cfa --- /dev/null +++ b/src/main/resources/sparqlg/orcid/employees/employees_source.rqg @@ -0,0 +1,29 @@ +PREFIX fun: +PREFIX iter: + +GENERATE (?ror) { + + ### if ROR is included, you could use GENERATE ( ?ror_data ) . + ### but to keep data sources independent: + GENERATE ( ?ror_data, ?wikidata_id, ?ringgold_id, ?grid_id ) . + + GENERATE (?orga_id, ?ringgold_id, ?grid_id ) . 
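`employees_filter.rqg` above works in two stages: it first asks the ORCID expanded search for every ORCID iD whose profile carries the organization's Ringgold or GRID id, then fetches each person's record and keeps only employments whose disambiguated organization identifier matches and whose end date is empty. A compact Python sketch of that filter; the expanded-search URL is taken from the query above, while the per-person record endpoint `https://pub.orcid.org/v3.0/{orcid}/record` is an assumption, since that IRI is not visible in this excerpt:

```python
import requests

HEADERS = {"Accept": "application/json"}

def current_employees(ringgold_id: str, grid_id: str):
    """Two-stage filter mirroring employees_filter.rqg."""
    # Stage 1: expanded search for ORCID iDs affiliated via Ringgold or GRID id.
    query = f"ringgold-org-id:{ringgold_id} OR grid-org-id:{grid_id}"
    search = requests.get("https://pub.orcid.org/v3.0/expanded-search/",
                          params={"q": query}, headers=HEADERS).json()

    for hit in search.get("expanded-result", []):
        orcid = hit["orcid-id"]
        # Stage 2: fetch the record (assumed endpoint) and keep current employments.
        record = requests.get(f"https://pub.orcid.org/v3.0/{orcid}/record",
                              headers=HEADERS).json()
        for group in record["activities-summary"]["employments"]["affiliation-group"]:
            summary = group["summaries"][0]["employment-summary"]
            org = summary.get("organization", {}).get("disambiguated-organization") or {}
            org_id = org.get("disambiguated-organization-identifier")
            if summary.get("end-date") is None and org_id in (ringgold_id, grid_id):
                yield orcid, summary  # current employee of the organization
```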
+ +} +SOURCE AS ?ror_data +WHERE{ + ### extract wikidata id from ROR data + BIND(fun:JSONPath(?ror_data, '$.external_ids.Wikidata.all[0]' ) AS ?wikidata_id) + + ### call wikidata to get ringgold and grid-id for organization + BIND( as ?wikidata_url) + BIND(fun:HTTPGet(?wikidata_url, "Accept: application/json") AS ?wikidata_response) + FILTER(fun:HTTPExtractResponseCode( ?wikidata_response ) = 200) + BIND(fun:HTTPExtractBody( ?wikidata_response ) AS ?wikidata_body) + + BIND(fun:JSONPath(?wikidata_body, '$.entities.{?wikidata_id}.claims.P3500[0].mainsnak.datavalue.value' ) AS ?ringgold_id) + BIND(fun:JSONPath(?wikidata_body, '$.entities.{?wikidata_id}.claims.P2427[0].mainsnak.datavalue.value' ) AS ?grid_id) + + # after creating organization, get its id to make the orga-person connection + BIND((?ror_data) AS ?orga_id) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/employment_mapping.rqg b/src/main/resources/sparqlg/orcid/employees/employment_mapping.rqg new file mode 100644 index 0000000..9b6a369 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/employees/employment_mapping.rqg @@ -0,0 +1,23 @@ +PREFIX fun: +PREFIX xsd: + +## extract properties of employment data for mapping +GENERATE (?person_id, ?orga_id, ?employment) { + + GENERATE (?person_id, ?orga_id, ?role, ?start_date, ?end_date) . +} +WHERE{ + BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.role-title' ) AS ?role) + + BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.start-date' ) AS ?start) + BIND(fun:JSONPath(?start, '$.year.value') as ?start_year) + BIND(fun:JSONPath(?start, '$.month.value') as ?start_month) + BIND(fun:JSONPath(?start, '$.day.value') as ?start_day) + BIND(xsd:dateTime(CONCAT(?start_year, "-", ?start_month, "-", ?start_day, "T00:00:00")) as ?start_date) + + BIND(fun:JSONPath(?employment, '$.summaries[0].employment-summary.end-date' ) AS ?end) + BIND(fun:JSONPath(?end, '$.year.value') as ?end_year) + BIND(fun:JSONPath(?end, '$.month.value') as ?end_month) + BIND(fun:JSONPath(?end, '$.day.value') as ?end_day) + BIND(xsd:dateTime(CONCAT(?end_year, "-", ?end_month, "-", ?end_day, "T00:00:00")) as ?end_date) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/orga_source.rqg b/src/main/resources/sparqlg/orcid/employees/orga_source.rqg deleted file mode 100644 index 16a87a2..0000000 --- a/src/main/resources/sparqlg/orcid/employees/orga_source.rqg +++ /dev/null @@ -1,24 +0,0 @@ -PREFIX fun: -PREFIX iter: - -GENERATE (?wikidata) { - - GENERATE ( ?grid_id, ?ringgold_id, ?orcid ) - ITERATOR iter:JSONPath(?body, '$.expanded-result[*]') AS ?person - WHERE{ - BIND(fun:JSONPath(?person, '$.orcid-id') AS ?orcid) - }. 
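`employees_source.rqg` above resolves the Ringgold and GRID identifiers through Wikidata: it takes the Wikidata id from the ROR record's `external_ids`, fetches the entity as JSON and reads the claims P3500 (Ringgold ID) and P2427 (GRID ID). A minimal sketch of that lookup, assuming the Special:EntityData JSON endpoint, since the exact URL is not visible in this excerpt:

```python
import requests

def ringgold_and_grid(wikidata_id: str):
    """Read the Ringgold (P3500) and GRID (P2427) ids of a Wikidata entity."""
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{wikidata_id}.json"  # assumed
    entity = requests.get(url, headers={"Accept": "application/json"}).json()
    claims = entity["entities"][wikidata_id]["claims"]

    def first_value(prop: str):
        values = claims.get(prop, [])
        return values[0]["mainsnak"]["datavalue"]["value"] if values else None

    return first_value("P3500"), first_value("P2427")  # Ringgold id, GRID id
```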
- -} -SOURCE AS ?orga -WHERE{ - # get organization-ids (ringgold & grid) from wikidata - BIND(fun:JSONPath(?orga, '$.entities.{?wikidata}.claims.P3500[0].mainsnak.datavalue.value' ) AS ?ringgold_id) - BIND(fun:JSONPath(?orga, '$.entities.{?wikidata}.claims.P2427[0].mainsnak.datavalue.value' ) AS ?grid_id) - - # call orcid with ringgold and grid-id for affiliated employees - BIND( as ?orcid_url) - BIND(fun:HTTPGet(?orcid_url, "Accept: application/json") AS ?response) - FILTER(fun:HTTPExtractResponseCode( ?response ) = 200) - BIND(fun:HTTPExtractBody( ?response ) AS ?body) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/organization/orga_id.rqg b/src/main/resources/sparqlg/orcid/employees/organization/orga_id.rqg new file mode 100644 index 0000000..09a1ca9 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/employees/organization/orga_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?ror_data) { + REPLACE(fun:JSONPath(?ror_data, '$.id' ), "https://ror.org/" , "" ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/organization/orga_placeholder.rqg b/src/main/resources/sparqlg/orcid/employees/organization/orga_placeholder.rqg new file mode 100644 index 0000000..b6b280c --- /dev/null +++ b/src/main/resources/sparqlg/orcid/employees/organization/orga_placeholder.rqg @@ -0,0 +1,25 @@ +PREFIX fun: +PREFIX xsd: + +GENERATE ( ?ror_data, ?wikidata_id, ?ringgold_id, ?grid_id ) { + + ## fill in blanks in person definition in vivo ontology + GENERATE (?id, ?name, ?established_dtv, ?email, ?website, ?city, ?state, ?country, ?lat, ?lng) . + + ## add ROR as PID + GENERATE (?id, "https://ror.org/", ?id) . + + ## add Wikidata as PID + GENERATE (?id, "https://www.wikidata.org/wiki/", ?wikidata_id) . + + ## add Grid as PID + GENERATE (?id, "https://www.grid.ac/institutes/", ?grid_id) . + + ## add Ringgold as PID + GENERATE (?id, "https://www.ringgold.com/", ?ringgold_id) . 
+} +WHERE { + ### basic properties of organization + BIND((?ror_data) AS ?id) + BIND(fun:JSONPath(?ror_data, '$.name' ) AS ?name) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/employees/sparql-generate-conf.json b/src/main/resources/sparqlg/orcid/employees/sparql-generate-conf.json index 5163c1a..4c07a9d 100644 --- a/src/main/resources/sparqlg/orcid/employees/sparql-generate-conf.json +++ b/src/main/resources/sparqlg/orcid/employees/sparql-generate-conf.json @@ -1,10 +1,58 @@ { "base": "http://vivo.mydomain.edu/individual/", - "query": "orga_source.rqg", + "query": "employees_source.rqg", "namedqueries": [ { - "uri": "https://projects.tib.eu/tapir/orcid/employees/currentEmployees.rqg", - "path": "currentEmployees.rqg" + "uri": "https://projects.tib.eu/tapir/orcid/employees/organization/orga_placeholder.rqg", + "path": "organization/orga_placeholder.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/employees/organization/orga_id.rqg", + "path": "organization/orga_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/organization.rqg", + "path": "../../vivo-rdf/organization.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/employees/employees_filter.rqg", + "path": "employees_filter.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/employees/employment_mapping.rqg", + "path": "employment_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/person/person_mapping.rqg", + "path": "../person/person_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/person/person_id.rqg", + "path": "../person/person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/position.rqg", + "path": "../../vivo-rdf/position.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/person/work/work_mapping.rqg", + "path": "../person/work/work_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/research.rqg", + "path": "../../vivo-rdf/research.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/authorship.rqg", + "path": "../../vivo-rdf/authorship.rqg" } ] } \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/person/person_id.rqg b/src/main/resources/sparqlg/orcid/person/person_id.rqg new file mode 100644 index 0000000..90b35f1 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/person/person_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?orcid_data) { + fun:JSONPath(?orcid_data, '$.orcid-identifier.path' ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/person/person_mapping.rqg b/src/main/resources/sparqlg/orcid/person/person_mapping.rqg new file mode 100644 index 0000000..d690806 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/person/person_mapping.rqg @@ -0,0 +1,24 @@ +PREFIX fun: +PREFIX iter: +PREFIX xsd: + +GENERATE (?orcid_data) { + + ## fill in blanks in person definition in vivo ontology + GENERATE (?person_id, ?orcid, ?familyName, ?givenName) . + + ## add ORCID as data source + GENERATE (?person_id, "https://orcid.org/", ?orcid) . + + ## map works + GENERATE (?person_id, ?work_data) + ITERATOR iter:JSONPath(?orcid_data, '$.activities-summary.works.group[*]') AS ?work_data . 
+} +WHERE { + ### extract properties of person + BIND((?orcid_data) AS ?person_id) + BIND(?person_id AS ?orcid) + + BIND(fun:JSONPath(?orcid_data, '$.person.name.given-names.value' ) AS ?givenName) + BIND(fun:JSONPath(?orcid_data, '$.person.name.family-name.value' ) AS ?familyName) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/person/person_source.rqg b/src/main/resources/sparqlg/orcid/person/person_source.rqg new file mode 100644 index 0000000..8b631a9 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/person/person_source.rqg @@ -0,0 +1,15 @@ +PREFIX fun: + +### query data about person from ORCID +GENERATE (?orcid) { + + GENERATE ( ?orcid_data ) . + +} +WHERE { + # query person data from ORCID + BIND( as ?orcid_url) + BIND( fun:HTTPGet(?orcid_url, "Accept: application/json") AS ?orcid_response ) + FILTER( fun:HTTPExtractResponseCode( ?orcid_response ) = 200 ) + BIND( fun:HTTPExtractBody( ?orcid_response ) AS ?orcid_data ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/person/sparql-generate-conf.json b/src/main/resources/sparqlg/orcid/person/sparql-generate-conf.json new file mode 100644 index 0000000..e100be8 --- /dev/null +++ b/src/main/resources/sparqlg/orcid/person/sparql-generate-conf.json @@ -0,0 +1,34 @@ +{ + "base": "http://vivo.mydomain.edu/individual/", + "query": "person_source.rqg", + "namedqueries": [ + { + "uri": "https://projects.tib.eu/tapir/orcid/person/person_mapping.rqg", + "path": "person_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/person/person_id.rqg", + "path": "person_id.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/person.rqg", + "path": "../../vivo-rdf/person.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/orcid/person/work/work_mapping.rqg", + "path": "work/work_mapping.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/research.rqg", + "path": "../../vivo-rdf/research.rqg" + }, + { + "uri": "https://projects.tib.eu/tapir/vivo-rdf/authorship.rqg", + "path": "../../vivo-rdf/authorship.rqg" + } + ] +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/orcid/person/work/work_mapping.rqg b/src/main/resources/sparqlg/orcid/person/work/work_mapping.rqg new file mode 100644 index 0000000..d42307a --- /dev/null +++ b/src/main/resources/sparqlg/orcid/person/work/work_mapping.rqg @@ -0,0 +1,20 @@ +PREFIX fun: +PREFIX bibo: + +GENERATE (?person_id, ?work_data) { + + GENERATE (?work_id, ?pubtype, ?title, ?doi, ?pubyear_dtv) . + + GENERATE (?person_id, ?work_id) . 
+ +} +WHERE { + BIND(fun:JSONPath(?work_data, '$.work-summary[0]' ) AS ?work_summary) + BIND(fun:JSONPath(?work_summary, '$.title.title.value' ) AS ?title) + BIND(fun:JSONPath(?work_summary, '$.publication-date.year.value' ) AS ?pubyear) + BIND(fun:JSONPath(?work_summary, '$.external-ids.external-id[?(@.external-id-type==\'doi\')]' ) AS ?all_dois) + BIND(fun:JSONPath(?all_dois, '$.[0].external-id-value' ) AS ?doi) + BIND(bibo:Article AS ?pubtype) + + BIND(REPLACE(?doi, "\\W", "", "i") AS ?work_id) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/orga2children/orga2children.rqg b/src/main/resources/sparqlg/ror/orga2children/orga2children.rqg index 8cc4bf7..301b270 100644 --- a/src/main/resources/sparqlg/ror/orga2children/orga2children.rqg +++ b/src/main/resources/sparqlg/ror/orga2children/orga2children.rqg @@ -1,19 +1,17 @@ PREFIX iter: PREFIX fun: -PREFIX rdfs: -PREFIX foaf: -PREFIX obo: -GENERATE (?ror) { +GENERATE (?parent_id, ?ror) { - GENERATE ( ?orga ) . + ### use the organization mapping already defined in orga_mapping.rqg + GENERATE ( ?orga_data ) . - GENERATE { - <{?id}> obo:BFO_0000051 <{?child_id}> . + ### connection organization & sub-organization + GENERATE (?parent_id, ?id ) . - GENERATE (?child_ror ) . - } - ITERATOR iter:JSONPath(?orga, '$.relationships[*]') AS ?relationships + ### call this query recursively for all children + GENERATE (?id, ?child_ror ) + ITERATOR iter:JSONPath(?orga_data, '$.relationships[*]') AS ?relationships WHERE { BIND(fun:JSONPath(?relationships, '$.label' ) AS ?rel_label) BIND(fun:JSONPath(?relationships, '$.type' ) AS ?rel_type) @@ -26,7 +24,7 @@ GENERATE (?ror) { } . } -SOURCE AS ?orga -WHERE { - BIND(REPLACE(fun:JSONPath(?orga, '$.id' ), "https://ror.org/" , "" ) AS ?id) +SOURCE AS ?orga_data +WHERE{ + BIND((?orga_data) AS ?id) } diff --git a/src/main/resources/sparqlg/ror/orga2children/sparql-generate-conf.json b/src/main/resources/sparqlg/ror/orga2children/sparql-generate-conf.json index 9bc8d5d..8bfb361 100644 --- a/src/main/resources/sparqlg/ror/orga2children/sparql-generate-conf.json +++ b/src/main/resources/sparqlg/ror/orga2children/sparql-generate-conf.json @@ -7,36 +7,24 @@ "path": "orga2children.rqg" }, { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/organization.rqg", - "path": "../organization/queryset/organization.rqg" + "uri": "https://projects.tib.eu/tapir/ror/organization/orga_mapping.rqg", + "path": "../organization/orga_mapping.rqg" }, { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_email.rqg", - "path": "../organization/queryset/orga_email.rqg" + "uri": "https://projects.tib.eu/tapir/ror/organization/orga_id.rqg", + "path": "../organization/orga_id.rqg" }, { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_website.rqg", - "path": "../organization/queryset/orga_website.rqg" + "uri": "https://projects.tib.eu/tapir/vivo-rdf/organization.rqg", + "path": "../../vivo-rdf/organization.rqg" }, { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_geo.rqg", - "path": "../organization/queryset/orga_geo.rqg" + "uri": "https://projects.tib.eu/tapir/vivo-rdf/orga2suborga.rqg", + "path": "../../vivo-rdf/orga2suborga.rqg" }, { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_labels.rqg", - "path": "../organization/queryset/orga_labels.rqg" - }, - { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_established.rqg", - "path": "../organization/queryset/orga_established.rqg" - }, - { - "uri": 
"https://projects.tib.eu/tapir/ror/organization/queryset/orga_acronym.rqg", - "path": "../organization/queryset/orga_acronym.rqg" - }, - { - "uri": "https://projects.tib.eu/tapir/ror/organization/queryset/orga_address.rqg", - "path": "../organization/queryset/orga_address.rqg" + "uri": "https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg", + "path": "../../vivo-rdf/agent_pid.rqg" } ] } \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/orga_id.rqg b/src/main/resources/sparqlg/ror/organization/orga_id.rqg new file mode 100644 index 0000000..09a1ca9 --- /dev/null +++ b/src/main/resources/sparqlg/ror/organization/orga_id.rqg @@ -0,0 +1,5 @@ +PREFIX fun: + +FUNCTION (?ror_data) { + REPLACE(fun:JSONPath(?ror_data, '$.id' ), "https://ror.org/" , "" ) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/orga_mapping.rqg b/src/main/resources/sparqlg/ror/organization/orga_mapping.rqg new file mode 100644 index 0000000..2eab358 --- /dev/null +++ b/src/main/resources/sparqlg/ror/organization/orga_mapping.rqg @@ -0,0 +1,36 @@ +PREFIX fun: +PREFIX xsd: + +GENERATE (?ror_data) { + + ## fill in blanks in organization definition in vivo ontology + GENERATE ( ?id, ?name, ?established_dtv, ?email, ?website, ?city, ?state, ?country, ?lat, ?lng) . + + ## add ROR as PID + GENERATE (?id, "https://ror.org/", ?id) . + +} +WHERE { + ### basic properties of organization + BIND((?ror_data) AS ?id) + BIND(fun:JSONPath(?ror_data, '$.name' ) AS ?name) + + ### established + BIND(fun:JSONPath(?ror_data, '$.established' ) AS ?established) + BIND(xsd:dateTime(CONCAT(STR(?established), "-01-01T00:00:00")) AS ?established_dtv) + + ### email + BIND(fun:JSONPath(?ror_data, '$.email_address' ) AS ?email) + + ### website + BIND(fun:JSONPath(?ror_data, '$.links[0]' ) AS ?website) + + ### address (city, state, country, lat, lng) + BIND(fun:JSONPath(?ror_data, '$.country.country_name' ) AS ?country) + + BIND(fun:JSONPath(?ror_data, '$.addresses[0]' ) AS ?address) + BIND(fun:JSONPath(?address, '$.city' ) AS ?city) + BIND(fun:JSONPath(?address, '$.geonames_city.geonames_admin1.name' ) AS ?state) + BIND(xsd:decimal(fun:JSONPath(?address, '$.lat' )) AS ?lat) + BIND(xsd:decimal(fun:JSONPath(?address, '$.lng' )) AS ?lng) +} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/orga_source.rqg b/src/main/resources/sparqlg/ror/organization/orga_source.rqg index 04baa6f..cb82721 100644 --- a/src/main/resources/sparqlg/ror/organization/orga_source.rqg +++ b/src/main/resources/sparqlg/ror/organization/orga_source.rqg @@ -1,4 +1,6 @@ +### query data about organization from ROR + GENERATE (?ror) { - GENERATE ( ?orga ) . + GENERATE ( ?orga ) . } -SOURCE AS ?orga +SOURCE AS ?orga \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_acronym.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_acronym.rqg deleted file mode 100644 index c5b9f0e..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_acronym.rqg +++ /dev/null @@ -1,8 +0,0 @@ -PREFIX iter: -PREFIX vivo: - -### acronyms: SUB-QUERY TO ADD ABBREVIATION -GENERATE (?id, ?orga) { - <{?id}> vivo:abbreviation ?acronym . 
-} -ITERATOR iter:JSONPath(?orga, '$.acronyms[*]') AS ?acronym diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_address.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_address.rqg deleted file mode 100644 index e18019e..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_address.rqg +++ /dev/null @@ -1,31 +0,0 @@ -PREFIX fun: -PREFIX obo: -PREFIX vcard: - -### address (city, state, country) : SUB-QUERY TO ADD ADDRESS DATA -GENERATE (?id, ?orga) { - <{?id}> obo:ARG_2000028 <{?id}-vcard> . - - <{?id}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?id}> . - - <{?id}-vcard> vcard:hasAddress <{?id}-vcard-address> . - - <{?id}-vcard-address> a vcard:Address ; - vcard:locality ?city_with_tag ; - vcard:region ?state_with_tag ; - vcard:country ?country_with_tag . -} -WHERE { - BIND(fun:JSONPath(?orga, '$.country.country_name' ) AS ?country) - BIND(STRLANG(?country, "en") AS ?country_with_tag) - - BIND(fun:JSONPath(?orga, '$.addresses[0]' ) AS ?address) - BIND(fun:JSONPath(?address, '$.city' ) AS ?city) - BIND(STRLANG(?city, "en") AS ?city_with_tag) - - BIND(fun:JSONPath(?address, '$.geonames_city.geonames_admin1.name' ) AS ?state) - BIND(STRLANG(?state, "en") AS ?state_with_tag) - - FILTER( BOUND(?city) || BOUND(?city) || BOUND(?state) ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_email.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_email.rqg deleted file mode 100644 index 49601b6..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_email.rqg +++ /dev/null @@ -1,20 +0,0 @@ -PREFIX fun: -PREFIX obo: -PREFIX vcard: - -### email_address : SUB-QUERY TO ADD EMAIL -GENERATE (?id, ?orga) { - <{?id}> obo:ARG_2000028 <{?id}-vcard> . - - <{?id}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?id}> . - - <{?id}-vcard> vcard:hasEmail <{?id}-vcard-email> . - - <{?id}-vcard-email> a vcard:Email ; - vcard:email ?email . -} - WHERE { - BIND(fun:JSONPath(?orga, '$.email_address' ) AS ?email) - FILTER( BOUND(?email) ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_established.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_established.rqg deleted file mode 100644 index f380b94..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_established.rqg +++ /dev/null @@ -1,20 +0,0 @@ -PREFIX fun: -PREFIX vivo: -PREFIX xsd: - -### established : SUB-QUERY TO ADD THE DATE THE ORGANIZATION WAS ESTABLISHED -GENERATE (?id, ?orga) { - <{?id}> vivo:dateTimeInterval <{?id}-dti> . - - <{?id}-dti> a vivo:DateTimeInterval ; - vivo:start <{?id}-dtv> . - - <{?id}-dtv> a vivo:DateTimeValue ; - vivo:dateTime ?established_dtv ; - vivo:dateTimePrecision vivo:yearPrecision . -} -WHERE { - BIND(fun:JSONPath(?orga, '$.established' ) AS ?established) - BIND(xsd:dateTime(CONCAT(STR(?established), "-01-01T00:00:00")) AS ?established_dtv) - FILTER( BOUND(?established) ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_geo.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_geo.rqg deleted file mode 100644 index b110253..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_geo.rqg +++ /dev/null @@ -1,24 +0,0 @@ -PREFIX fun: -PREFIX obo: -PREFIX vcard: -PREFIX xsd: - -### address (lat,lng) : SUB-QUERY TO ADD LATITUDE & LONGITUDE -GENERATE (?id, ?orga) { - <{?id}> obo:ARG_2000028 <{?id}-vcard> . 
- - <{?id}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?id}> . - - <{?id}-vcard> vcard:hasGeo <{?id}-vcard-geo> . - - <{?id}-vcard-geo> a vcard:Geo ; - vcard:geo "geo:{?lat},{?lng}" . -} -WHERE { - BIND(fun:JSONPath(?orga, '$.addresses[0]' ) AS ?address) - BIND(xsd:decimal(fun:JSONPath(?address, '$.lat' )) AS ?lat) - BIND(xsd:decimal(fun:JSONPath(?address, '$.lng' )) AS ?lng) - - FILTER( BOUND(?lat) && BOUND(?lng) ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_labels.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_labels.rqg deleted file mode 100644 index 7203740..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_labels.rqg +++ /dev/null @@ -1,15 +0,0 @@ -PREFIX fun: -PREFIX iter: -PREFIX rdfs: - -### labels: SUB-QUERY TO ADD NAMES IN DIFFERENT LANGUAGES -GENERATE (?id, ?orga) { - <{?id}> rdfs:label ?label_with_tag . -} -ITERATOR iter:JSONPath(?orga, '$.labels[*]') AS ?labels -WHERE { - # alternative labels in different languages - BIND(fun:JSONPath(?labels, '$.label' ) AS ?label) - BIND(fun:JSONPath(?labels, '$.iso639' ) AS ?iso639) - BIND(STRLANG(?label, ?iso639) AS ?label_with_tag) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/orga_website.rqg b/src/main/resources/sparqlg/ror/organization/queryset/orga_website.rqg deleted file mode 100644 index d398602..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/orga_website.rqg +++ /dev/null @@ -1,23 +0,0 @@ -PREFIX fun: -PREFIX obo: -PREFIX vcard: -PREFIX vivo: -PREFIX xsd: - -### links : SUB-QUERY TO ADD WEBSITE -GENERATE (?id, ?orga) { - <{?id}> obo:ARG_2000028 <{?id}-vcard> . - - <{?id}-vcard> a vcard:Individual ; - obo:ARG_2000029 <{?id}> . - - <{?id}-vcard> vcard:hasURL <{?id}-vcard-url> . - - <{?id}-vcard-url> a vcard:URL ; - vivo:rank 1 ; - vcard:url "{?url}"^^xsd:anyURI . -} -WHERE { - BIND(fun:JSONPath(?orga, '$.links[0]' ) AS ?url) - FILTER( BOUND(?url) ) -} \ No newline at end of file diff --git a/src/main/resources/sparqlg/ror/organization/queryset/organization.rqg b/src/main/resources/sparqlg/ror/organization/queryset/organization.rqg deleted file mode 100644 index 1f5c02d..0000000 --- a/src/main/resources/sparqlg/ror/organization/queryset/organization.rqg +++ /dev/null @@ -1,43 +0,0 @@ -PREFIX iter: -PREFIX fun: -PREFIX rdfs: -PREFIX foaf: -PREFIX vivo: -PREFIX xsd: -PREFIX obo: -PREFIX vcard: - - -GENERATE (?orga) { - - <{?id}> a foaf:Organization ; - rdfs:label ?name_with_tag . - -### labels: SUB-QUERY TO ADD NAMES IN DIFFERENT LANGUAGES - GENERATE ( ?id, ?orga ) . - -### acronyms: SUB-QUERY TO ADD ABBREVIATION - GENERATE ( ?id, ?orga ) . - -### established : SUB-QUERY TO ADD THE DATE THE ORGANIZATION WAS ESTABLISHED - GENERATE ( ?id, ?orga ) . - -### email_address : SUB-QUERY TO ADD EMAIL - GENERATE ( ?id, ?orga ) . - -### links : SUB-QUERY TO ADD WEBSITE - GENERATE ( ?id, ?orga ) . - -### address (city, state, country) : SUB-QUERY TO ADD ADDRESS DATA - GENERATE ( ?id, ?orga ) . - -### geo (lat,lng) : SUB-QUERY TO ADD LATITUDE & LONGITUDE - GENERATE ( ?id, ?orga ) . 
diff --git a/src/main/resources/sparqlg/vivo-rdf/agent_pid.rqg b/src/main/resources/sparqlg/vivo-rdf/agent_pid.rqg
new file mode 100644
index 0000000..8c43219
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/agent_pid.rqg
@@ -0,0 +1,15 @@
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+
+GENERATE (?agent_id, ?service_url, ?account_name){
+
+    <{?agent_id}> foaf:holdsAccount <{?service_domain_clean}-{?account_name_clean}> .
+
+    <{?service_domain_clean}-{?account_name_clean}> a foaf:OnlineAccount ;
+        foaf:accountServiceHomePage ?service_url;
+        foaf:accountName ?account_name .
+
+} WHERE {
+    BIND(REPLACE(?service_url, "^(?:.*?://)?(?:.*?@)?([^:]+?)(:\\d+)?((/.*)|$)", "$1") AS ?service_domain)
+    BIND(REPLACE(?service_domain, "\\W", "", "i") AS ?service_domain_clean)
+    BIND(REPLACE(?account_name, "\\W", "", "i") AS ?account_name_clean)
+}
\ No newline at end of file
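
`agent_pid.rqg` is the shared building block for attaching persistent identifiers: it hangs a `foaf:OnlineAccount` off any agent and derives a compact account URI from the service domain and the account name, after the regexes strip scheme, credentials, port, path and any remaining non-word characters. The call below is purely illustrative — the ROR values and the calling query are assumptions; only the named-query URI comes from the configuration above.

```
# Illustrative call from a datasource query: register a bare ROR id (e.g. ?rorid = "03vek6s52")
# as an online account of the organization resource bound to ?id.
GENERATE <https://projects.tib.eu/tapir/vivo-rdf/agent_pid.rqg>( ?id, "https://ror.org/", ?rorid ) .

# With these example values the template would emit roughly:
#   <{?id}> foaf:holdsAccount <rororg-03vek6s52> .
#   <rororg-03vek6s52> a foaf:OnlineAccount ;
#       foaf:accountServiceHomePage "https://ror.org/" ;
#       foaf:accountName "03vek6s52" .
```
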
diff --git a/src/main/resources/sparqlg/vivo-rdf/authorship.rqg b/src/main/resources/sparqlg/vivo-rdf/authorship.rqg
new file mode 100644
index 0000000..751f072
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/authorship.rqg
@@ -0,0 +1,9 @@
+PREFIX vivo: <http://vivoweb.org/ontology/core#>
+
+GENERATE (?person_id, ?work_id){
+
+    <{?person_id}-to-{?work_id}> a vivo:Authorship ;
+        vivo:relates <{?person_id}> ;
+        vivo:relates <{?work_id}> .
+
+}
\ No newline at end of file
diff --git a/src/main/resources/sparqlg/vivo-rdf/orga2suborga.rqg b/src/main/resources/sparqlg/vivo-rdf/orga2suborga.rqg
new file mode 100644
index 0000000..b3e227e
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/orga2suborga.rqg
@@ -0,0 +1,5 @@
+PREFIX obo: <http://purl.obolibrary.org/obo/>
+
+GENERATE (?parent_id, ?id){
+    <{?parent_id}> obo:BFO_0000051 <{?id}> .
+}
\ No newline at end of file
diff --git a/src/main/resources/sparqlg/vivo-rdf/organization.rqg b/src/main/resources/sparqlg/vivo-rdf/organization.rqg
new file mode 100644
index 0000000..732d6dd
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/organization.rqg
@@ -0,0 +1,97 @@
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+PREFIX vivo: <http://vivoweb.org/ontology/core#>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+PREFIX obo: <http://purl.obolibrary.org/obo/>
+PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
+
+
+GENERATE (?id, ?name, ?established_dtv, ?email, ?website, ?city, ?state, ?country, ?lat, ?lng) {
+
+### id, name : basic attributes of an organization
+    <{?id}> a foaf:Organization ;
+        rdfs:label ?name .
+
+
+### established : TRIPLES TO ADD THE DATE THE ORGANIZATION WAS ESTABLISHED
+    GENERATE{
+        <{?id}> vivo:dateTimeInterval <{?id}-dti> .
+
+        <{?id}-dti> a vivo:DateTimeInterval ;
+            vivo:start <{?id}-dtv> .
+
+        <{?id}-dtv> a vivo:DateTimeValue ;
+            vivo:dateTime ?established_dtv ;
+            vivo:dateTimePrecision vivo:yearPrecision .
+    }
+    WHERE {
+        FILTER( BOUND(?established_dtv) ).
+    } .
+
+
+### vcard : TRIPLES TO ADD VCARD
+    GENERATE{
+        <{?id}> obo:ARG_2000028 <{?id}-vcard> .
+
+        <{?id}-vcard> a vcard:Individual ;
+            obo:ARG_2000029 <{?id}> .
+    }
+    WHERE {
+        FILTER( BOUND(?email) || BOUND(?website) || BOUND(?city) || BOUND(?state) || BOUND(?country) || (BOUND(?lat) && BOUND(?lng))).
+    } .
+
+
+### email : TRIPLES TO ADD EMAIL
+    GENERATE{
+        <{?id}-vcard> vcard:hasEmail <{?id}-vcard-email> .
+
+        <{?id}-vcard-email> a vcard:Email ;
+            vcard:email ?email .
+    }
+    WHERE {
+        FILTER( BOUND(?email) ).
+    } .
+
+
+### website : TRIPLES TO ADD WEBSITE
+    GENERATE{
+        <{?id}-vcard> vcard:hasURL <{?id}-vcard-url> .
+
+        <{?id}-vcard-url> a vcard:URL ;
+            vivo:rank 1 ;
+            vcard:url "{?website}"^^xsd:anyURI .
+    }
+    WHERE {
+        FILTER( BOUND(?website) ).
+    } .
+
+
+### address (city, state, country) : TRIPLES TO ADD ADDRESS DATA
+    GENERATE{
+        <{?id}-vcard> vcard:hasAddress <{?id}-vcard-address> .
+
+        <{?id}-vcard-address> a vcard:Address ;
+            vcard:locality ?city ;
+            vcard:region ?state ;
+            vcard:country ?country .
+    }
+    WHERE {
+        FILTER( BOUND(?city) || BOUND(?state) || BOUND(?country) ).
+    } .
+
+
+### geo (lat,lng) : TRIPLES TO ADD LATITUDE & LONGITUDE
+    GENERATE{
+        <{?id}-vcard> vcard:hasGeo <{?id}-vcard-geo> .
+
+        <{?id}-vcard-geo> a vcard:Geo ;
+            vcard:geo "geo:{?lat},{?lng}" .
+    }
+    WHERE {
+        FILTER( BOUND(?lat) && BOUND(?lng) ).
+    } .
+
+}
+WHERE {
+    FILTER( BOUND(?id) ).
+}
diff --git a/src/main/resources/sparqlg/vivo-rdf/person.rqg b/src/main/resources/sparqlg/vivo-rdf/person.rqg
new file mode 100644
index 0000000..5bdef7e
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/person.rqg
@@ -0,0 +1,41 @@
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+PREFIX vivo: <http://vivoweb.org/ontology/core#>
+PREFIX obo: <http://purl.obolibrary.org/obo/>
+PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
+
+GENERATE (?id, ?orcid, ?familyName, ?givenName){
+
+    <{?id}> a foaf:Person ;
+        rdfs:label ?completeName ;
+        vivo:orcidId <https://orcid.org/{?orcid}> .
+
+
+### vcard : TRIPLES TO ADD VCARD
+    GENERATE{
+        <{?id}> obo:ARG_2000028 <{?id}-vcard> .
+
+        <{?id}-vcard> a vcard:Individual ;
+            obo:ARG_2000029 <{?id}> .
+    }
+    WHERE {
+        FILTER( BOUND(?familyName) || BOUND(?givenName) ).
+    } .
+
+
+### name : TRIPLES TO ADD NAME
+    GENERATE{
+        <{?id}-vcard> vcard:hasName <{?id}-vcard-name> .
+
+        <{?id}-vcard-name> a vcard:Name ;
+            vcard:familyName ?familyName ;
+            vcard:givenName ?givenName .
+    }
+    WHERE {
+        FILTER( BOUND(?familyName) || BOUND(?givenName) ).
+    } .
+}
+WHERE{
+    BIND(CONCAT(?familyName, ", ", ?givenName) AS ?completeName)
+    FILTER( BOUND(?id) ).
+}
\ No newline at end of file
diff --git a/src/main/resources/sparqlg/vivo-rdf/position.rqg b/src/main/resources/sparqlg/vivo-rdf/position.rqg
new file mode 100644
index 0000000..50e29e9
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/position.rqg
@@ -0,0 +1,54 @@
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX vivo: <http://vivoweb.org/ontology/core#>
+
+GENERATE (?person_id, ?orga_id, ?role, ?start_date, ?end_date){
+
+    <{?position_id}> a vivo:Position ;
+        rdfs:label ?role_value ;
+        vivo:relates <{?person_id}> ;
+        vivo:relates <{?orga_id}> .
+
+
+### position dates : TRIPLES FOR POSITION DATES
+    GENERATE{
+        <{?position_id}> vivo:dateTimeInterval <{?position_id}-dti> .
+    }
+    WHERE {
+        FILTER( BOUND(?start_date) || BOUND(?end_date) ).
+    } .
+
+    GENERATE{
+        <{?position_id}-dti> vivo:start <{?position_id}-start> .
+
+        <{?position_id}-start> a vivo:DateTimeValue ;
+            vivo:dateTime ?start_date ;
+            vivo:dateTimePrecision vivo:yearPrecision .
+    }
+    WHERE {
+        FILTER( BOUND(?start_date) ).
+    } .
+
+    GENERATE{
+        <{?position_id}-dti> vivo:end <{?position_id}-end> .
+
+        <{?position_id}-end> a vivo:DateTimeValue ;
+            vivo:dateTime ?end_date ;
+            vivo:dateTimePrecision vivo:yearPrecision .
+    }
+    WHERE {
+        FILTER( BOUND(?end_date) ).
+    } .
+
+}
+WHERE {
+    # set default values if empty or unbound
+    BIND(IF(BOUND(?role) && strlen(?role)>0, ?role, "Unknown") AS ?role_value)
+    BIND(REPLACE(?role_value, "\\W", "", "i") AS ?role_clean)
+
+    BIND(IF(BOUND(?start_date) && strlen(?start_date)>0, STR(?start_date), "") AS ?start_value)
+
+    # create (hopefully) unique id
+    BIND("{?person_id}-{?role_clean}-{?start_value}" AS ?position_id)
+
+    FILTER( BOUND(?person_id) && BOUND(?orga_id) ).
+}
\ No newline at end of file
diff --git a/src/main/resources/sparqlg/vivo-rdf/research.rqg b/src/main/resources/sparqlg/vivo-rdf/research.rqg
new file mode 100644
index 0000000..be6cb4a
--- /dev/null
+++ b/src/main/resources/sparqlg/vivo-rdf/research.rqg
@@ -0,0 +1,19 @@
+PREFIX vivo: <http://vivoweb.org/ontology/core#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX bibo: <http://purl.org/ontology/bibo/>
+
+GENERATE (?id, ?pubtype, ?title, ?doi, ?pubyear) {
+
+    <{?id}> a ?pubtype ;
+        rdfs:label ?title ;
+        bibo:doi ?doi ;
+        vivo:dateTimeValue <{?id}-dtv> .
+
+    <{?id}-dtv> a vivo:DateTimeValue ;
+        vivo:dateTime ?pubyear ;
+        vivo:dateTimePrecision vivo:yearPrecision .
+
+}
+WHERE {
+    FILTER( BOUND(?id) && BOUND(?pubtype) ).
+}
\ No newline at end of file
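
`position.rqg` has no natural key to work with, so it mints its own position URI from the person, a cleaned-up role label (falling back to "Unknown") and the start value. The worked example below uses made-up literals; the named-query URI only follows the registration pattern of the other vivo-rdf templates and is likewise an assumption.

```
# Illustrative call with made-up values (in the real pipeline these bindings come from
# the datasource-specific extraction queries):
GENERATE <https://projects.tib.eu/tapir/vivo-rdf/position.rqg>( "p123", "org1", "Research Assistant", "2019", ?end_date ) .

# Inside the template ?role_clean becomes "ResearchAssistant" and ?start_value "2019",
# so ?position_id is "p123-ResearchAssistant-2019" and the output contains:
#   <p123-ResearchAssistant-2019> a vivo:Position ;
#       rdfs:label "Research Assistant" ;
#       vivo:relates <p123> ;
#       vivo:relates <org1> ;
#       vivo:dateTimeInterval <p123-ResearchAssistant-2019-dti> .
```
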
diff --git a/src/test/java/eu/tib/service/GeneratePipelineTest.java b/src/test/java/eu/tib/service/GeneratePipelineTest.java
index ef0ff67..9e99cd5 100644
--- a/src/test/java/eu/tib/service/GeneratePipelineTest.java
+++ b/src/test/java/eu/tib/service/GeneratePipelineTest.java
@@ -1,7 +1,6 @@
 package eu.tib.service;
 
-import eu.tib.exception.ConfigLoadingException;
-import eu.tib.exception.SparqlParsingException;
+import eu.tib.exception.SparqlExecutionException;
 import fr.mines_stetienne.ci.sparql_generate.FileConfigurations;
 import fr.mines_stetienne.ci.sparql_generate.stream.LocationMapperAccept;
 import fr.mines_stetienne.ci.sparql_generate.stream.LookUpRequest;
@@ -43,7 +42,7 @@ void readConfigTest() {
     @Test
     void failReadConfigTest() {
         String nonExistingPath = "xxxxxxxxx";
-        assertThrows(ConfigLoadingException.class, () -> pip.readConfig(nonExistingPath));
+        assertThrows(SparqlExecutionException.class, () -> pip.readConfig(nonExistingPath));
     }
 
     @Test
@@ -57,9 +56,9 @@ void parseSparqlGenerateQueryTest() {
     @Test
     void failParseSparqlGenerateQueryTest() {
         String nonExistingPath = "xxxxxxxxx";
-        assertThrows(SparqlParsingException.class, () -> pip.parseSparqlGenerateQuery(nonExistingPath, config));
+        assertThrows(SparqlExecutionException.class, () -> pip.parseSparqlGenerateQuery(nonExistingPath, config));
         String notAQueryPath = confPath + File.separator + CONF_FILE;
-        assertThrows(SparqlParsingException.class, () -> pip.parseSparqlGenerateQuery(notAQueryPath, config));
+        assertThrows(SparqlExecutionException.class, () -> pip.parseSparqlGenerateQuery(notAQueryPath, config));
     }
 
     @Test