diff --git a/doc/release-notes/6485-multiple-stores.md b/doc/release-notes/6485-multiple-stores.md index ea2d224d612..e9c7e654d96 100644 --- a/doc/release-notes/6485-multiple-stores.md +++ b/doc/release-notes/6485-multiple-stores.md @@ -29,8 +29,4 @@ Any additional S3 options you have set will need to be replaced as well, followi Once these options are set, restarting the glassfish service is all that is needed to complete the change. -<<<<<<< HEAD Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. -======= -Note that the "\-Ddataverse.files.directory", if defined, continues to control where temporary files are stored (in the /temp subdir of that directory), independent of the location of any 'file' store defined above. ->>>>>>> branch 'IQSS/6485' of https://github.com/TexasDigitalLibrary/dataverse.git diff --git a/doc/release-notes/6489-release-notes.md b/doc/release-notes/6489-release-notes.md new file mode 100644 index 00000000000..b28174c9a74 --- /dev/null +++ b/doc/release-notes/6489-release-notes.md @@ -0,0 +1,17 @@ +# S3 Direct Upload support + +S3 stores can now optionally be configured to support direct upload of files, as one option for supporting upload of larger files. + +General information about this capability can be found in the Big Data Support Guide with specific information about how to enable it in the Configuration Guide - File Storage section. + +**Upgrade Information:** + +Direct upload to S3 is enabled per store by one new jvm option: + + ./asadmin create-jvm-options "\-Ddataverse.files..upload-redirect=true" + +The existing :MaxFileUploadSizeInBytes property and ```dataverse.files..url-expiration-minutes``` jvm option for the same store also apply to direct upload. 
+ +Direct upload via the Dataverse web interface is transparent to the user and handled automatically by the browser. Some minor differences in file upload exist: directly uploaded files are not unzipped and Dataverse does not scan their content to help in assigning a MIME type. Ingest of tabular files and metadata extraction from FITS files will occur, but can be turned off for files above a specified size limit through the new dataverse.files..ingestsizelimit jvm option. + +API calls to support direct upload also exist, and, if direct upload is enabled for a store in Dataverse, the latest DVUploader (v1.0.8) provides a '-directupload' flag that enables its use. diff --git a/doc/release-notes/6514-shib-updates b/doc/release-notes/6514-shib-updates new file mode 100644 index 00000000000..561358c6b8d --- /dev/null +++ b/doc/release-notes/6514-shib-updates @@ -0,0 +1 @@ +New DB option :ShibAffiliationAttribute \ No newline at end of file diff --git a/doc/release-notes/6650-export-import-mismatch b/doc/release-notes/6650-export-import-mismatch new file mode 100644 index 00000000000..0ab2999a603 --- /dev/null +++ b/doc/release-notes/6650-export-import-mismatch @@ -0,0 +1,3 @@ +Run ReExportall to update JSON Exports + +http://guides.dataverse.org/en/4.19/admin/metadataexport.html?highlight=export#batch-exports-through-the-api \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/api/file-provenance.json b/doc/sphinx-guides/source/_static/api/file-provenance.json new file mode 100644 index 00000000000..6c823cdb5f3 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/file-provenance.json @@ -0,0 +1 @@ +{"prefix": {"pre_0": "http://www.w3.org/2001/XMLSchema", "s-prov": "http://s-prov/ns/#", "provone": "http://purl.dataone.org/provone/2015/01/15/ontology#", "vargen": "http://openprovenance.org/vargen#", "foaf": "http://xmlns.com/foaf/0.1/", "dcterms": "http://purl.org/dc/terms/", "tmpl": "http://openprovenance.org/tmpl#", "var": 
"http://openprovenance.org/var#", "vcard": "http://www.w3.org/2006/vcard/ns#", "swirrl": "http://project-dare.eu/ns#"}, "bundle": {"vargen:SessionSnapshot": {"prefix": {"s-prov": "http://s-prov/ns/#", "provone": "http://purl.dataone.org/provone/2015/01/15/ontology#", "vargen": "http://openprovenance.org/vargen#", "tmpl": "http://openprovenance.org/tmpl#", "var": "http://openprovenance.org/var#", "vcard": "http://www.w3.org/2006/vcard/ns#", "swirrl": "http://project-dare.eu/ns#"}, "entity": {"vargen:inData": {"swirrl:volumeId": {"$": "var:rawVolumeId", "type": "prov:QUALIFIED_NAME"}, "prov:type": {"$": "provone:Data", "type": "prov:QUALIFIED_NAME"}}, "vargen:inFile": {"prov:atLocation": {"$": "var:atLocation", "type": "prov:QUALIFIED_NAME"}, "s-prov:format": {"$": "var:format", "type": "prov:QUALIFIED_NAME"}, "s-prov:checksum": {"$": "var:checksum", "type": "prov:QUALIFIED_NAME"}}, "vargen:WorkData": {"swirrl:volumeId": {"$": "var:workVolumeId", "type": "prov:QUALIFIED_NAME"}, "prov:type": {"$": "provone:Data", "type": "prov:QUALIFIED_NAME"}}, "var:JupSnapshot": {"prov:generatedAt": {"$": "var:generatedAt", "type": "prov:QUALIFIED_NAME"}, "prov:atLocation": {"$": "var:repoUrl", "type": "prov:QUALIFIED_NAME"}, "s-prov:description": {"$": "var:description", "type": "prov:QUALIFIED_NAME"}, "prov:type": {"$": "swirrl:NotebookSnapshot", "type": "prov:QUALIFIED_NAME"}, "swirrl:sessionId": {"$": "var:sessionId", "type": "prov:QUALIFIED_NAME"}}}, "used": {"_:id1": {"prov:activity": "vargen:snapshot", "prov:entity": "var:Jupyter"}, "_:id2": {"prov:activity": "vargen:snapshot", "prov:entity": "vargen:WorkData"}, "_:id3": {"prov:activity": "vargen:snapshot", "prov:entity": "vargen:inData"}}, "wasDerivedFrom": {"_:id4": {"prov:usedEntity": "var:Jupyter", "prov:generatedEntity": "var:JupSnapshot"}}, "wasAssociatedWith": {"_:id5": {"prov:activity": "vargen:snapshot", "prov:agent": "var:snapAgent"}}, "actedOnBehalfOf": {"_:id6": {"prov:delegate": "var:snapAgent", 
"prov:responsible": "var:user"}}, "activity": {"vargen:snapshot": {"prov:atLocation": {"$": "var:method_path", "type": "prov:QUALIFIED_NAME"}, "tmpl:startTime": {"$": "var:startTime", "type": "prov:QUALIFIED_NAME"}, "tmpl:endTime": {"$": "var:endTime", "type": "prov:QUALIFIED_NAME"}}}, "wasGeneratedBy": {"_:id7": {"prov:activity": "vargen:snapshot", "prov:entity": "var:JupSnapshot"}}, "agent": {"var:user": {"vcard:uid": {"$": "var:name", "type": "prov:QUALIFIED_NAME"}, "swirrl:authMode": {"$": "var:authmode", "type": "prov:QUALIFIED_NAME"}, "swirrl:group": {"$": "var:group", "type": "prov:QUALIFIED_NAME"}, "prov:type": {"$": "prov:Person", "type": "prov:QUALIFIED_NAME"}}, "var:snapAgent": {"vcard:uid": {"$": "var:name_api", "type": "prov:QUALIFIED_NAME"}, "prov:type": {"$": "prov:SoftwareAgent", "type": "prov:QUALIFIED_NAME"}}}, "hadMember": {"_:id8": {"prov:collection": "vargen:inData", "prov:entity": "vargen:inFile"}}}}} \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index fac4957bfb3..79ecdbad0b8 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2190,6 +2190,8 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/files/:persistentId/prov-freeform?persistentId=doi:10.5072/FK2/AAA000" -H "Content-type:application/json" --upload-file provenance.json +See a sample JSON file :download:`file-provenance.json <../_static/api/file-provenance.json>` from http://openprovenance.org (c.f. Huynh, Trung Dong and Moreau, Luc (2014) ProvStore: a public provenance repository. At 5th International Provenance and Annotation Workshop (IPAW'14), Cologne, Germany, 09-13 Jun 2014. pp. 275-277). 
+ Delete Provenance JSON for an uploaded file ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index bb16dd9133d..c1c2969a60a 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -6,7 +6,52 @@ Big data support is highly experimental. Eventually this content will move to th .. contents:: |toctitle| :local: -Various components need to be installed and configured for big data support. +Various components need to be installed and/or configured for big data support. + +S3 Direct Upload and Download +----------------------------- + +A lightweight option for supporting file sizes beyond a few gigabytes - a size that can cause performance issues when uploaded through the Dataverse server itself - is to configure an S3 store to provide direct upload and download via 'pre-signed URLs'. When these options are configured, file uploads and downloads are made directly to and from a configured S3 store using secure (https) connections that enforce Dataverse's access controls. (The upload and download URLs are signed with a unique key that only allows access for a short time period and Dataverse will only generate such a URL if the user has permission to upload/download the specific file in question.) + +This option can handle files >40GB and could be appropriate for files up to a TB. Other options can scale farther, but this option has the advantages that it is simple to configure and does not require any user training - uploads and downloads are done via the same interface as normal uploads to Dataverse. 
+ +To configure these options, an administrator must set two JVM options for the Dataverse server using the same process as for other configuration options: + +``./asadmin create-jvm-options "-Ddataverse.files..download-redirect=true"`` +``./asadmin create-jvm-options "-Ddataverse.files..upload-redirect=true"`` + + +With multiple stores configured, it is possible to configure one S3 store with direct upload and/or download to support large files (in general or for specific dataverses) while configuring only direct download, or no direct access for another store. + +It is also possible to set file upload size limits per store. See the :MaxFileUploadSizeInBytes setting described in the :doc:`/installation/config` guide. + +At present, one potential drawback for direct-upload is that files are only partially 'ingested', tabular and FITS files are processed, but zip files are not unzipped, and the file contents are not inspected to evaluate their mimetype. This could be appropriate for large files, or it may be useful to completely turn off ingest processing for performance reasons (ingest processing requires a copy of the file to be retrieved by Dataverse from the S3 store). A store using direct upload can be configured to disable all ingest processing for files above a given size limit: + +``./asadmin create-jvm-options "-Ddataverse.files..ingestsizelimit="`` + + +**IMPORTANT:** One additional step that is required to enable direct download to work with previewers is to allow cross site (CORS) requests on your S3 store. +The example below shows how to enable the minimum needed CORS rules on a bucket using the AWS CLI command line tool. Note that you may need to add more methods and/or locations, if you also need to support certain previewers and external tools. + +``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` + +with the contents of the file cors.json as follows: + +.. 
code-block:: json + + { + "CORSRules": [ + { + "AllowedOrigins": ["https://"], + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "GET"] + } + ] + } + +Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. + +Since the direct upload mechanism creates the final file rather than an intermediate temporary file, user actions, such as neither saving nor canceling an upload session before closing the browser page, can leave an abandoned file in the store. The direct upload mechanism attempts to use S3 Tags to aid in identifying/removing such files. Upon upload, files are given a "dv-status":"temp" tag which is removed when the dataset changes are saved and the new file(s) are added in Dataverse. Note that not all S3 implementations support Tags: Minio does not. With such stores, direct upload works, but Tags are not used. Data Capture Module (DCM) ------------------------- diff --git a/doc/sphinx-guides/source/developers/deployment.rst b/doc/sphinx-guides/source/developers/deployment.rst index 9532e7c769f..5e830bfde5b 100755 --- a/doc/sphinx-guides/source/developers/deployment.rst +++ b/doc/sphinx-guides/source/developers/deployment.rst @@ -82,23 +82,26 @@ Download and Run the "Create Instance" Script Once you have done the configuration above, you are ready to try running the "ec2-create-instance.sh" script to spin up Dataverse in AWS. -Download :download:`ec2-create-instance.sh <../../../../scripts/installer/ec2-create-instance.sh>` and put it somewhere reasonable. For the purpose of these instructions we'll assume it's in the "Downloads" directory in your home directory. +Download :download:`ec2-create-instance.sh` and put it somewhere reasonable. For the purpose of these instructions we'll assume it's in the "Downloads" directory in your home directory. 
-ec2-create-instance accepts a number few command-line switches: +To run it with default values you just need the script, but you may also want a current copy of the ansible :download:`group vars`_ file. + +ec2-create-instance accepts a number of command-line switches, including: * -r: GitHub Repository URL (defaults to https://github.com/IQSS/dataverse.git) * -b: branch to build (defaults to develop) * -p: pemfile directory (defaults to $HOME) * -g: Ansible GroupVars file (if you wish to override role defaults) +* -h: help (displays usage for each available option) ``bash ~/Downloads/ec2-create-instance.sh -b develop -r https://github.com/scholarsportal/dataverse.git -g main.yml`` -Now you will need to wait around 15 minutes until the deployment is finished. Eventually, the output should tell you how to access the installation of Dataverse in a web browser or via ssh. It will also provide instructions on how to delete the instance when you are finished with it. Please be aware that AWS charges per minute for a running instance. You can also delete your instance from https://console.aws.amazon.com/console/home?region=us-east-1 . +You will need to wait for 15 minutes or so until the deployment is finished, longer if you've enabled sample data and/or the API test suite. Eventually, the output should tell you how to access the installation of Dataverse in a web browser or via SSH. It will also provide instructions on how to delete the instance when you are finished with it. Please be aware that AWS charges per minute for a running instance. You may also delete your instance from https://console.aws.amazon.com/console/home?region=us-east-1 . -Caveats -~~~~~~~ +Caveat Recipiens +~~~~~~~~~~~~~~~~ -Please note that while the script should work fine on newish branches, older branches that have different dependencies such as an older version of Solr may not produce a working Dataverse installation. Your mileage may vary. 
+Please note that while the script should work well on new-ish branches, older branches that have different dependencies such as an older version of Solr may not produce a working Dataverse installation. Your mileage may vary. ---- diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index 82a4e22ebed..2894c457d85 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -108,22 +108,33 @@ Unfortunately, the term "integration tests" can mean different things to differe Running the Full API Test Suite Using EC2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To run the API test suite on EC2 you should first follow the steps in the :doc:`deployment` section to get set up for AWS in general and EC2 in particular. +To run the API test suite in an EC2 instance you should first follow the steps in the :doc:`deployment` section to get set up for AWS in general and EC2 in particular. -Then read the instructions in https://github.com/IQSS/dataverse-sample-data for EC2 but be sure to make the adjustments below. +You may always retrieve a current copy of the ec2-create-instance.sh script and accompanying group_var.yml file from the `dataverse-ansible repo`_: -Edit ``ec2config.yaml`` to change ``test_suite`` to ``true``. +- `ec2-create-instance.sh`_ +- `main.yml`_ -Pass in the repo and branch you are testing. You should also specify a local directory where server.log and other useful information will be written so you can start debugging any failures. 
+Edit ``main.yml`` to set the desired GitHub repo, branch, and to ensure that the API test suite is enabled: + +- ``dataverse_repo: https://github.com/IQSS/dataverse.git`` +- ``dataverse_branch: develop`` +- ``dataverse.api.test_suite: true`` +- ``dataverse.sampledata.enabled: true`` + +If you wish, you may pass the local path of a logging directory, which will tell ec2-create-instance.sh to `grab glassfish, maven and other logs`_ for your review. + +Finally, run the script: .. code-block:: bash - export REPO=https://github.com/IQSS/dataverse.git - export BRANCH=123-my-branch - export LOGS=/tmp/123 + $ ./ec2-create-instance.sh -g main.yml -l log_dir + +Near the beginning and at the end of the ec2-create-instance.sh output you will see instructions for connecting to the instance via SSH. If you are actively working on a branch and want to refresh the warfile after each commit, you may wish to call a `redeploy.sh`_ script placed by the Ansible role, which will do a "git pull" against your branch, build the warfile, deploy the warfile, then restart glassfish. By default this script is written to /tmp/dataverse/redeploy.sh. You may invoke the script by appending it to the SSH command in ec2-create's output: + +.. 
code-block:: bash - mkdir $LOGS - ./ec2-create-instance.sh -g ec2config.yaml -r $REPO -b $BRANCH -l $LOGS + $ ssh -i your_pem.pem user@ec2-host.aws.com /tmp/dataverse/redeploy.sh Running the full API test suite using Docker ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bfa66b97eb1..fe559776500 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -244,6 +244,8 @@ If you wish to change which store is used by default, you'll need to delete the ./asadmin $ASADMIN_OPTS delete-jvm-options "-Ddataverse.files.storage-driver-id=file" ./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.files.storage-driver-id=" + + It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. File Storage ++++++++++++ @@ -505,7 +507,9 @@ JVM Option Value Description dataverse.files.storage-driver-id Enable as the default storage driver. ``file`` dataverse.files..bucket-name The bucket name. See above. (none) dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false`` -dataverse.files..url-expiration-minutes If direct downloads: time until links expire. Optional. 60 +dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false`` +dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none) +dataverse.files..url-expiration-minutes If direct uploads/downloads: time until links expire. Optional. 60 dataverse.files..custom-endpoint-url Use custom S3 endpoint. Needs URL either with or without protocol. (none) dataverse.files..custom-endpoint-region Only used when using custom endpoint. Optional. 
``dataverse`` dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false`` @@ -1474,7 +1478,14 @@ Alongside the ``:StatusMessageHeader`` you need to add StatusMessageText for the :MaxFileUploadSizeInBytes +++++++++++++++++++++++++ -Set `MaxFileUploadSizeInBytes` to "2147483648", for example, to limit the size of files uploaded to 2 GB. +This setting controls the maximum size of uploaded files. +- To have one limit for all stores, set `MaxFileUploadSizeInBytes` to "2147483648", for example, to limit the size of files uploaded to 2 GB: + +``curl -X PUT -d 2147483648 http://localhost:8080/api/admin/settings/:MaxFileUploadSizeInBytes`` + +- To have limits per store with an optional default, use a serialized json object for the value of `MaxFileUploadSizeInBytes` with an entry per store, as in the following example, which maintains a 2 GB default and adds higher limits for stores with ids "fileOne" and "s3". + +``curl -X PUT -d '{"default":"2147483648","fileOne":"4000000000","s3":"8000000000"}' http://localhost:8080/api/admin/settings/:MaxFileUploadSizeInBytes`` Notes: @@ -1484,7 +1495,7 @@ Notes: - For larger file upload sizes, you may need to configure your reverse proxy timeout. If using apache2 (httpd) with Shibboleth, add a timeout to the ProxyPass defined in etc/httpd/conf.d/ssl.conf (which is described in the :doc:`/installation/shibboleth` setup). -``curl -X PUT -d 2147483648 http://localhost:8080/api/admin/settings/:MaxFileUploadSizeInBytes`` + :ZipDownloadLimit +++++++++++++++++ @@ -1777,6 +1788,53 @@ You can set the value of "#THIS PAGE#" to the URL of your Dataverse homepage, or ``curl -X PUT -d true http://localhost:8080/api/admin/settings/:ShibPassiveLoginEnabled`` +:ShibAffiliationAttribute ++++++++++++++++++++++++++ + +The Shibboleth affiliation attribute holds information about the affiliation of the user (e.g. "OU") and is read from the DiscoFeed at each login. 
``:ShibAffiliationAttribute`` is a name of a Shibboleth attribute in the Shibboleth header which Dataverse will read from instead of DiscoFeed. If this value is not set or empty, Dataverse uses the DiscoFeed. + +If the attribute is not yet set for the Shibboleth, please consult the Shibboleth Administrators at your institution. Typically it requires changing of the `/etc/shibboleth/attribute-map.xml` file by adding an attribute request, e.g. + +``` + + + +``` + +In order to implement the change, you should restart Shibboleth and Apache2 services: + +``` +sudo service shibd restart +sudo service apache2 restart +``` + +To check if the attribute is sent, you should log in again to Dataverse and check Shibboleth's transaction log. You should see something like this: + +``` +INFO Shibboleth-TRANSACTION [25]: Cached the following attributes with session (ID: _9d1f34c0733b61c0feb0ca7596ef43b2) for (applicationId: default) { +INFO Shibboleth-TRANSACTION [25]: givenName (1 values) +INFO Shibboleth-TRANSACTION [25]: ou (1 values) +INFO Shibboleth-TRANSACTION [25]: sn (1 values) +INFO Shibboleth-TRANSACTION [25]: eppn (1 values) +INFO Shibboleth-TRANSACTION [25]: mail (1 values) +INFO Shibboleth-TRANSACTION [25]: displayName (1 values) +INFO Shibboleth-TRANSACTION [25]: } +``` + +If you see the attribute you requested in this list, you can set the attribute in Dataverse. + +To set ``:ShibAffiliationAttribute``: + +``curl -X PUT -d "ou" http://localhost:8080/api/admin/settings/:ShibAffiliationAttribute`` + +To delete ``:ShibAffiliationAttribute``: + +``curl -X DELETE http://localhost:8080/api/admin/settings/:ShibAffiliationAttribute`` + +To check the current value of ``:ShibAffiliationAttribute``: + +``curl -X GET http://localhost:8080/api/admin/settings/:ShibAffiliationAttribute`` + .. 
_:ComputeBaseUrl: :ComputeBaseUrl diff --git a/scripts/installer/ec2-create-instance.sh b/scripts/installer/ec2-create-instance.sh index 63e9ac21bc6..81fdd5940c1 100755 --- a/scripts/installer/ec2-create-instance.sh +++ b/scripts/installer/ec2-create-instance.sh @@ -3,21 +3,28 @@ # For docs, see the "Deployment" page in the Dev Guide. # repo and branch defaults -REPO_URL='https://github.com/IQSS/dataverse.git' -BRANCH='develop' +REPO_URL_DEFAULT='https://github.com/IQSS/dataverse.git' +BRANCH_DEFAULT='develop' PEM_DEFAULT=${HOME} +AWS_AMI_DEFAULT='ami-9887c6e7' usage() { - echo "Usage: $0 -b -r -p -g " 1>&2 + echo "Usage: $0 -b -r -p -g -a -i aws_image -s aws_size -t aws_tag -l local_log_path" 1>&2 echo "default branch is develop" echo "default repo is https://github.com/IQSS/dataverse" echo "default .pem location is ${HOME}" echo "example group_vars may be retrieved from https://raw.githubusercontent.com/IQSS/dataverse-ansible/master/defaults/main.yml" + echo "default AWS AMI ID is $AWS_AMI_DEFAULT" + echo "default AWS size is t2.medium" + echo "local log path" exit 1 } -while getopts ":r:b:g:p:" o; do +while getopts ":a:r:b:g:p:i:s:t:l:" o; do case "${o}" in + a) + DA_BRANCH=${OPTARG} + ;; r) REPO_URL=${OPTARG} ;; @@ -30,32 +37,74 @@ while getopts ":r:b:g:p:" o; do p) PEM_DIR=${OPTARG} ;; + i) + AWS_IMAGE=${OPTARG} + ;; + s) + AWS_SIZE=${OPTARG} + ;; + t) + TAG=${OPTARG} + ;; + l) + LOCAL_LOG_PATH=${OPTARG} + ;; *) usage ;; esac done -# test for user-supplied conf files +# test for ansible group_vars if [ ! -z "$GRPVRS" ]; then GVFILE=$(basename "$GRPVRS") GVARG="-e @$GVFILE" echo "using $GRPVRS for extra vars" fi +# test for CLI args if [ ! -z "$REPO_URL" ]; then GVARG+=" -e dataverse_repo=$REPO_URL" - echo "using $REPO_URL" + echo "using repo $REPO_URL" fi if [ ! 
-z "$BRANCH" ]; then GVARG+=" -e dataverse_branch=$BRANCH" - echo "building $BRANCH" + echo "building branch $BRANCH" fi +# The AMI ID may change in the future and the way to look it up is with the following command, which takes a long time to run: +# aws ec2 describe-images --owners 'aws-marketplace' --filters 'Name=product-code,Values=aw0evgkw8e5c1q413zgy5pjce' --query 'sort_by(Images, &CreationDate)[-1].[ImageId]' --output 'text' +# To use an AMI, one must subscribe to it via the AWS GUI. +# AMI IDs are specific to the region. + +if [ ! -z "$AWS_IMAGE" ]; then + AMI_ID=$AWS_IMAGE +else + AMI_ID="$AWS_AMI_DEFAULT" +fi +echo "using $AMI_ID" + +if [ ! -z "$AWS_SIZE" ]; then + SIZE=$AWS_SIZE +else + SIZE="t2.medium" +fi +echo "using $SIZE" + +if [ ! -z "$TAG" ]; then + TAGARG="--tag-specifications ResourceType=instance,Tags=[{Key=name,Value=$TAG}]" + echo "using tag $TAG" +fi + +# default to dataverse-ansible/master +if [ -z "$DA_BRANCH" ]; then + DA_BRANCH="master" +fi + +# ansible doesn't care about pem_dir (yet) if [ -z "$PEM_DIR" ]; then PEM_DIR="$PEM_DEFAULT" - echo "using $PEM_DIR" fi AWS_CLI_VERSION=$(aws --version) @@ -95,22 +144,12 @@ else exit 1 fi -# The AMI ID may change in the future and the way to look it up is with the -# following command, which takes a long time to run: -# -# aws ec2 describe-images --owners 'aws-marketplace' --filters 'Name=product-code,Values=aw0evgkw8e5c1q413zgy5pjce' --query 'sort_by(Images, &CreationDate)[-1].[ImageId]' --output 'text' -# -# To use this AMI, we subscribed to it from the AWS GUI. -# AMI IDs are specific to the region. -AMI_ID='ami-9887c6e7' -# Smaller than medium lead to Maven and Solr problems. -SIZE='t2.medium' echo "Creating EC2 instance" # TODO: Add some error checking for "ec2 run-instances". 
-INSTANCE_ID=$(aws ec2 run-instances --image-id $AMI_ID --security-groups $SECURITY_GROUP --count 1 --instance-type $SIZE --key-name $PEM_DIR/$KEY_NAME --query 'Instances[0].InstanceId' --block-device-mappings '[ { "DeviceName": "/dev/sda1", "Ebs": { "DeleteOnTermination": true } } ]' | tr -d \") +INSTANCE_ID=$(aws ec2 run-instances --image-id $AMI_ID --security-groups $SECURITY_GROUP $TAGARG --count 1 --instance-type $SIZE --key-name $PEM_DIR/$KEY_NAME --query 'Instances[0].InstanceId' --block-device-mappings '[ { "DeviceName": "/dev/sda1", "Ebs": { "DeleteOnTermination": true } } ]' | tr -d \") echo "Instance ID: "$INSTANCE_ID -echo "giving instance 30 seconds to wake up..." -sleep 30 +echo "giving instance 60 seconds to wake up..." +sleep 60 echo "End creating EC2 instance" PUBLIC_DNS=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID --query "Reservations[*].Instances[*].[PublicDnsName]" --output text) @@ -132,18 +171,36 @@ ssh -T -i $PEM_FILE -o 'StrictHostKeyChecking no' -o 'UserKnownHostsFile=/dev/nu sudo yum -y install epel-release sudo yum -y install https://releases.ansible.com/ansible/rpm/release/epel-7-x86_64/ansible-2.7.9-1.el7.ans.noarch.rpm sudo yum -y install git nano -git clone https://github.com/IQSS/dataverse-ansible.git dataverse +git clone -b $DA_BRANCH https://github.com/IQSS/dataverse-ansible.git dataverse export ANSIBLE_ROLES_PATH=. -echo $extra_vars ansible-playbook -v -i dataverse/inventory dataverse/dataverse.pb --connection=local $GVARG EOF +if [ ! -z "$LOCAL_LOG_PATH" ]; then + echo "copying logs to $LOCAL_LOG_PATH." 
+ # 1 accept SSH keys + ssh-keyscan ${PUBLIC_DNS} >> ~/.ssh/known_hosts + # 2 logdir should exist + mkdir -p $LOCAL_LOG_PATH + # 3 grab logs for local processing in jenkins + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/target/site $LOCAL_LOG_PATH/ + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/target/surefire-reports $LOCAL_LOG_PATH/ + rsync -av -e "ssh -i $PEM_FILE" centos@$PUBLIC_DNS:/usr/local/glassfish4/glassfish/domains/domain1/logs/server* $LOCAL_LOG_PATH/ + # 4 grab mvn.out + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/mvn.out $LOCAL_LOG_PATH/ + # 5 jacoco + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/target/coverage-it $LOCAL_LOG_PATH/ + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/target/*.exec $LOCAL_LOG_PATH/ + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/target/classes $LOCAL_LOG_PATH/ + rsync -av -e "ssh -i $PEM_FILE" --ignore-missing-args centos@$PUBLIC_DNS:/tmp/dataverse/src $LOCAL_LOG_PATH/ +fi + # Port 8080 has been added because Ansible puts a redirect in place # from HTTP to HTTPS and the cert is invalid (self-signed), forcing # the user to click through browser warnings. 
-CLICKABLE_LINK="http://${PUBLIC_DNS}:8080" +CLICKABLE_LINK="http://${PUBLIC_DNS}" echo "To ssh into the new instance:" echo "ssh -i $PEM_FILE $USER_AT_HOST" -echo "Branch \"$BRANCH\" from $REPO_URL has been deployed to $CLICKABLE_LINK" +echo "Branch $BRANCH from $REPO_URL has been deployed to $CLICKABLE_LINK" echo "When you are done, please terminate your instance with:" echo "aws ec2 terminate-instances --instance-ids $INSTANCE_ID" diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 54a88c27d91..65d26d2eb63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -29,6 +29,8 @@ import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.Stateless; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.NoResultException; @@ -942,6 +944,17 @@ public DataFile save(DataFile dataFile) { } } + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public DataFile saveInTransaction(DataFile dataFile) { + + if (dataFile.isMergeable()) { + DataFile savedDataFile = em.merge(dataFile); + return savedDataFile; + } else { + throw new IllegalArgumentException("This DataFile object has been set to NOT MERGEABLE; please ensure a MERGEABLE object is passed to the save method."); + } + } + private void msg(String m){ System.out.println(m); } @@ -1565,7 +1578,6 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } - logger.info("deleting: " + storageLocation); StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); 
directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 69de63c5fa6..6d26c0cba58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -97,7 +97,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String topicClassVocab="topicClassVocab"; public final static String topicClassVocabURI="topicClassVocabURI"; public final static String descriptionText="dsDescriptionValue"; - public final static String descriptionDate="descriptionDate"; + public final static String descriptionDate="dsDescriptionDate"; public final static String timePeriodCovered="timePeriodCovered"; // SEK added 6/13/2016 public final static String timePeriodCoveredStart="timePeriodCoveredStart"; public final static String timePeriodCoveredEnd="timePeriodCoveredEnd"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 690443d4def..0ece7e9c4c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1774,7 +1774,6 @@ public void updateOwnerDataverse() { private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes - this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSize(); setDataverseSiteUrl(systemConfig.getDataverseSiteUrl()); guestbookResponse = new GuestbookResponse(); @@ -1821,7 +1820,9 @@ private String init(boolean initFull) { // Set Working Version and Dataset by DatasaetVersion Id //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - } + } + this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); + if 
(retrieveDatasetVersionResponse == null) { return permissionsWrapper.notFound(); @@ -2981,16 +2982,6 @@ public void setLinkingDataverseErrorMessage(String linkingDataverseErrorMessage) this.linkingDataverseErrorMessage = linkingDataverseErrorMessage; } - UIInput selectedLinkingDataverseMenu; - - public UIInput getSelectedDataverseMenu() { - return selectedLinkingDataverseMenu; - } - - public void setSelectedDataverseMenu(UIInput selectedDataverseMenu) { - this.selectedLinkingDataverseMenu = selectedDataverseMenu; - } - private Boolean saveLink(Dataverse dataverse){ boolean retVal = true; if (readOnly) { diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 4b53937a87f..75dbb39e2ca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -32,6 +32,8 @@ import javax.validation.constraints.NotNull; import javax.validation.constraints.Pattern; import javax.validation.constraints.Size; + +import org.apache.commons.lang.StringUtils; import org.hibernate.validator.constraints.NotBlank; import org.hibernate.validator.constraints.NotEmpty; @@ -149,7 +151,7 @@ public String getIndexableCategoryName() { private String affiliation; - private String storageDriver=""; + private String storageDriver=null; // Note: We can't have "Remove" here, as there are role assignments that refer // to this role. So, adding it would mean violating a forign key contstraint. 
@@ -762,7 +764,7 @@ public boolean isAncestorOf( DvObject other ) { public String getEffectiveStorageDriverId() { String id = storageDriver; - if(id == null) { + if(StringUtils.isBlank(id)) { if(this.getOwner() != null) { id = this.getOwner().getEffectiveStorageDriverId(); } else { @@ -774,10 +776,17 @@ public String getEffectiveStorageDriverId() { public String getStorageDriverId() { + if(storageDriver==null) { + return DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER; + } return storageDriver; } public void setStorageDriverId(String storageDriver) { - this.storageDriver = storageDriver; + if(storageDriver!=null&&storageDriver.equals(DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER)) { + this.storageDriver=null; + } else { + this.storageDriver = storageDriver; + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 12f398c9c7d..165c1759b5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -1214,19 +1214,24 @@ public Set> getStorageDriverOptions() { HashMap drivers =new HashMap(); drivers.putAll(DataAccess.getStorageDriverLabels()); //Add an entry for the default (inherited from an ancestor or the system default) - drivers.put(getDefaultStorageDriverLabel(), ""); + drivers.put(getDefaultStorageDriverLabel(), DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER); return drivers.entrySet(); } public String getDefaultStorageDriverLabel() { String storageDriverId = DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER; Dataverse parent = dataverse.getOwner(); + boolean fromAncestor=false; if(parent != null) { storageDriverId = parent.getEffectiveStorageDriverId(); - } - boolean fromAncestor=false; - if(!storageDriverId.equals(DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER)) { - fromAncestor = true; + //recurse dataverse chain to root and if any have a storagedriver set, fromAncestor is true + while(parent!=null) { + 
if(!parent.getStorageDriverId().equals(DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER)) { + fromAncestor=true; + break; + } + parent=parent.getOwner(); + } } String label = DataAccess.getStorageDriverLabelFor(storageDriverId); if(fromAncestor) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 8425af60335..8fe7651fe85 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -7,13 +7,19 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import edu.harvard.iq.dataverse.datasetutility.FileReplaceException; import edu.harvard.iq.dataverse.datasetutility.FileReplacePageHelper; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; +import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataFileCommand; @@ -32,6 +38,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; 
+import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -306,6 +314,10 @@ public Long getMaxFileUploadSizeInBytes() { return this.maxFileUploadSizeInBytes; } + public String getHumanMaxFileUploadSizeInBytes() { + return FileSizeChecker.bytesToHumanReadable(this.maxFileUploadSizeInBytes); + } + public boolean isUnlimitedUploadFileSize() { return this.maxFileUploadSizeInBytes == null; @@ -350,6 +362,10 @@ public boolean doesSessionUserHaveDataSetPermission(Permission permissionToCheck return hasPermission; } + public boolean directUploadEnabled() { + return Boolean.getBoolean("dataverse.files." + this.dataset.getDataverseContext().getEffectiveStorageDriverId() + ".upload-redirect"); + } + public void reset() { // ? } @@ -439,10 +455,7 @@ public String initCreateMode(String modeToken, DatasetVersion version, List(); selectedFiles = selectedFileMetadatasList; + this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); + this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); + logger.fine("done"); saveEnabled = true; @@ -462,9 +478,6 @@ public String init() { newFiles = new ArrayList<>(); uploadedFiles = new ArrayList<>(); - this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSize(); - this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); - if (dataset.getId() != null){ // Set Working Version and Dataset by Datasaet Id and Version //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), null); @@ -479,7 +492,10 @@ public String init() { // that the dataset id is mandatory... But 404 will do for now. 
return permissionsWrapper.notFound(); } - + + this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getOwner().getEffectiveStorageDriverId()); + this.multipleUploadFilesLimit = systemConfig.getMultipleUploadFilesLimit(); + workingVersion = dataset.getEditVersion(); clone = workingVersion.cloneDatasetVersion(); if (workingVersion == null || !workingVersion.isDraft()) { @@ -954,42 +970,64 @@ public void deleteFiles() { } private void deleteTempFile(DataFile dataFile) { - // Before we remove the file from the list and forget about - // it: - // The physical uploaded file is still sitting in the temporary - // directory. If it were saved, it would be moved into its - // permanent location. But since the user chose not to save it, - // we have to delete the temp file too. - // - // Eventually, we will likely add a dedicated mechanism - // for managing temp files, similar to (or part of) the storage - // access framework, that would allow us to handle specialized - // configurations - highly sensitive/private data, that - // has to be kept encrypted even in temp files, and such. - // But for now, we just delete the file directly on the - // local filesystem: - - try { - List generatedTempFiles = ingestService.listGeneratedTempFiles( - Paths.get(FileUtil.getFilesTempDirectory()), dataFile.getStorageIdentifier()); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - logger.fine("(Deleting generated thumbnail file " + generated.toString() + ")"); - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - Files.delete(Paths.get(FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier())); - } catch (IOException ioEx) { - // safe to ignore - it's just a temp file. 
- logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" - + dataFile.getStorageIdentifier()); - } - } - + // Before we remove the file from the list and forget about + // it: + // The physical uploaded file is still sitting in the temporary + // directory. If it were saved, it would be moved into its + // permanent location. But since the user chose not to save it, + // we have to delete the temp file too. + // + // Eventually, we will likely add a dedicated mechanism + // for managing temp files, similar to (or part of) the storage + // access framework, that would allow us to handle specialized + // configurations - highly sensitive/private data, that + // has to be kept encrypted even in temp files, and such. + // But for now, we just delete the file directly on the + // local filesystem: + + try { + List generatedTempFiles = ingestService.listGeneratedTempFiles( + Paths.get(FileUtil.getFilesTempDirectory()), dataFile.getStorageIdentifier()); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + logger.fine("(Deleting generated thumbnail file " + generated.toString() + ")"); + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + String si = dataFile.getStorageIdentifier(); + if (si.contains("://")) { + //Direct upload files will already have a store id in their storageidentifier + //but they need to be associated with a dataset for the overall storagelocation to be calculated + //so we temporarily set the owner + if(dataFile.getOwner()!=null) { + logger.warning("Datafile owner was not null as expected"); + } + dataFile.setOwner(dataset); + //Use one StorageIO to get the storageLocation and then create a direct storage storageIO class to perform the delete + // (since delete is forbidden except for direct storage) + String sl = DataAccess.getStorageIO(dataFile).getStorageLocation(); + 
DataAccess.getDirectStorageIO(sl).delete(); + dataFile.setOwner(null); + } else { + //Temp files sent to this method have no prefix, not even "tmp://" + Files.delete(Paths.get(FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier())); + } + } catch (IOException ioEx) { + // safe to ignore - it's just a temp file. + logger.warning(ioEx.getMessage()); + if(dataFile.getStorageIdentifier().contains("://")) { + logger.warning("Failed to delete temporary file " + dataFile.getStorageIdentifier()); + } else { + logger.warning("Failed to delete temporary file " + FileUtil.getFilesTempDirectory() + "/" + + dataFile.getStorageIdentifier()); + } + } + } + private void removeFileMetadataFromList(List fmds, FileMetadata fmToDelete) { Iterator fmit = fmds.iterator(); while (fmit.hasNext()) { @@ -1559,7 +1597,7 @@ public void handleDropBoxUpload(ActionEvent event) { // for example, multiple files can be extracted from an uncompressed // zip file. //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); - datafiles = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", systemConfig); + datafiles = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null,null, systemConfig); } catch (IOException ex) { this.logger.log(Level.SEVERE, "Error during ingest of DropBox file {0} from link {1}", new Object[]{fileName, fileLink}); @@ -1717,6 +1755,31 @@ public String getRsyncScriptFilename() { return rsyncScriptFilename; } + public void requestDirectUploadUrl() { + + //Need to assign an identifier at this point if direct upload is used. 
+ if ( isEmpty(dataset.getIdentifier()) ) { + CommandContext ctxt = commandEngine.getContext(); + GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); + dataset.setIdentifier(ctxt.datasets().generateDatasetIdentifier(dataset, idServiceBean)); + } + + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if(s3io == null) { + FacesContext.getCurrentInstance().addMessage(uploadComponentId, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadWarning"), "Direct upload not supported for this dataset")); + } + String url = null; + String storageIdentifier = null; + try { + url = s3io.generateTemporaryS3UploadUrl(); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + FacesContext.getCurrentInstance().addMessage(uploadComponentId, new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.uploadWarning"), "Issue in connecting to S3 store for direct upload")); + } + + PrimeFaces.current().executeScript("uploadFileDirectly('" + url + "','" + storageIdentifier + "')"); + } public void uploadFinished() { // This method is triggered from the page, by the paramMap = FacesContext.getCurrentInstance().getExternalContext().getRequestParameterMap(); + + this.uploadComponentId = paramMap.get("uploadComponentId"); + String fullStorageIdentifier = paramMap.get("fullStorageIdentifier"); + String fileName = paramMap.get("fileName"); + String contentType = paramMap.get("contentType"); + String checksumType = paramMap.get("checksumType"); + String checksumValue = paramMap.get("checksumValue"); + + int lastColon = fullStorageIdentifier.lastIndexOf(':'); + String storageLocation= fullStorageIdentifier.substring(0,lastColon) + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + fullStorageIdentifier.substring(lastColon+1); + if 
(!uploadInProgress) { + uploadInProgress = true; + } + logger.fine("handleExternalUpload"); + + StorageIO sio; + String localWarningMessage = null; + try { + sio = DataAccess.getDirectStorageIO(storageLocation); + + //Populate metadata + sio.open(DataAccessOption.READ_ACCESS); + //get file size + long fileSize = sio.getSize(); + + if(StringUtils.isEmpty(contentType)) { + contentType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; + } + + if(DataFile.ChecksumType.fromString(checksumType) != DataFile.ChecksumType.MD5 ) { + String warningMessage = "Non-MD5 checksums not yet supported in external uploads"; + localWarningMessage = warningMessage; + //ToDo - methods like handleReplaceFileUpload and classes like OptionalFileParams will need to track the algorithm in addition to the value to enable this + } + + /* ---------------------------- + Check file size + - Max size NOT specified in db: default is unlimited + - Max size specified in db: check too make sure file is within limits + // ---------------------------- */ + if ((!this.isUnlimitedUploadFileSize()) && (fileSize > this.getMaxFileUploadSizeInBytes())) { + String warningMessage = "Uploaded file \"" + fileName + "\" exceeded the limit of " + fileSize + " bytes and was not uploaded."; + sio.delete(); + localWarningMessage = warningMessage; + } else { + // ----------------------------------------------------------- + // Is this a FileReplaceOperation? If so, then diverge! 
+ // ----------------------------------------------------------- + if (this.isFileReplaceOperation()){ + this.handleReplaceFileUpload(storageLocation, fileName, contentType, checksumValue); + this.setFileMetadataSelectedForTagsPopup(fileReplacePageHelper.getNewFileMetadatasBeforeSave().get(0)); + return; + } + // ----------------------------------------------------------- + List datafiles = new ArrayList<>(); + + // ----------------------------------------------------------- + // Send it through the ingest service + // ----------------------------------------------------------- + try { + + // Note: A single uploaded file may produce multiple datafiles - + // for example, multiple files can be extracted from an uncompressed + // zip file. + //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); + + + datafiles = FileUtil.createDataFiles(workingVersion, null, fileName, contentType, fullStorageIdentifier, checksumValue, systemConfig); + } catch (IOException ex) { + logger.log(Level.SEVERE, "Error during ingest of file {0}", new Object[]{fileName}); + } + + if (datafiles == null){ + logger.log(Level.SEVERE, "Failed to create DataFile for file {0}", new Object[]{fileName}); + }else{ + // ----------------------------------------------------------- + // Check if there are duplicate files or ingest warnings + // ----------------------------------------------------------- + uploadWarningMessage = processUploadedFileList(datafiles); + } + if(!uploadInProgress) { + logger.warning("Upload in progress cancelled"); + for (DataFile newFile : datafiles) { + deleteTempFile(newFile); + } + } + } + } catch (IOException e) { + logger.log(Level.SEVERE, "Failed to create DataFile for file {0}: {1}", new Object[]{fileName, e.getMessage()}); + } + if (localWarningMessage != null) { + if (uploadWarningMessage == null) { + uploadWarningMessage = localWarningMessage; + } else { + uploadWarningMessage = localWarningMessage.concat("; " 
+ uploadWarningMessage); + } + } + } + /** * After uploading via the site or Dropbox, * check the list of DataFile objects @@ -1967,7 +2159,6 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { private boolean uploadInProgress = false; private String processUploadedFileList(List dFileList) { - if (dFileList == null) { return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index 8af9a22d783..d5ff8d88de5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -11,9 +11,12 @@ import edu.harvard.iq.dataverse.authorization.providers.shib.ShibUserNameFields; import edu.harvard.iq.dataverse.authorization.providers.shib.ShibUtil; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import org.apache.commons.lang.StringUtils; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -50,6 +53,8 @@ public class Shib implements java.io.Serializable { GroupServiceBean groupService; @EJB UserNotificationServiceBean userNotificationService; + @EJB + SettingsServiceBean settingsService; HttpServletRequest request; @@ -205,7 +210,11 @@ public void init() { internalUserIdentifer = ShibUtil.generateFriendlyLookingUserIdentifer(usernameAssertion, emailAddress); logger.fine("friendly looking identifer (backend will enforce uniqueness):" + internalUserIdentifer); - String affiliation = shibService.getAffiliation(shibIdp, shibService.getDevShibAccountType()); + String shibAffiliationAttribute = settingsService.getValueForKey(SettingsServiceBean.Key.ShibAffiliationAttribute); + String affiliation = (StringUtils.isNotBlank(shibAffiliationAttribute)) + ? 
getValueFromAssertion(shibAffiliationAttribute) + : shibService.getAffiliation(shibIdp, shibService.getDevShibAccountType()); + if (affiliation != null) { affiliationToDisplayAtConfirmation = affiliation; friendlyNameForInstitution = affiliation; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index c5f358e8f71..1474104d379 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1665,7 +1665,7 @@ public Response getStorageDriver(@PathParam("alias") String alias) throws Wrappe } catch (WrappedResponse wr) { return wr.getResponse(); } - //Note that this returns what's set directly on this dataverse. If null, the user would have to recurse the chain of parents to find the effective storageDriver + //Note that this returns what's set directly on this dataverse. If null/DataAccess.UNDEFINED_STORAGE_DRIVER_IDENTIFIER, the user would have to recurse the chain of parents to find the effective storageDriver return ok(dataverse.getStorageDriverId()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 38d3979577d..0b2e25a7f02 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1477,6 +1477,43 @@ public Response returnToAuthor(@PathParam("id") String idSupplied, String jsonBo } } +@GET +@Path("{id}/uploadsid") +public Response getUploadUrl(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + + boolean canUpdateDataset = false; + try { + canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(findUserOrDie()), dataset).canIssue(UpdateDatasetVersionCommand.class); + } catch (WrappedResponse ex) { + logger.info("Exception thrown while trying to figure out permissions while getting upload URL for dataset id " + 
dataset.getId() + ": " + ex.getLocalizedMessage()); + } + if (!canUpdateDataset) { + return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset."); + } + S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset); + if(s3io == null) { + return error(Response.Status.NOT_FOUND,"Direct upload not supported for files in this dataset: " + dataset.getId()); + } + String url = null; + String storageIdentifier = null; + try { + url = s3io.generateTemporaryS3UploadUrl(); + storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); + } catch (IOException io) { + logger.warning(io.getMessage()); + throw new WrappedResponse(io, error( Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request")); + } + + JsonObjectBuilder response = Json.createObjectBuilder() + .add("url", url) + .add("storageIdentifier", storageIdentifier ); + return ok(response); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } +} /** * Add a File to an existing Dataset * @@ -1539,17 +1576,6 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } } - - // ------------------------------------- - // (3) Get the file name and content type - // ------------------------------------- - if(null == contentDispositionHeader) { - return error(BAD_REQUEST, "You must upload a file."); - } - String newFilename = contentDispositionHeader.getFileName(); - String newFileContentType = formDataBodyPart.getMediaType().toString(); - - // (2a) Load up optional params via JSON //--------------------------------------- OptionalFileParams optionalFileParams = null; @@ -1560,6 +1586,31 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, } catch (DataFileTagException ex) { return error( Response.Status.BAD_REQUEST, ex.getMessage()); } + + // ------------------------------------- + // (3) Get the file name and content type + // ------------------------------------- + String newFilename 
= null; + String newFileContentType = null; + String newStorageIdentifier = null; + if (null == contentDispositionHeader) { + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + // ToDo - check that storageIdentifier is valid + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + } else { + return error(BAD_REQUEST, + "You must upload a file or provide a storageidentifier, filename, and mimetype."); + } + } else { + newFilename = contentDispositionHeader.getFileName(); + newFileContentType = formDataBodyPart.getMediaType().toString(); + } //------------------- @@ -1583,6 +1634,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, addFileHelper.runAddFileByDataset(dataset, newFilename, newFileContentType, + newStorageIdentifier, fileInputStream, optionalFileParams); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index f5cf35276d0..6dfe605774f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -300,7 +300,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au List dataFiles = new ArrayList<>(); try { try { - dataFiles = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, systemConfig); + dataFiles = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); } catch (EJBException ex) { Throwable cause = ex.getCause(); if (cause != null) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index 4eb6e77fe21..ce5f9415fcc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -124,8 +124,11 @@ public String getTempDirectory() { public int getMaxUploadSize() { int unlimited = -1; - - Long maxUploadInBytes = systemConfig.getMaxFileUploadSize(); + /* It doesn't look like we can determine which store will be used here, so we'll go with the default + * (It looks like the collection or study involved is available where this method is called, but the SwordConfiguration.getMaxUploadSize() + * doesn't allow a parameter) + */ + Long maxUploadInBytes = systemConfig.getMaxFileUploadSizeForStore("default"); if (maxUploadInBytes == null){ // (a) No setting, return unlimited diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 178a23b6f2f..ce2f646322c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -10,6 +10,8 @@ import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType; import static edu.harvard.iq.dataverse.export.ddi.DdiExportUtil.NOTE_TYPE_CONTENTTYPE; +import static edu.harvard.iq.dataverse.export.ddi.DdiExportUtil.NOTE_TYPE_TERMS_OF_ACCESS; + import edu.harvard.iq.dataverse.util.StringUtil; import java.io.File; import java.io.FileInputStream; @@ -27,6 +29,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import javax.xml.stream.XMLInputFactory; + +import 
edu.harvard.iq.dataverse.util.json.ControlledVocabularyException; import org.apache.commons.lang.StringUtils; /** @@ -265,18 +269,18 @@ private void processCodeBook(ImportType importType, XMLStreamReader xmlr, Datase } } } - + private void processDocDscr(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws XMLStreamException { for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { - + if (xmlr.getLocalName().equals("IDNo") && StringUtil.isEmpty(datasetDTO.getIdentifier()) ) { // this will set a StudyId if it has not yet been set; it will get overridden by a metadata // id in the StudyDscr section, if one exists if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) ) { parseStudyIdHandle( parseText(xmlr), datasetDTO ); - } - // EMK TODO: we need to save this somewhere when we add harvesting infrastructure + } + // EMK TODO: we need to save this somewhere when we add harvesting infrastructure } /*else if ( xmlr.getLocalName().equals("holdings") && StringUtil.isEmpty(datasetDTO..getHarvestHoldings()) ) { metadata.setHarvestHoldings( xmlr.getAttributeValue(null, "URI") ); }*/ @@ -284,7 +288,7 @@ private void processDocDscr(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws if (xmlr.getLocalName().equals("docDscr")) return; } } - } + } private String parseText(XMLStreamReader xmlr) throws XMLStreamException { return parseText(xmlr,true); } @@ -344,7 +348,8 @@ private void processStdyDscr(ImportType importType, XMLStreamReader xmlr, Datase else if (xmlr.getLocalName().equals("stdyInfo")) processStdyInfo(xmlr, datasetDTO.getDatasetVersion()); else if (xmlr.getLocalName().equals("method")) processMethod(xmlr, datasetDTO.getDatasetVersion()); - else if (xmlr.getLocalName().equals("dataAccs")) processDataAccs(xmlr, datasetDTO.getDatasetVersion()); + else if (xmlr.getLocalName().equals("dataAccs")) processDataAccs(xmlr, datasetDTO.getDatasetVersion()); + else if 
(xmlr.getLocalName().equals("notes")) processStdyNotes(xmlr, datasetDTO.getDatasetVersion()); else if (xmlr.getLocalName().equals("othrStdyMat")) processOthrStdyMat(xmlr, datasetDTO.getDatasetVersion()); else if (xmlr.getLocalName().equals("notes")) processNotes(xmlr, datasetDTO.getDatasetVersion()); @@ -390,41 +395,12 @@ else if (xmlr.getLocalName().equals("relStdy")) { relStudy.add(parseText(xmlr, "relStdy")); getCitation(dvDTO).addField(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.relatedDatasets, relStudy)); } else if (xmlr.getLocalName().equals("relPubl")) { - HashSet set = new HashSet<>(); - - // call new parse text logic - Object rpFromDDI = parseTextNew(xmlr, "relPubl"); - if (rpFromDDI instanceof Map) { - Map rpMap = (Map) rpFromDDI; - addToSet(set, DatasetFieldConstant.publicationCitation, rpMap.get("text")); - addToSet(set, DatasetFieldConstant.publicationIDNumber, rpMap.get("idNumber")); - addToSet(set, DatasetFieldConstant.publicationURL, rpMap.get("url")); - if (rpMap.get("idType")!=null) { - set.add(FieldDTO.createVocabFieldDTO(DatasetFieldConstant.publicationIDType, rpMap.get("idType").toLowerCase())); - } - // rp.setText((String) rpMap.get("text")); - // rp.setIdType((String) rpMap.get("idType")); - // rp.setIdNumber((String) rpMap.get("idNumber")); - // rp.setUrl((String) rpMap.get("url")); - // TODO: ask about where/whether we want to save this - // if (!replicationForFound && rpMap.get("replicationData") != null) { - // rp.setReplicationData(true); - /// replicationForFound = true; - // } - } else { - addToSet(set, DatasetFieldConstant.publicationCitation, (String) rpFromDDI); - // rp.setText( (String) rpFromDDI ); - } - publications.add(set); - - - } else if (xmlr.getLocalName().equals("otherRefs")) { - + processRelPubl(xmlr, dvDTO, publications); + } else if (xmlr.getLocalName().equals("othRefs")) { List otherRefs = new ArrayList<>(); - otherRefs.add(parseText(xmlr, "otherRefs")); + otherRefs.add(parseText(xmlr, "othRefs")); 
getCitation(dvDTO).addField(FieldDTO.createMultiplePrimitiveFieldDTO(DatasetFieldConstant.otherReferences, otherRefs)); - - } + } } else if (event == XMLStreamConstants.END_ELEMENT) { if (publications.size()>0) { getCitation(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO(DatasetFieldConstant.publication, publications)); @@ -435,6 +411,49 @@ else if (xmlr.getLocalName().equals("relStdy")) { } } } + private void processRelPubl(XMLStreamReader xmlr, DatasetVersionDTO dvDTO, List> publications) throws XMLStreamException { + HashSet set = new HashSet<>(); + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("citation")) { + for (int event2 = xmlr.next(); event2 != XMLStreamConstants.END_DOCUMENT; event2 = xmlr.next()) { + if (event2 == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("titlStmt")) { + int event3 = xmlr.next(); + if (event3 == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("IDNo")) { + set.add(FieldDTO.createVocabFieldDTO(DatasetFieldConstant.publicationIDType, xmlr.getAttributeValue(null, "agency"))); + addToSet(set, DatasetFieldConstant.publicationIDNumber, parseText(xmlr)); + } + } + } else if (xmlr.getLocalName().equals("biblCit")) { + if (event2 == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("biblCit")) { + addToSet(set, DatasetFieldConstant.publicationCitation, parseText(xmlr)); + } + } + } + } else if (event2 == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("citation")) { + break; + } + } + + } + } else if (xmlr.getLocalName().equals("ExtLink")) { + addToSet(set, DatasetFieldConstant.publicationURL, xmlr.getAttributeValue(null, "URI")); + } + } else if (event == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("relPubl")) { + if (set.size() > 0) { + publications.add(set); + } + return; + } + } + } + } + 
private void processCitation(ImportType importType, XMLStreamReader xmlr, DatasetDTO datasetDTO) throws XMLStreamException, ImportException { DatasetVersionDTO dvDTO = datasetDTO.getDatasetVersion(); MetadataBlockDTO citation=datasetDTO.getDatasetVersion().getMetadataBlocks().get("citation"); @@ -620,6 +639,52 @@ private void processNotes (XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throws this.addNote(formattedNotes, dvDTO); } } + private void processStdyNotes(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throws XMLStreamException { + FieldDTO notesText = null; + // Add notes, if they exist + String attrVal = parseText(xmlr, "notes"); + if ((attrVal != null) && (!attrVal.isEmpty())){ + notesText = FieldDTO.createPrimitiveFieldDTO("datasetLevelErrorNotes", attrVal); + getSocialScience(dvDTO).addField(notesText); + } + } + + + private void processNotesSocialScience(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throws XMLStreamException { + //String formattedNotes = this.formatNotesfromXML(xmlr); + if (xmlr==null){ + throw new NullPointerException("XMLStreamReader xmlr cannot be null"); + } + FieldDTO notesSubject = null; + String attrVal; + + // Check for "subject" + attrVal = xmlr.getAttributeValue(null, "subject"); + if (attrVal != null){ + notesSubject = FieldDTO.createPrimitiveFieldDTO("socialScienceNotesSubject", attrVal); + } + + FieldDTO notesType = null; + // Check for "type" + attrVal = xmlr.getAttributeValue(null, "type"); + if (attrVal != null){ + notesType = FieldDTO.createPrimitiveFieldDTO("socialScienceNotesType", attrVal); + } + + FieldDTO notesText = null; + // Add notes, if they exist + attrVal = parseText(xmlr, "notes"); + if ((attrVal != null) && (!attrVal.isEmpty())){ + notesText = FieldDTO.createPrimitiveFieldDTO("socialScienceNotesText", attrVal); + } + + if (notesSubject != null || notesType != null || notesText != null ){ + + //this.addNoteSocialScience(formattedNotes, dvDTO); + + 
getSocialScience(dvDTO).addField(FieldDTO.createCompoundFieldDTO("socialScienceNotes", notesSubject, notesType, notesText )); + } + } private void addNote(String noteText, DatasetVersionDTO dvDTO ) { MetadataBlockDTO citation = getCitation(dvDTO); @@ -640,12 +705,17 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw List kindOfData = new ArrayList<>(); List> geoBoundBox = new ArrayList<>(); List> geoCoverages = new ArrayList<>(); + List timePeriod = new ArrayList<>(); + List dateOfCollection = new ArrayList<>(); FieldDTO timePeriodStart = null; FieldDTO timePeriodEnd = null; FieldDTO dateOfCollectionStart = null; FieldDTO dateOfCollectionEnd = null; + HashSet geoCoverageSet = null; + String otherGeographicCoverage = null; for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { if (xmlr.getLocalName().equals("timePrd")) { @@ -654,6 +724,8 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw timePeriodStart = FieldDTO.createPrimitiveFieldDTO("timePeriodCoveredStart", parseDate(xmlr, "timePrd")); } else if (EVENT_END.equals(eventAttr)) { timePeriodEnd = FieldDTO.createPrimitiveFieldDTO("timePeriodCoveredEnd", parseDate(xmlr, "timePrd")); + timePeriod.add(FieldDTO.createMultipleCompoundFieldDTO("timePeriodCovered", timePeriodStart, timePeriodEnd)); + } } else if (xmlr.getLocalName().equals("collDate")) { String eventAttr = xmlr.getAttributeValue(null, "event"); @@ -661,16 +733,32 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw dateOfCollectionStart = FieldDTO.createPrimitiveFieldDTO("dateOfCollectionStart", parseDate(xmlr, "collDate")); } else if (EVENT_END.equals(eventAttr)) { dateOfCollectionEnd = FieldDTO.createPrimitiveFieldDTO("dateOfCollectionEnd", parseDate(xmlr, "collDate")); + dateOfCollection.add(FieldDTO.createMultipleCompoundFieldDTO("dateOfCollection", 
dateOfCollectionStart, dateOfCollectionEnd )); } } else if (xmlr.getLocalName().equals("nation")) { - HashSet set = new HashSet<>(); - set.add(FieldDTO.createVocabFieldDTO("country", parseText(xmlr))); - geoCoverages.add(set); + if (otherGeographicCoverage != null && !otherGeographicCoverage.equals("")) { + geoCoverageSet.add(FieldDTO.createPrimitiveFieldDTO("otherGeographicCoverage", otherGeographicCoverage)); + otherGeographicCoverage = null; + } + if (geoCoverageSet != null && geoCoverageSet.size() > 0) { + geoCoverages.add(geoCoverageSet); + } + geoCoverageSet = new HashSet<>(); + //HashSet set = new HashSet<>(); + //set.add(FieldDTO.createVocabFieldDTO("country", parseText(xmlr))); + geoCoverageSet.add(FieldDTO.createVocabFieldDTO("country", parseText(xmlr))); + } else if (xmlr.getLocalName().equals("geogCover")) { - HashSet set = new HashSet<>(); - set.add(FieldDTO.createPrimitiveFieldDTO("otherGeographicCoverage", parseText(xmlr))); - geoCoverages.add(set); + if (geoCoverageSet == null) { + geoCoverageSet = new HashSet<>(); + } + if (otherGeographicCoverage != null) { + otherGeographicCoverage = otherGeographicCoverage + "; " + parseText(xmlr); + } else { + otherGeographicCoverage = parseText(xmlr); + } + } else if (xmlr.getLocalName().equals("geogUnit")) { geoUnit.add(parseText(xmlr)); } else if (xmlr.getLocalName().equals("geoBndBox")) { @@ -684,11 +772,11 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("sumDscr")) { - if (timePeriodStart!=null || timePeriodEnd!=null) { - getCitation(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO("timePeriodCovered", timePeriodStart, timePeriodEnd)); + for (FieldDTO time : timePeriod) { + getCitation(dvDTO).addField( time); } - if (dateOfCollectionStart!=null || dateOfCollectionEnd!=null) { - getCitation(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO("dateOfCollection", 
dateOfCollectionStart, dateOfCollectionEnd)); + for (FieldDTO date : dateOfCollection) { + getCitation(dvDTO).addField(date); } if (geoUnit.size() > 0) { @@ -703,8 +791,14 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw if (kindOfData.size() > 0) { getCitation(dvDTO).addField(FieldDTO.createMultiplePrimitiveFieldDTO("kindOfData", kindOfData)); } - if (geoCoverages.size()>0) { - getGeospatial(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO("geographicCoverage", geoCoverages)); + if (otherGeographicCoverage != null && !otherGeographicCoverage.equals("") ) { + geoCoverageSet.add(FieldDTO.createPrimitiveFieldDTO("otherGeographicCoverage", otherGeographicCoverage)); + } + if (geoCoverageSet != null && geoCoverageSet.size() > 0) { + geoCoverages.add(geoCoverageSet); + } + if (geoCoverages.size() > 0) { + getGeospatial(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO(DatasetFieldConstant.geographicCoverage, geoCoverages)); } if (geoBoundBox.size()>0) { getGeospatial(dvDTO).addField(FieldDTO.createMultipleCompoundFieldDTO("geographicBoundingBox", geoBoundBox)); @@ -715,8 +809,6 @@ private void processSumDscr(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) throw } } - - private HashSet processGeoBndBox(XMLStreamReader xmlr) throws XMLStreamException { HashSet set = new HashSet<>(); @@ -748,9 +840,7 @@ private void processMethod(XMLStreamReader xmlr, DatasetVersionDTO dvDTO ) throw if (NOTE_TYPE_EXTENDED_METADATA.equalsIgnoreCase(noteType) ) { processCustomField(xmlr, dvDTO); } else { - processNotes(xmlr, dvDTO); -// addNote("Subject: Study Level Error Note, Notes: "+ parseText( xmlr,"notes" ) +";", dvDTO); - + processNotesSocialScience(xmlr, dvDTO); } } else if (xmlr.getLocalName().equals("anlyInfo")) { processAnlyInfo(xmlr, getSocialScience(dvDTO)); @@ -959,8 +1049,8 @@ private void processDataColl(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) thro 
socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("dataCollectionSituation", parseText( xmlr, "collSitu" ))); } else if (xmlr.getLocalName().equals("actMin")) { socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("actionsToMinimizeLoss", parseText( xmlr, "actMin" ))); - } else if (xmlr.getLocalName().equals("ConOps")) { - socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("controlOperations", parseText( xmlr, "ConOps" ))); + } else if (xmlr.getLocalName().equals("conOps")) { + socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("controlOperations", parseText( xmlr, "conOps" ))); } else if (xmlr.getLocalName().equals("weight")) { String thisValue = parseText( xmlr, "weight" ); if (!StringUtil.isEmpty(thisValue)) { @@ -972,6 +1062,8 @@ private void processDataColl(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) thro //socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("weighting", parseText( xmlr, "weight" ))); } else if (xmlr.getLocalName().equals("cleanOps")) { socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("cleaningOperations", parseText( xmlr, "cleanOps" ))); + } else if (xmlr.getLocalName().equals("collectorTraining")) { + socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("collectorTraining", parseText( xmlr, "collectorTraining" ))); } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("dataColl")) { @@ -987,6 +1079,7 @@ private void processDataColl(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) thro if (!StringUtil.isEmpty(dataCollector)) { socialScience.getFields().add(FieldDTO.createPrimitiveFieldDTO("dataCollector", dataCollector)); } + return; } } @@ -1082,7 +1175,16 @@ private void processDataAccs(XMLStreamReader xmlr, DatasetVersionDTO dvDTO) thro String noteType = xmlr.getAttributeValue(null, "type"); if (NOTE_TYPE_TERMS_OF_USE.equalsIgnoreCase(noteType) ) { if ( LEVEL_DV.equalsIgnoreCase(xmlr.getAttributeValue(null, "level"))) { - 
dvDTO.setTermsOfUse(parseText(xmlr, "notes")); + String termOfUse = parseText(xmlr, "notes"); + if (termOfUse != null && termOfUse.trim().equals("CC0 Waiver") ) { + dvDTO.setLicense("CC0"); + } else if (termOfUse != null && !termOfUse.trim().equals("")){ + dvDTO.setTermsOfUse(termOfUse); + } + } + } else if (NOTE_TYPE_TERMS_OF_ACCESS.equalsIgnoreCase(noteType) ) { + if (LEVEL_DV.equalsIgnoreCase(xmlr.getAttributeValue(null, "level"))) { + dvDTO.setTermsOfAccess(parseText(xmlr, "notes")); } } else { processNotes(xmlr, dvDTO); @@ -1189,15 +1291,16 @@ private void processDistStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { if (xmlr.getLocalName().equals("distrbtr")) { - HashSet set = new HashSet<>(); - addToSet(set, "distributorAbbreviation", xmlr.getAttributeValue(null, "abbr")); - addToSet(set, "distributorAffiliation", xmlr.getAttributeValue(null, "affiliation")); - - Map distDetails = parseCompoundText(xmlr, "distrbtr"); - addToSet(set, "distributorName", distDetails.get("name")); - addToSet(set, "distributorURL", distDetails.get("url")); - addToSet(set, "distributorLogoURL", distDetails.get("logo")); - distributors.add(set); + String source = xmlr.getAttributeValue(null, "source"); + if (source == null || !source.equals("archive")) { + HashSet set = new HashSet<>(); + addToSet(set, "distributorAbbreviation", xmlr.getAttributeValue(null, "abbr")); + addToSet(set, "distributorAffiliation", xmlr.getAttributeValue(null, "affiliation")); + addToSet(set, "distributorURL", xmlr.getAttributeValue(null, "URI")); + addToSet(set, "distributorLogoURL", xmlr.getAttributeValue(null, "role")); + addToSet(set, "distributorName", xmlr.getElementText()); + distributors.add(set); + } } else if (xmlr.getLocalName().equals("contact")) { HashSet set = new HashSet<>(); @@ -1240,11 +1343,9 @@ private void processProdStmt(XMLStreamReader 
xmlr, MetadataBlockDTO citation) th HashSet set = new HashSet<>(); addToSet(set,"producerAbbreviation", xmlr.getAttributeValue(null, "abbr")); addToSet(set,"producerAffiliation", xmlr.getAttributeValue(null, "affiliation")); - - Map prodDetails = parseCompoundText(xmlr, "producer"); - addToSet(set,"producerName", prodDetails.get("name")); - addToSet(set,"producerURL", prodDetails.get("url" )); - addToSet(set,"producerLogoURL", prodDetails.get("logo")); + addToSet(set,"producerLogoURL", xmlr.getAttributeValue(null, "role")); + addToSet(set,"producerURL", xmlr.getAttributeValue(null, "URI")); + addToSet(set,"producerName", xmlr.getElementText()); if (!set.isEmpty()) producers.add(set); } else if (xmlr.getLocalName().equals("prodDate")) { @@ -1254,7 +1355,7 @@ private void processProdStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) th } else if (xmlr.getLocalName().equals("software")) { HashSet set = new HashSet<>(); addToSet(set,"softwareVersion", xmlr.getAttributeValue(null, "version")); - addToSet(set,"softwareName", xmlr.getAttributeValue(null, "version")); + addToSet(set,"softwareName", parseText(xmlr)); if (!set.isEmpty()) { software.add(set); } @@ -1341,6 +1442,7 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws private void processRspStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) throws XMLStreamException { List> authors = new ArrayList<>(); + List> contributors = new ArrayList<>(); for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { if (xmlr.getLocalName().equals("AuthEnty")) { @@ -1351,12 +1453,24 @@ private void processRspStmt(XMLStreamReader xmlr, MetadataBlockDTO citation) thr authors.add(set); } } + if (xmlr.getLocalName().equals("othId")) { + HashSet set = new HashSet<>(); + set.add(FieldDTO.createVocabFieldDTO("contributorType", xmlr.getAttributeValue(null, "role") )); + addToSet(set,"contributorName", 
parseText(xmlr)); + if (!set.isEmpty()) { + contributors.add(set); + } + } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("rspStmt")) { if (authors.size()>0) { FieldDTO author = FieldDTO.createMultipleCompoundFieldDTO("author", authors); citation.getFields().add(author); } + if (contributors.size() > 0) { + FieldDTO contributor = FieldDTO.createMultipleCompoundFieldDTO("contributor", contributors); + citation.getFields().add(contributor); + } return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index c2b7cbffd62..0cf9883b240 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -21,12 +21,11 @@ package edu.harvard.iq.dataverse.dataaccess; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.util.StringUtil; - import java.io.IOException; import java.util.HashMap; import java.util.Properties; import java.util.logging.Logger; + import org.apache.commons.lang.StringUtils; /** * @@ -42,9 +41,10 @@ public DataAccess() { }; - //Default is only for tests + //Default to "file" is for tests only public static final String DEFAULT_STORAGE_DRIVER_IDENTIFIER = System.getProperty("dataverse.files.storage-driver-id", "file"); - + public static final String UNDEFINED_STORAGE_DRIVER_IDENTIFIER = "undefined"; //Used in dataverse.xhtml as a non-null selection option value (indicating a null driver/inheriting the default) + // The getStorageIO() methods initialize StorageIO objects for // datafiles that are already saved using one of the supported Dataverse // DataAccess IO drivers. 
@@ -122,6 +122,11 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorarageIdFromLocation(String location) { + if(location.contains("://")) { + //It's a full location with a driverId, so strip and reapply the driver id + //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it + return location.substring(0,location.indexOf("://") +3) + location.substring(location.lastIndexOf('/')+1); + } return location.substring(location.lastIndexOf('/')+1); } @@ -165,7 +170,7 @@ public static StorageIO createNewStorageIO(T dvObject, S dvObject.setStorageIdentifier(storageTag); - if (StringUtils.isEmpty(storageDriverId)) { + if (StringUtils.isBlank(storageDriverId)) { storageDriverId = DEFAULT_STORAGE_DRIVER_IDENTIFIER; } String storageType = getDriverType(storageDriverId); @@ -196,7 +201,7 @@ public static String getStorageDriverId(String driverLabel) { if (drivers==null) { populateDrivers(); } - if(StringUtil.nonEmpty(driverLabel) && drivers.containsKey(driverLabel)) { + if(!StringUtils.isBlank(driverLabel) && drivers.containsKey(driverLabel)) { return drivers.get(driverLabel); } return DEFAULT_STORAGE_DRIVER_IDENTIFIER; @@ -219,7 +224,6 @@ private static void populateDrivers() { logger.info("Found Storage Driver: " + driverId + " for " + p.get(property).toString()); drivers.put(p.get(property).toString(), driverId); } - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d7a405d63c7..bd0549622f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -147,7 +147,6 @@ public void open (DataAccessOption... options) throws IOException { } else if (dvObject instanceof Dataset) { //This case is for uploading a dataset related auxiliary file //e.g. 
image thumbnails/metadata exports - //TODO: do we really need to do anything here? should we return the dataset directory? dataset = this.getDataset(); if (isReadAccess) { //TODO: Not necessary for dataset as there is no files associated with this @@ -229,10 +228,19 @@ public void saveInputStream(InputStream inputStream) throws IOException { @Override public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { - + Path auxPath = getAuxObjectAsPath(auxItemTag); if (isWriteAccessRequested(options)) { + if (dvObject instanceof Dataset && !this.canWrite()) { + // If this is a dataset-level auxilary file (a cached metadata export, + // dataset logo, etc.) there's a chance that no "real" files + // have been saved for this dataset yet, and thus the filesystem + // directory does not exist yet. Let's force a proper .open() on + // this StorageIO, that will ensure it is created: + open(DataAccessOption.WRITE_ACCESS); + } + FileOutputStream auxOut = new FileOutputStream(auxPath.toFile()); if (auxOut == null) { @@ -287,7 +295,7 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { } String datasetDirectory = getDatasetDirectory(); - + if (dvObject.getStorageIdentifier() == null || "".equals(dvObject.getStorageIdentifier())) { throw new IOException("Data Access: No local storage identifier defined for this datafile."); } @@ -325,6 +333,10 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { // this method copies a local filesystem Path into this DataAccess Auxiliary location: @Override public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + if (dvObject instanceof Dataset && !this.canWrite()) { + // see the comment in openAuxChannel() + open(DataAccessOption.WRITE_ACCESS); + } // quick Files.copy method: try { Path auxPath = getAuxObjectAsPath(auxItemTag); @@ -340,7 +352,10 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon @Override 
public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - + if (dvObject instanceof Dataset && !this.canWrite()) { + // see the comment in openAuxChannel() + open(DataAccessOption.WRITE_ACCESS); + } // Since this is a local fileystem file, we can use the // quick NIO Files.copy method: @@ -633,4 +648,4 @@ private String stripDriverId(String storageIdentifier) { } return storageIdentifier; } -} \ No newline at end of file +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8194ab80c58..b12013b8f8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -7,10 +7,12 @@ import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.Headers; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectRequest; +import com.amazonaws.services.s3.model.DeleteObjectTaggingRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest; @@ -125,8 +127,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, private boolean s3chunkedEncoding = true; private String s3profile = "default"; private String bucketName = null; - - private String key; + private String key = null; @Override public void open(DataAccessOption... options) throws IOException { @@ -164,7 +165,36 @@ public void open(DataAccessOption... 
options) throws IOException { if (storageIdentifier == null || "".equals(storageIdentifier)) { throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); } + + + //Fix new DataFiles: DataFiles that have not yet been saved may use this method when they don't have their storageidentifier in the final ://: form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated by the appropriate StorageIO class and is final from the start. + String newStorageIdentifier = null; + if (storageIdentifier.startsWith(this.driverId + "://")) { + if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + //Driver id but no bucket + if(bucketName!=null) { + newStorageIdentifier=this.driverId + "://" + bucketName + ":" + storageIdentifier.substring((this.driverId + "://").length()); + } else { + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") is not associated with a bucket."); + } + } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) + } else { + if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + //No driver id or bucket + newStorageIdentifier= this.driverId + "://" + bucketName + ":" + storageIdentifier; + } else { + //Just the bucketname + newStorageIdentifier= this.driverId + "://" + storageIdentifier; + } + } + if(newStorageIdentifier != null) { + //Fixup needed: + storageIdentifier = newStorageIdentifier; + dvObject.setStorageIdentifier(newStorageIdentifier); + } + if (isReadAccess) { key = getMainFileKey(); ObjectMetadata objectMetadata = null; @@ -188,14 +218,7 @@ public void open(DataAccessOption... 
options) throws IOException { } else if (isWriteAccess) { key = dataFile.getOwner().getAuthorityForFileStorage() + "/" + this.getDataFile().getOwner().getIdentifierForFileStorage(); - - if (storageIdentifier.startsWith(this.driverId + "://")) { - key += "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); - } else { - key += "/" + storageIdentifier; - dvObject.setStorageIdentifier(this.driverId + "://" + bucketName + ":" + storageIdentifier); - } - + key += "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } this.setMimeType(dataFile.getContentType()); @@ -212,7 +235,36 @@ public void open(DataAccessOption... options) throws IOException { } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { + // Direct access, e.g. for external upload - no associated DVobject yet, but we want to be able to get the size + // With small files, it looks like we may call before S3 says it exists, so try some retries before failing + if(key!=null) { + ObjectMetadata objectMetadata = null; + int retries = 20; + while(retries > 0) { + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + if(retries != 20) { + logger.warning("Success for key: " + key + " after " + ((20-retries)*3) + " seconds"); + } + retries = 0; + } catch (SdkClientException sce) { + if(retries > 1) { + retries--; + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.warning("Retrying after: " + sce.getMessage()); + } else { + throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); + } + } + } + this.setSize(objectMetadata.getContentLength()); + }else { throw new IOException("Data Access: Invalid DvObject type"); + } } } @@ -678,6 +730,9 @@ public boolean exists() { String destinationKey = null; if (dvObject instanceof DataFile) { destinationKey = key; + } else if((dvObject==null) && (key 
!=null)) { + //direct access + destinationKey = key; } else { logger.warning("Trying to check if a path exists is only supported for a data file."); } @@ -751,7 +806,7 @@ String getMainFileKey() throws IOException { if (storageIdentifier.startsWith(this.driverId + "://")) { bucketName = storageIdentifier.substring((this.driverId + "://").length(), storageIdentifier.lastIndexOf(":")); - key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); + key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); } else { throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object."); } @@ -781,7 +836,7 @@ public String generateTemporaryS3Url() throws IOException { key = getMainFileKey(); java.util.Date expiration = new java.util.Date(); long msec = expiration.getTime(); - msec += 1000 * getUrlExpirationMinutes(); + msec += 60 * 1000 * getUrlExpirationMinutes(); expiration.setTime(msec); GeneratePresignedUrlRequest generatePresignedUrlRequest = @@ -828,6 +883,40 @@ public String generateTemporaryS3Url() throws IOException { } } + public String generateTemporaryS3UploadUrl() throws IOException { + + key = getMainFileKey(); + java.util.Date expiration = new java.util.Date(); + long msec = expiration.getTime(); + msec += 60 * 1000 * getUrlExpirationMinutes(); + expiration.setTime(msec); + + GeneratePresignedUrlRequest generatePresignedUrlRequest = + new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration); + //Require user to add this header to indicate a temporary file + generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); + + URL presignedUrl; + try { + presignedUrl = s3.generatePresignedUrl(generatePresignedUrlRequest); + } catch (SdkClientException sce) { + logger.warning("SdkClientException generating temporary S3 url for "+key+" ("+sce.getMessage()+")"); + 
presignedUrl = null; + } + String urlString = null; + if (presignedUrl != null) { + String endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url"); + String proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url"); + if(proxy!=null) { + urlString = presignedUrl.toString().replace(endpoint, proxy); + } else { + urlString = presignedUrl.toString(); + } + } + + return urlString; + } + int getUrlExpirationMinutes() { String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); if (optionValue != null) { @@ -877,8 +966,34 @@ private void readSettings() { s3profile = System.getProperty("dataverse.files." + this.driverId + ".profile","default"); bucketName = System.getProperty("dataverse.files." + this.driverId + ".bucket-name"); - - + } + + + public void removeTempTag() throws IOException { + if (!(dvObject instanceof DataFile)) { + logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); + throw new IOException("Attempt to remove temp tag from non-file S3 Object"); + } + try { + + key = getMainFileKey(); + DeleteObjectTaggingRequest deleteObjectTaggingRequest = new DeleteObjectTaggingRequest(bucketName, key); + //NOte - currently we only use one tag so delete is the fastest and cheapest way to get rid of that one tag + //Otherwise you have to get tags, remove the one you don't want and post new tags and get charged for the operations + s3.deleteObjectTagging(deleteObjectTaggingRequest); + } catch (SdkClientException sce) { + if(sce.getMessage().contains("Status Code: 501")) { + // In this case, it's likely that tags are not implemented at all (e.g. by Minio) so no tag was set either and it's just something to be aware of + logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); + } else { + // In this case, the assumption is that adding tags has worked, so not removing it is a problem that should be looked into. 
+ logger.severe("Unable to remove temp tag from : " + bucketName + " : " + key); + } + } catch (IOException e) { + logger.warning("Could not create key for S3 object." ); + e.printStackTrace(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 9e0cf7e11b8..2f66eec5f4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -533,4 +533,13 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE // By default, we open the file in read mode: return false; } + + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1")); + if(limit>0 && getSize()>limit) { + return false; + } else { + return true; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index f44e33404c9..53af964fc14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -120,10 +120,12 @@ public class AddReplaceFileHelper{ private InputStream newFileInputStream; // step 20 private String newFileName; // step 20 private String newFileContentType; // step 20 + private String newStorageIdentifier; // step 20 + private String newCheckSum; // step 20 + // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- // Instance variables derived from other input // ----------------------------------- @@ -258,6 +260,7 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, public boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, + String 
newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams){ @@ -272,7 +275,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } //return this.runAddFile(this.dataset, newFileName, newFileContentType, newFileInputStream, optionalFileParams); - return this.runAddReplaceFile(dataset, newFileName, newFileContentType, newFileInputStream, optionalFileParams); + return this.runAddReplaceFile(dataset, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); } @@ -342,9 +345,7 @@ public boolean runForceReplaceFile(Long oldFileId, } - - - public boolean runReplaceFile(Long oldFileId, + public boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, InputStream newFileInputStream, @@ -386,13 +387,19 @@ public boolean runReplaceFile(Long oldFileId, * * The UI will call Phase 1 on initial upload and * then run Phase 2 if the user chooses to save the changes. + * @param newStorageIdentifier * * * @return */ + private boolean runAddReplaceFile(Dataset owner, String newFileName, String newFileContentType, + InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runAddReplaceFile(dataset,newFileName, newFileContentType, null, newFileInputStream, optionalFileParams); + } + private boolean runAddReplaceFile(Dataset dataset, String newFileName, String newFileContentType, - InputStream newFileInputStream, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams){ // Run "Phase 1" - Initial ingest of file + error check @@ -401,6 +408,7 @@ private boolean runAddReplaceFile(Dataset dataset, boolean phase1Success = runAddReplacePhase1(dataset, newFileName, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams ); @@ -429,6 +437,7 @@ public boolean runReplaceFromUI_Phase1(Long oldFileId, String newFileName, String newFileContentType, InputStream newFileInputStream, + String 
fullStorageId, OptionalFileParams optionalFileParams){ @@ -449,7 +458,8 @@ public boolean runReplaceFromUI_Phase1(Long oldFileId, return this.runAddReplacePhase1(fileToReplace.getOwner(), newFileName, - newFileContentType, + newFileContentType, + fullStorageId, newFileInputStream, optionalFileParams); @@ -462,13 +472,14 @@ public boolean runReplaceFromUI_Phase1(Long oldFileId, * * Phase 1 (here): Add/replace the file and make sure there are no errors * But don't update the Dataset (yet) + * @param newStorageIdentifier * * @return */ private boolean runAddReplacePhase1(Dataset dataset, String newFileName, String newFileContentType, - InputStream newFileInputStream, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams){ if (this.hasError()){ @@ -487,11 +498,16 @@ private boolean runAddReplacePhase1(Dataset dataset, } msgt("step_020_loadNewFile"); - if (!this.step_020_loadNewFile(newFileName, newFileContentType, newFileInputStream)){ + if (!this.step_020_loadNewFile(newFileName, newFileContentType, newStorageIdentifier, newFileInputStream)){ return false; } - + if(optionalFileParams != null) { + if(optionalFileParams.hasCheckSum()) { + newCheckSum = optionalFileParams.getCheckSum(); + } + } + msgt("step_030_createNewFilesViaIngest"); if (!this.step_030_createNewFilesViaIngest()){ return false; @@ -914,7 +930,7 @@ private boolean step_015_auto_check_permissions(Dataset datasetToCheck){ } - private boolean step_020_loadNewFile(String fileName, String fileContentType, InputStream fileInputStream){ + private boolean step_020_loadNewFile(String fileName, String fileContentType, String storageIdentifier, InputStream fileInputStream){ if (this.hasError()){ return false; @@ -932,18 +948,23 @@ private boolean step_020_loadNewFile(String fileName, String fileContentType, In } - if (fileInputStream == null){ - this.addErrorSevere(getBundleErr("file_upload_failed")); - return false; - } - + if (fileInputStream == null) { + if 
(storageIdentifier == null) { + this.addErrorSevere(getBundleErr("file_upload_failed")); + return false; + } + } + newFileName = fileName; newFileContentType = fileContentType; + + //One of these will be null + newStorageIdentifier = storageIdentifier; newFileInputStream = fileInputStream; return true; } - + /** * Optional: old file to replace @@ -1050,6 +1071,8 @@ private boolean step_030_createNewFilesViaIngest(){ this.newFileInputStream, this.newFileName, this.newFileContentType, + this.newStorageIdentifier, + this.newCheckSum, this.systemConfig); } catch (IOException ex) { diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileReplacePageHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileReplacePageHelper.java index e6d7c1e5ebe..24ba8b663bc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileReplacePageHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileReplacePageHelper.java @@ -94,16 +94,17 @@ public boolean resetReplaceFileHelper(){ /** * Handle native file replace + * @param checkSum * @param event */ - public boolean handleNativeFileUpload(InputStream inputStream, String fileName, String fileContentType) { + public boolean handleNativeFileUpload(InputStream inputStream, String fullStorageId, String fileName, String fileContentType, String checkSum) { phase1Success = false; // Preliminary sanity check // - if (inputStream == null){ - throw new NullPointerException("inputStream cannot be null"); + if ((inputStream == null)&&(fullStorageId==null)){ + throw new NullPointerException("inputStream and storageId cannot both be null"); } if (fileName == null){ throw new NullPointerException("fileName cannot be null"); @@ -111,14 +112,25 @@ public boolean handleNativeFileUpload(InputStream inputStream, String fileName, if (fileContentType == null){ throw new NullPointerException("fileContentType cannot be null"); } - + + OptionalFileParams ofp = null; + if(checkSum != null) { + try { + 
ofp = new OptionalFileParams(null); + } catch (DataFileTagException e) { + //Shouldn't happen with null input + e.printStackTrace(); + } + ofp.setCheckSum(checkSum); + } // Run 1st phase of replace // replaceFileHelper.runReplaceFromUI_Phase1(fileToReplace.getId(), fileName, fileContentType, inputStream, - null + fullStorageId, + ofp ); // Did it work? diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileSizeChecker.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileSizeChecker.java index 8d24270c76c..06b3f467867 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileSizeChecker.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/FileSizeChecker.java @@ -6,9 +6,6 @@ package edu.harvard.iq.dataverse.datasetutility; import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; -import java.util.Collections; -import java.util.logging.Logger; /** * Convenience methods for checking max. file size @@ -16,94 +13,24 @@ */ public class FileSizeChecker { - private static final Logger logger = Logger.getLogger(FileSizeChecker.class.getCanonicalName()); + /* This method turns a number of bytes into a human readable version + */ + public static String bytesToHumanReadable(long v) { + return bytesToHumanReadable(v, 1); + } + + /* This method turns a number of bytes into a human readable version + * with figs decimal places + */ + public static String bytesToHumanReadable(long v, int figs) { + if (v < 1024) { + return v + " " + BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"); + } + // 63 - because long has 63 binary digits + int trailingBin0s = (63 - Long.numberOfLeadingZeros(v))/10; + //String base = "%."+figs+"f %s"+ BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"); + return String.format("%."+figs+"f %s"+ BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"), (double)v / (1L << (trailingBin0s*10)), + " 
KMGTPE".charAt(trailingBin0s)); + } - SystemConfig systemConfig; - - /** - * constructor - */ - public FileSizeChecker(SystemConfig systemConfig){ - if (systemConfig == null){ - throw new NullPointerException("systemConfig cannot be null"); - } - this.systemConfig = systemConfig; - } - - public FileSizeResponse isAllowedFileSize(Long filesize){ - - if (filesize == null){ - throw new NullPointerException("filesize cannot be null"); - //return new FileSizeResponse(false, "The file size could not be found!"); - } - - Long maxFileSize = systemConfig.getMaxFileUploadSize(); - - // If no maxFileSize in the database, set it to unlimited! - // - if (maxFileSize == null){ - return new FileSizeResponse(true, - BundleUtil.getStringFromBundle("file.addreplace.file_size_ok") - ); - } - - // Good size! - // - if (filesize <= maxFileSize){ - return new FileSizeResponse(true, - BundleUtil.getStringFromBundle("file.addreplace.file_size_ok") - ); - } - - // Nope! Sorry! File is too big - // - String errMsg = BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit", Collections.singletonList(bytesToHumanReadable(maxFileSize))); - - return new FileSizeResponse(false, errMsg); - - } - - /* This method turns a number of bytes into a human readable version - */ - public static String bytesToHumanReadable(long v) { - return bytesToHumanReadable(v, 1); - } - - /* This method turns a number of bytes into a human readable version - * with figs decimal places - */ - public static String bytesToHumanReadable(long v, int figs) { - if (v < 1024) { - return v + " " + BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"); - } - // 63 - because long has 63 binary digits - int trailingBin0s = (63 - Long.numberOfLeadingZeros(v))/10; - //String base = "%."+figs+"f %s"+ BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"); - return String.format("%."+figs+"f %s"+ BundleUtil.getStringFromBundle("file.addreplace.error.byte_abrev"), (double)v / (1L << 
(trailingBin0s*10)), - " KMGTPE".charAt(trailingBin0s)); - } - - /** - * Inner class that can also return an error message - */ - public class FileSizeResponse{ - - public boolean fileSizeOK; - public String userMsg; - - public FileSizeResponse(boolean isOk, String msg){ - - fileSizeOK = isOk; - userMsg = msg; - } - - public boolean isFileSizeOK(){ - return fileSizeOK; - } - - public String getUserMessage(){ - return userMsg; - } - - } // end inner class } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java index 6459715e518..e48d96e355d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/OptionalFileParams.java @@ -61,7 +61,15 @@ public class OptionalFileParams { private boolean restrict = false; public static final String RESTRICT_ATTR_NAME = "restrict"; - + + private String storageIdentifier; + public static final String STORAGE_IDENTIFIER_ATTR_NAME = "storageIdentifier"; + private String fileName; + public static final String FILE_NAME_ATTR_NAME = "fileName"; + private String mimeType; + public static final String MIME_TYPE_ATTR_NAME = "mimeType"; + private String checkSum; + public static final String CHECKSUM_ATTR_NAME = "md5Hash"; public OptionalFileParams(String jsonData) throws DataFileTagException{ @@ -184,7 +192,43 @@ public boolean hasProvFreeform(){ } return true; } - + + public boolean hasStorageIdentifier() { + return ((storageIdentifier!=null)&&(!storageIdentifier.isEmpty())); + } + + public String getStorageIdentifier() { + return storageIdentifier; + } + + public boolean hasFileName() { + return ((fileName!=null)&&(!fileName.isEmpty())); + } + + public String getFileName() { + return fileName; + } + + public boolean hasMimetype() { + return ((mimeType!=null)&&(!mimeType.isEmpty())); + } + + public String getMimeType() { + return 
mimeType; + } + + public void setCheckSum(String checkSum) { + this.checkSum = checkSum; + } + + public boolean hasCheckSum() { + return ((checkSum!=null)&&(!checkSum.isEmpty())); + } + + public String getCheckSum() { + return checkSum; + } + /** * Set tags * @param tags @@ -281,6 +325,38 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{ this.restrict = Boolean.valueOf(jsonObj.get(RESTRICT_ATTR_NAME).getAsString()); } + // ------------------------------- + // get storage identifier as string + // ------------------------------- + if ((jsonObj.has(STORAGE_IDENTIFIER_ATTR_NAME)) && (!jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).isJsonNull())){ + + this.storageIdentifier = jsonObj.get(STORAGE_IDENTIFIER_ATTR_NAME).getAsString(); + } + + // ------------------------------- + // get file name as string + // ------------------------------- + if ((jsonObj.has(FILE_NAME_ATTR_NAME)) && (!jsonObj.get(FILE_NAME_ATTR_NAME).isJsonNull())){ + + this.fileName = jsonObj.get(FILE_NAME_ATTR_NAME).getAsString(); + } + + // ------------------------------- + // get mimetype as string + // ------------------------------- + if ((jsonObj.has(MIME_TYPE_ATTR_NAME)) && (!jsonObj.get(MIME_TYPE_ATTR_NAME).isJsonNull())){ + + this.mimeType = jsonObj.get(MIME_TYPE_ATTR_NAME).getAsString(); + } + + // ------------------------------- + // get checkSum as string + // ------------------------------- + if ((jsonObj.has(CHECKSUM_ATTR_NAME)) && (!jsonObj.get(CHECKSUM_ATTR_NAME).isJsonNull())){ + + this.checkSum = jsonObj.get(CHECKSUM_ATTR_NAME).getAsString(); + } + // ------------------------------- // get tags // ------------------------------- @@ -516,5 +592,5 @@ private void replaceFileDataTagsInFile(DataFile df) throws DataFileTagException{ } } - + } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java index 3ce10e40abe..e97eeb47ab3 
100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java @@ -69,7 +69,7 @@ public CreateNewDatasetCommand(Dataset theDataset, DataverseRequest aRequest, bo protected void additionalParameterTests(CommandContext ctxt) throws CommandException { if ( nonEmpty(getDataset().getIdentifier()) ) { GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(getDataset().getProtocol(), ctxt); - if ( ctxt.datasets().isIdentifierUnique(getDataset().getIdentifier(), getDataset(), idServiceBean) ) { + if ( !ctxt.datasets().isIdentifierUnique(getDataset().getIdentifier(), getDataset(), idServiceBean) ) { throw new IllegalCommandException(String.format("Dataset with identifier '%s', protocol '%s' and authority '%s' already exists", getDataset().getIdentifier(), getDataset().getProtocol(), getDataset().getAuthority()), this); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 0bcf11d371d..fefa8707c8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -15,6 +15,9 @@ import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; + +import javax.validation.ConstraintViolationException; + import org.apache.solr.client.solrj.SolrServerException; /** @@ -119,7 +122,13 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (editVersion.getId() == null || editVersion.getId() == 0L) { ctxt.em().persist(editVersion); } else { - ctxt.em().merge(editVersion); + try { + ctxt.em().merge(editVersion); + } catch (ConstraintViolationException e) { + logger.log(Level.SEVERE,"Exception: "); + 
e.getConstraintViolations().forEach(err->logger.log(Level.SEVERE,err.toString())); + throw e; + } } for (DataFile dataFile : theDataset.getFiles()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 1cb588f288f..a29d6ac7fbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -72,6 +72,12 @@ public class DdiExportUtil { private static final Logger logger = Logger.getLogger(DdiExportUtil.class.getCanonicalName()); + public static final String NOTE_TYPE_TERMS_OF_USE = "DVN:TOU"; + public static final String NOTE_TYPE_TERMS_OF_ACCESS = "DVN:TOA"; + public static final String NOTE_TYPE_DATA_ACCESS_PLACE = "DVN:DAP"; + + + public static final String LEVEL_DV = "dv"; @EJB VariableServiceBean variableService; @@ -174,7 +180,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title)); + writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title)); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); @@ -182,17 +188,29 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo - + writeOtherIdElement(xmlw, version); xmlw.writeEndElement(); // titlStmt writeAuthorsElement(xmlw, version); writeProducersElement(xmlw, version); xmlw.writeStartElement("distStmt"); - writeFullElement(xmlw, "distrbtr", datasetDto.getPublisher()); - 
writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + if (datasetDto.getPublisher() != null && !datasetDto.getPublisher().equals("")) { + xmlw.writeStartElement("distrbtr"); + writeAttribute(xmlw, "source", "archive"); + xmlw.writeCharacters(datasetDto.getPublisher()); + xmlw.writeEndElement(); //distrbtr + } + writeDistributorsElement(xmlw, version); + writeContactsElement(xmlw, version); + writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + xmlw.writeEndElement(); // diststmt + writeSeriesElement(xmlw, version); + xmlw.writeEndElement(); // citation //End Citation Block @@ -202,53 +220,65 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version); //Subject and Keywords writeAbstractElement(xmlw, version); // Description - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); - writeSummaryDescriptionElement(xmlw, version); - writeRelPublElement(xmlw, version); + writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + //////// + xmlw.writeEndElement(); // stdyInfo - writeOtherIdElement(xmlw, version); - writeDistributorsElement(xmlw, version); - writeContactsElement(xmlw, version); - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + writeMethodElement(xmlw, version); + writeDataAccess(xmlw , version); + writeOtherStudyMaterial(xmlw , version); + + writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + xmlw.writeEndElement(); // stdyDscr + + } + + private static void 
writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersionDTO version) throws XMLStreamException { + xmlw.writeStartElement("othrStdyMat"); writeFullElementList(xmlw, "relMat", dto2PrimitiveList(version, DatasetFieldConstant.relatedMaterial)); writeFullElementList(xmlw, "relStdy", dto2PrimitiveList(version, DatasetFieldConstant.relatedDatasets)); + writeRelPublElement(xmlw, version); writeFullElementList(xmlw, "othRefs", dto2PrimitiveList(version, DatasetFieldConstant.otherReferences)); - writeSeriesElement(xmlw, version); - writeSoftwareElement(xmlw, version); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeFullElement(xmlw, "srcOrig", dto2Primitive(version, DatasetFieldConstant.originOfSources)); - writeFullElement(xmlw, "srcChar", dto2Primitive(version, DatasetFieldConstant.characteristicOfSources)); - writeFullElement(xmlw, "srcDocu", dto2Primitive(version, DatasetFieldConstant.accessToSources)); - xmlw.writeEndElement(); // stdyInfo - // End Info Block - - //Social Science Metadata block - - writeMethodElement(xmlw, version); - - //Terms of Use and Access - writeFullElement(xmlw, "useStmt", version.getTermsOfUse()); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "citeReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "dataAccs", version.getTermsOfAccess()); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, 
"contact", version.getContactForAccess()); - writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); - - - xmlw.writeEndElement(); // stdyDscr + xmlw.writeEndElement(); //othrStdyMat + } + + private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO version) throws XMLStreamException { + xmlw.writeStartElement("dataAccs"); + if (version.getTermsOfUse() != null && !version.getTermsOfUse().trim().equals("")) { + xmlw.writeStartElement("notes"); + writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_USE); + writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeCharacters(version.getTermsOfUse()); + xmlw.writeEndElement(); //notes + } + if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { + xmlw.writeStartElement("notes"); + writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); + writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeCharacters(version.getTermsOfAccess()); + xmlw.writeEndElement(); //notes + } + xmlw.writeStartElement("setAvail"); + writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + writeFullElement(xmlw, "complete", version.getStudyCompletion()); + xmlw.writeEndElement(); //setAvail + xmlw.writeStartElement("useStmt"); + writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + writeFullElement(xmlw, "restrctn", version.getRestrictions()); + writeFullElement(xmlw, "contact", version.getContactForAccess()); + writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + writeFullElement(xmlw, "conditions", version.getConditions()); 
+ writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + xmlw.writeEndElement(); //useStmt + xmlw.writeEndElement(); //dataAccs } private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datasetDto) throws XMLStreamException { @@ -275,10 +305,15 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); - xmlw.writeEndElement(); // IDNo + xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt xmlw.writeStartElement("distStmt"); - writeFullElement(xmlw, "distrbtr", datasetDto.getPublisher()); + if (datasetDto.getPublisher() != null && !datasetDto.getPublisher().equals("")) { + xmlw.writeStartElement("distrbtr"); + writeAttribute(xmlw, "source", "archive"); + xmlw.writeCharacters(datasetDto.getPublisher()); + xmlw.writeEndElement(); // distrbtr + } writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt @@ -364,26 +399,44 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset if("geospatial".equals(key)){ for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.geographicCoverage.equals(fieldDTO.getTypeName())) { + for (HashSet foo : fieldDTO.getMultipleCompound()) { + HashMap geoMap = new HashMap<>(); for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.country.equals(next.getTypeName())) { - writeFullElement(xmlw, "nation", next.getSinglePrimitive()); + geoMap.put("country", next.getSinglePrimitive()); } if (DatasetFieldConstant.city.equals(next.getTypeName())) { - writeFullElement(xmlw, "geogCover", next.getSinglePrimitive()); + geoMap.put("city", next.getSinglePrimitive()); } if (DatasetFieldConstant.state.equals(next.getTypeName())) { - writeFullElement(xmlw, 
"geogCover", next.getSinglePrimitive()); + geoMap.put("state", next.getSinglePrimitive()); } if (DatasetFieldConstant.otherGeographicCoverage.equals(next.getTypeName())) { - writeFullElement(xmlw, "geogCover", next.getSinglePrimitive()); + geoMap.put("otherGeographicCoverage", next.getSinglePrimitive()); } } + + if (geoMap.get("country") != null) { + writeFullElement(xmlw, "nation", geoMap.get("country")); + } + if (geoMap.get("city") != null) { + writeFullElement(xmlw, "geogCover", geoMap.get("city")); + } + if (geoMap.get("state") != null) { + writeFullElement(xmlw, "geogCover", geoMap.get("state")); + } + if (geoMap.get("otherGeographicCoverage") != null) { + writeFullElement(xmlw, "geogCover", geoMap.get("otherGeographicCoverage")); + } + } } if (DatasetFieldConstant.geographicBoundingBox.equals(fieldDTO.getTypeName())) { + for (HashSet foo : fieldDTO.getMultipleCompound()) { + xmlw.writeStartElement("geoBndBox"); for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.westLongitude.equals(next.getTypeName())) { @@ -400,7 +453,9 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset } } + xmlw.writeEndElement(); } + } } writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); @@ -444,22 +499,32 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO writeFullElement(xmlw, "dataCollector", dto2Primitive(version, DatasetFieldConstant.dataCollector)); writeFullElement(xmlw, "collectorTraining", dto2Primitive(version, DatasetFieldConstant.collectorTraining)); writeFullElement(xmlw, "frequenc", dto2Primitive(version, DatasetFieldConstant.frequencyOfDataCollection)); - writeFullElement(xmlw, "sampProc", dto2Primitive(version, DatasetFieldConstant.samplingProcedure)); + writeFullElement(xmlw, "sampProc", dto2Primitive(version, DatasetFieldConstant.samplingProcedure)); + 
writeTargetSampleElement(xmlw, version); - writeFullElement(xmlw, "deviat", dto2Primitive(version, DatasetFieldConstant.deviationsFromSampleDesign)); + + writeFullElement(xmlw, "deviat", dto2Primitive(version, DatasetFieldConstant.deviationsFromSampleDesign)); + + xmlw.writeStartElement("sources"); + writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + writeFullElement(xmlw, "srcOrig", dto2Primitive(version, DatasetFieldConstant.originOfSources)); + writeFullElement(xmlw, "srcChar", dto2Primitive(version, DatasetFieldConstant.characteristicOfSources)); + writeFullElement(xmlw, "srcDocu", dto2Primitive(version, DatasetFieldConstant.accessToSources)); + xmlw.writeEndElement(); //sources + writeFullElement(xmlw, "collMode", dto2Primitive(version, DatasetFieldConstant.collectionMode)); writeFullElement(xmlw, "resInstru", dto2Primitive(version, DatasetFieldConstant.researchInstrument)); writeFullElement(xmlw, "collSitu", dto2Primitive(version, DatasetFieldConstant.dataCollectionSituation)); writeFullElement(xmlw, "actMin", dto2Primitive(version, DatasetFieldConstant.actionsToMinimizeLoss)); - writeFullElement(xmlw, "conOps", dto2Primitive(version, DatasetFieldConstant.controlOperations)); + writeFullElement(xmlw, "conOps", dto2Primitive(version, DatasetFieldConstant.controlOperations)); writeFullElement(xmlw, "weight", dto2Primitive(version, DatasetFieldConstant.weighting)); writeFullElement(xmlw, "cleanOps", dto2Primitive(version, DatasetFieldConstant.cleaningOperations)); xmlw.writeEndElement(); //dataColl xmlw.writeStartElement("anlyInfo"); - writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); writeFullElement(xmlw, "respRate", dto2Primitive(version, DatasetFieldConstant.responseRate)); - writeFullElement(xmlw, "estSmpErr", dto2Primitive(version, 
DatasetFieldConstant.samplingErrorEstimates)); + writeFullElement(xmlw, "EstSmpErr", dto2Primitive(version, DatasetFieldConstant.samplingErrorEstimates)); writeFullElement(xmlw, "dataAppr", dto2Primitive(version, DatasetFieldConstant.otherDataAppraisal)); xmlw.writeEndElement(); //anlyInfo writeNotesElement(xmlw, version); @@ -508,7 +573,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO writeAttribute(xmlw,"vocab",keywordVocab); } if(!keywordURI.isEmpty()){ - writeAttribute(xmlw,"URI",keywordURI); + writeAttribute(xmlw,"vocabURI",keywordURI); } xmlw.writeCharacters(keywordValue); xmlw.writeEndElement(); //Keyword @@ -539,7 +604,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO writeAttribute(xmlw,"vocab",topicClassificationVocab); } if(!topicClassificationURI.isEmpty()){ - writeAttribute(xmlw,"URI",topicClassificationURI); + writeAttribute(xmlw,"vocabURI",topicClassificationURI); } xmlw.writeCharacters(topicClassificationValue); xmlw.writeEndElement(); //topcClas @@ -558,9 +623,9 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { + xmlw.writeStartElement("rspStmt"); for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.author.equals(fieldDTO.getTypeName())) { - xmlw.writeStartElement("rspStmt"); String authorName = ""; String authorAffiliation = ""; for (HashSet foo : fieldDTO.getMultipleCompound()) { @@ -582,10 +647,34 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); //AuthEnty } } - xmlw.writeEndElement(); //rspStmt + + } else if (DatasetFieldConstant.contributor.equals(fieldDTO.getTypeName())) { + String contributorName = ""; + String contributorType = ""; + for (HashSet foo : fieldDTO.getMultipleCompound()) { + for (Iterator iterator = foo.iterator(); iterator.hasNext();) { + FieldDTO 
next = iterator.next(); + if (DatasetFieldConstant.contributorName.equals(next.getTypeName())) { + contributorName = next.getSinglePrimitive(); + } + if (DatasetFieldConstant.contributorType.equals(next.getTypeName())) { + contributorType = next.getSinglePrimitive(); + } + } + if (!contributorName.isEmpty()){ + xmlw.writeStartElement("othId"); + if(!contributorType.isEmpty()){ + writeAttribute(xmlw,"role", contributorType); + } + xmlw.writeCharacters(contributorName); + xmlw.writeEndElement(); //othId + } + } } } + xmlw.writeEndElement(); //rspStmt } + } } @@ -692,6 +781,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); writeFullElement(xmlw, "prodPlac", dto2Primitive(version, DatasetFieldConstant.productionPlace)); + writeSoftwareElement(xmlw, version); writeGrantElement(xmlw, version); xmlw.writeEndElement(); //prodStmt @@ -704,7 +794,7 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.distributor.equals(fieldDTO.getTypeName())) { - xmlw.writeStartElement("distrbtr"); + //xmlw.writeStartElement("distrbtr"); for (HashSet foo : fieldDTO.getMultipleCompound()) { String distributorName = ""; String distributorAffiliation = ""; @@ -747,8 +837,9 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio xmlw.writeEndElement(); //AuthEnty } } - xmlw.writeEndElement(); //rspStmt + //xmlw.writeEndElement(); //rspStmt } + } } } @@ -782,12 +873,27 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO url = next.getSinglePrimitive(); } } - pubString = appendCommaSeparatedValue(citation, IDType); - pubString = appendCommaSeparatedValue(pubString, IDNo); - pubString = appendCommaSeparatedValue(pubString, url); - if (!pubString.isEmpty()){ - 
xmlw.writeStartElement("relPubl"); - xmlw.writeCharacters(pubString); + if (citation != null && !citation.trim().equals("")) { + xmlw.writeStartElement("relPubl"); + xmlw.writeStartElement("citation"); + if (IDNo != null && !IDNo.trim().equals("")) { + xmlw.writeStartElement("titlStmt"); + xmlw.writeStartElement("IDNo"); + if (IDType != null && !IDType.trim().equals("")) { + xmlw.writeAttribute("agency", IDType ); + } + xmlw.writeCharacters(IDNo); + xmlw.writeEndElement(); //IDNo + xmlw.writeEndElement(); // titlStmt + } + + writeFullElement(xmlw,"biblCit",citation); + xmlw.writeEndElement(); //citation + if (url != null && !url.trim().equals("") ) { + xmlw.writeStartElement("ExtLink"); + xmlw.writeAttribute("URI", url); + xmlw.writeEndElement(); //ExtLink + } xmlw.writeEndElement(); //relPubl } } @@ -988,6 +1094,7 @@ private static void writeTargetSampleElement(XMLStreamWriter xmlw, DatasetVersio if ("socialscience".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.targetSampleSize.equals(fieldDTO.getTypeName())) { + xmlw.writeStartElement("targetSampleSize"); String sizeFormula = ""; String actualSize = ""; Set foo = fieldDTO.getSingleCompound(); @@ -1000,6 +1107,7 @@ private static void writeTargetSampleElement(XMLStreamWriter xmlw, DatasetVersio actualSize = next.getSinglePrimitive(); } } + if (!sizeFormula.isEmpty()) { xmlw.writeStartElement("sampleSizeFormula"); xmlw.writeCharacters(sizeFormula); @@ -1010,6 +1118,7 @@ private static void writeTargetSampleElement(XMLStreamWriter xmlw, DatasetVersio xmlw.writeCharacters(actualSize); xmlw.writeEndElement(); //sampleSize } + xmlw.writeEndElement(); // targetSampleSize } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b06877df2ae..8c624521a56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -44,6 +44,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccessOption; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; +import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator; import edu.harvard.iq.dataverse.datavariable.SummaryStatistic; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -72,6 +73,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.nio.channels.FileChannel; @@ -175,16 +177,16 @@ public List saveAndAddFilesToDataset(DatasetVersion version, List saveAndAddFilesToDataset(DatasetVersion version, List dataAccess = DataAccess.getStorageIO(dataFile); @@ -347,18 +302,74 @@ public List saveAndAddFilesToDataset(DatasetVersion version, List)dataAccess).removeTempTag(); + } } catch (IOException ioex) { logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } savedSuccess = true; - logger.info("unattached: " + unattached); - dataFile.setOwner(null); - + dataFile.setOwner(null); } logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); - + boolean belowLimit = false; + + try { + belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); + } catch (IOException e) { + logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); + } + + if (savedSuccess && belowLimit) { + // These are all brand new files, so they should all have + // one filemetadata total. -- L.A. 
+ FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); + String fileName = fileMetadata.getLabel(); + + boolean metadataExtracted = false; + if (FileUtil.canIngestAsTabular(dataFile)) { + /* + * Note that we don't try to ingest the file right away - instead we mark it as + * "scheduled for ingest", then at the end of the save process it will be queued + * for async. ingest in the background. In the meantime, the file will be + * ingested as a regular, non-tabular file, and appear as such to the user, + * until the ingest job is finished with the Ingest Service. + */ + dataFile.SetIngestScheduled(); + } else if (fileMetadataExtractable(dataFile)) { + + try { + // FITS is the only type supported for metadata + // extraction, as of now. -- L.A. 4.0 + dataFile.setContentType("application/fits"); + metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); + } catch (IOException mex) { + logger.severe("Caught exception trying to extract indexable metadata from file " + + fileName + ", " + mex.getMessage()); + } + if (metadataExtracted) { + logger.fine("Successfully extracted indexable metadata from file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from file " + fileName); + } + } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { + // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"! + // "text/tsv" should be used instead: + dataFile.setContentType(FileUtil.MIME_TYPE_TSV); + } + } + // ... and let's delete the main temp file if it exists: + if(tempLocationPath!=null) { + try { + logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); + Files.delete(tempLocationPath); + } catch (IOException ex) { + // (non-fatal - it's just a temp file.) 
+ logger.warning("Failed to delete temp file " + tempLocationPath.toString()); + } + } if (savedSuccess) { // temp dbug line // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + @@ -955,7 +966,7 @@ public boolean ingestAsTabular(Long datafile_id) { throw new EJBException("Deliberate database save failure"); } */ - dataFile = fileService.save(dataFile); + dataFile = fileService.saveInTransaction(dataFile); databaseSaveSuccessful = true; logger.fine("Ingest (" + dataFile.getFileMetadata().getLabel() + "."); @@ -982,7 +993,7 @@ public boolean ingestAsTabular(Long datafile_id) { } if (!databaseSaveSuccessful) { - logger.warning("Ingest failure (!databaseSaveSuccessful)."); + logger.warning("Ingest failure (failed to save the tabular data in the database; file left intact as uploaded)."); return false; } @@ -1005,6 +1016,9 @@ public boolean ingestAsTabular(Long datafile_id) { dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); + + dataFile = fileService.save(dataFile); + logger.fine("saved data file after updating the size"); // delete the temp tab-file: tabFile.delete(); @@ -1146,12 +1160,17 @@ public boolean fileMetadataExtractable(DataFile dataFile) { public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException { boolean ingestSuccessful = false; - FileInputStream tempFileInputStream = null; - - try { - tempFileInputStream = new FileInputStream(new File(tempFileLocation)); - } catch (FileNotFoundException notfoundEx) { - throw new IOException("Could not open temp file "+tempFileLocation); + InputStream tempFileInputStream = null; + if(tempFileLocation == null) { + StorageIO sio = dataFile.getStorageIO(); + sio.open(DataAccessOption.READ_ACCESS); + tempFileInputStream = sio.getInputStream(); + } else { + try { + tempFileInputStream = new FileInputStream(new File(tempFileLocation)); + } catch (FileNotFoundException 
notfoundEx) { + throw new IOException("Could not open temp file "+tempFileLocation); + } } // Locate metadata extraction plugin for the file format by looking diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java index ce9e281e986..eb313631077 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java @@ -15,7 +15,11 @@ public class FakePidProviderServiceBean extends AbstractGlobalIdServiceBean { @Override public boolean alreadyExists(DvObject dvo) throws Exception { - return true; + /* Direct upload creates an identifier prior to calling the CreateNewDatasetCommand - if this is true, that call fails. + * In that case, the local test (DatasetServiceBean.isIdentifierLocallyUnique()) correctly returns false since it tests the database. + * This provider could do the same check or use some other method to test alreadyExists(DvObject) =true failures. 
(no tests found currently) + */ + return false; } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 473bea561b4..710060ef817 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -1,9 +1,13 @@ package edu.harvard.iq.dataverse.settings; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.api.ApiBlockingFilter; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.util.StringUtil; + +import java.io.StringReader; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -12,6 +16,8 @@ import javax.ejb.EJB; import javax.ejb.Stateless; import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonObject; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; @@ -162,7 +168,7 @@ public enum Key { /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) SolrMaxFileSizeForFullTextIndexing, //long - size in bytes (default unset/no limit) - /** Key for limiting the number of bytes uploaded via the Data Deposit API, UI (web site and . */ + /** Default Key for limiting the number of bytes uploaded via the Data Deposit API, UI (web site and . */ MaxFileUploadSizeInBytes, /** Key for if ScrubMigrationData is enabled or disabled. 
*/ ScrubMigrationData, @@ -411,7 +417,13 @@ Whether Harvesting (OAI) service is enabled * Lifespan, in minutes, of a login user session  * (both DataverseSession and the underlying HttpSession) */ - LoginSessionTimeout; + LoginSessionTimeout, + + /** + * Shibboleth affiliation attribute which holds information about the affiliation of the user (e.g. ou) + */ + ShibAffiliationAttribute + ; @Override public String toString() { @@ -477,6 +489,44 @@ public Long getValueForKeyAsLong(Key key){ } + /** + * Attempt to convert a value in a compound key to a long + * - Applicable for keys such as MaxFileUploadSizeInBytes after multistore capabilities were added in ~v4.20 + * backward compatible with a single value. For multi values, the key's value must be an object with param:value pairs. + * A "default":value pair is allowed and will be returned for any param that doesn't have a defined value. + * + * On failure (key not found or string not convertible to a long), returns null + * @param key + * @return + */ + public Long getValueForCompoundKeyAsLong(Key key, String param){ + + String val = this.getValueForKey(key); + + if (val == null){ + return null; + } + + try { + return Long.parseLong(val); + } catch (NumberFormatException ex) { + try ( StringReader rdr = new StringReader(val) ) { + JsonObject settings = Json.createReader(rdr).readObject(); + if(settings.containsKey(param)) { + return Long.parseLong(settings.getString(param)); + } else if(settings.containsKey("default")) { + return Long.parseLong(settings.getString("default")); + } else { + return null; + } + + } catch (Exception e) { + logger.log(Level.WARNING, "Incorrect setting. 
Could not convert \"{0}\" from setting {1} to long: {2}", new Object[]{val, key.toString(), e.getMessage()}); + return null; + } + } + + } /** * Return the value stored, or the default value, in case no setting by that diff --git a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java index 98a2d84aee6..5ceff6b35ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/BundleUtil.java @@ -159,19 +159,20 @@ public static String getStringFromDefaultPropertyFile(String key, String propert } return getStringFromBundleNoMissingCheck(key, null, bundle); } - + + /** + * Return JVM default locale. + * + * For now, this simply forwards default system behaviour. + * That means on JDK8 the system property user.language will be set on startup + * from environment variables like LANG or via Maven arguments (which is important for testing). + * (See also pom.xml for an example how we pinpoint this for reproducible tests!) + * (You should also be aware that good IDEs are honoring settings from pom.xml.) + * + * Nonetheless, someday we might want to have more influence on how this is determined, thus this wrapper. 
+ * @return Dataverse default locale + */ public static Locale getDefaultLocale() { - String localeEnvVar = System.getenv().get("LANG"); - if (localeEnvVar != null) { - if (localeEnvVar.indexOf('.') > 0) { - localeEnvVar = localeEnvVar.substring(0, localeEnvVar.indexOf('.')); - } - if (!"en_US".equals(localeEnvVar)) { - logger.fine("BundleUtil: LOCALE code from the environmental variable is "+localeEnvVar); - return new Locale(localeEnvVar); - } - } - - return new Locale("en"); + return Locale.getDefault(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index ec77e53d790..a4370c7b38f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -79,7 +79,6 @@ import java.util.zip.GZIPInputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import org.apache.commons.io.FilenameUtils; import com.amazonaws.AmazonServiceException; @@ -707,7 +706,7 @@ public static String generateOriginalExtension(String fileType) { return ""; } - public static List createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, SystemConfig systemConfig) throws IOException { + public static List createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, String newStorageIdentifier, String newCheckSum, SystemConfig systemConfig) throws IOException { List datafiles = new ArrayList<>(); String warningMessage = null; @@ -715,381 +714,399 @@ public static List createDataFiles(DatasetVersion version, InputStream // save the file, in the temporary location for now: Path tempFile = null; - Long fileSizeLimit = systemConfig.getMaxFileUploadSize(); - - if (getFilesTempDirectory() != null) { - tempFile = 
Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); - // "temporary" location is the key here; this is why we are not using - // the DataStore framework for this - the assumption is that - // temp files will always be stored on the local filesystem. - // -- L.A. Jul. 2014 - logger.fine("Will attempt to save the file as: " + tempFile.toString()); - Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); - - // A file size check, before we do anything else: - // (note that "no size limit set" = "unlimited") - // (also note, that if this is a zip file, we'll be checking - // the size limit for each of the individual unpacked files) - Long fileSize = tempFile.toFile().length(); - if (fileSizeLimit != null && fileSize > fileSizeLimit) { - try {tempFile.toFile().delete();} catch (Exception ex) {} - throw new IOException (MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit))); - } - - } else { - throw new IOException ("Temp directory is not configured."); - } - logger.fine("mime type supplied: "+suppliedContentType); - // Let's try our own utilities (Jhove, etc.) to determine the file type - // of the uploaded file. (We may already have a mime type supplied for this - // file - maybe the type that the browser recognized on upload; or, if - // it's a harvest, maybe the remote server has already given us the type - // for this file... with our own type utility we may or may not do better - // than the type supplied: - // -- L.A. 
- String recognizedType = null; + Long fileSizeLimit = systemConfig.getMaxFileUploadSizeForStore(version.getDataset().getOwner().getEffectiveStorageDriverId()); String finalType = null; - try { - recognizedType = determineFileType(tempFile.toFile(), fileName); - logger.fine("File utility recognized the file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - // is it any better than the type that was supplied to us, - // if any? - // This is not as trivial a task as one might expect... - // We may need a list of "good" mime types, that should always - // be chosen over other choices available. Maybe it should - // even be a weighed list... as in, "application/foo" should - // be chosen over "application/foo-with-bells-and-whistles". - - // For now the logic will be as follows: - // - // 1. If the contentType supplied (by the browser, most likely) - // is some form of "unknown", we always discard it in favor of - // whatever our own utilities have determined; - // 2. We should NEVER trust the browser when it comes to the - // following "ingestable" types: Stata, SPSS, R; - // 2a. We are willing to TRUST the browser when it comes to - // the CSV and XSLX ingestable types. - // 3. We should ALWAYS trust our utilities when it comes to - // ingestable types. - - if (suppliedContentType == null + if (newStorageIdentifier == null) { + if (getFilesTempDirectory() != null) { + tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); + // "temporary" location is the key here; this is why we are not using + // the DataStore framework for this - the assumption is that + // temp files will always be stored on the local filesystem. + // -- L.A. Jul. 
2014 + logger.fine("Will attempt to save the file as: " + tempFile.toString()); + Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING); + + // A file size check, before we do anything else: + // (note that "no size limit set" = "unlimited") + // (also note, that if this is a zip file, we'll be checking + // the size limit for each of the individual unpacked files) + Long fileSize = tempFile.toFile().length(); + if (fileSizeLimit != null && fileSize > fileSizeLimit) { + try {tempFile.toFile().delete();} catch (Exception ex) {} + throw new IOException (MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit))); + } + + } else { + throw new IOException("Temp directory is not configured."); + } + logger.fine("mime type supplied: " + suppliedContentType); + // Let's try our own utilities (Jhove, etc.) to determine the file type + // of the uploaded file. (We may already have a mime type supplied for this + // file - maybe the type that the browser recognized on upload; or, if + // it's a harvest, maybe the remote server has already given us the type + // for this file... with our own type utility we may or may not do better + // than the type supplied: + // -- L.A. + String recognizedType = null; + + try { + recognizedType = determineFileType(tempFile.toFile(), fileName); + logger.fine("File utility recognized the file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + // is it any better than the type that was supplied to us, + // if any? + // This is not as trivial a task as one might expect... + // We may need a list of "good" mime types, that should always + // be chosen over other choices available. Maybe it should + // even be a weighed list... as in, "application/foo" should + // be chosen over "application/foo-with-bells-and-whistles". + + // For now the logic will be as follows: + // + // 1. 
If the contentType supplied (by the browser, most likely) + // is some form of "unknown", we always discard it in favor of + // whatever our own utilities have determined; + // 2. We should NEVER trust the browser when it comes to the + // following "ingestable" types: Stata, SPSS, R; + // 2a. We are willing to TRUST the browser when it comes to + // the CSV and XSLX ingestable types. + // 3. We should ALWAYS trust our utilities when it comes to + // ingestable types. + + if (suppliedContentType == null || suppliedContentType.equals("") - || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT) - || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY) - || (canIngestAsTabular(suppliedContentType) - && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV) - && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT) - && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX)) + || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT) + || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY) + || (canIngestAsTabular(suppliedContentType) + && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV) + && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT) + && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX)) || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") - || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) - || recognizedType.equals(MIME_TYPE_ZIP)) { - finalType = recognizedType; - } - } - - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } - - if (finalType == null) { - finalType = (suppliedContentType == null || suppliedContentType.equals("")) - ? 
MIME_TYPE_UNDETERMINED_DEFAULT - : suppliedContentType; - } - - // A few special cases: - - // if this is a gzipped FITS file, we'll uncompress it, and ingest it as - // a regular FITS file: - - if (finalType.equals("application/fits-gzipped")) { - - InputStream uncompressedIn = null; - String finalFileName = fileName; - // if the file name had the ".gz" extension, remove it, - // since we are going to uncompress it: - if (fileName != null && fileName.matches(".*\\.gz$")) { - finalFileName = fileName.replaceAll("\\.gz$", ""); - } - - DataFile datafile = null; - try { - uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile())); - File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit); - datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm()); - } catch (IOException | FileExceedsMaxSizeException ioex) { - datafile = null; - } finally { - if (uncompressedIn != null) { - try {uncompressedIn.close();} catch (IOException e) {} - } - } - - // If we were able to produce an uncompressed file, we'll use it - // to create and return a final DataFile; if not, we're not going - // to do anything - and then a new DataFile will be created further - // down, from the original, uncompressed file. - if (datafile != null) { - // remove the compressed temp file: - try { - tempFile.toFile().delete(); - } catch (SecurityException ex) { - // (this is very non-fatal) - logger.warning("Failed to delete temporary file "+tempFile.toString()); - } - - datafiles.add(datafile); - return datafiles; - } - - // If it's a ZIP file, we are going to unpack it and create multiple - // DataFile objects from its contents: - } else if (finalType.equals("application/zip")) { - - ZipInputStream unZippedIn = null; - ZipEntry zipEntry = null; - - int fileNumberLimit = systemConfig.getZipUploadFilesLimit(); - - try { - Charset charset = null; - /* - TODO: (?) 
- We may want to investigate somehow letting the user specify - the charset for the filenames in the zip file... - - otherwise, ZipInputStream bails out if it encounteres a file - name that's not valid in the current charest (i.e., UTF-8, in - our case). It would be a bit trickier than what we're doing for - SPSS tabular ingests - with the lang. encoding pulldown menu - - because this encoding needs to be specified *before* we upload and - attempt to unzip the file. - -- L.A. 4.0 beta12 - logger.info("default charset is "+Charset.defaultCharset().name()); - if (Charset.isSupported("US-ASCII")) { - logger.info("charset US-ASCII is supported."); - charset = Charset.forName("US-ASCII"); - if (charset != null) { - logger.info("was able to obtain charset for US-ASCII"); - } - - } - */ - - if (charset != null) { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); - } else { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); - } - - while (true) { - try { - zipEntry = unZippedIn.getNextEntry(); - } catch (IllegalArgumentException iaex) { - // Note: - // ZipInputStream documentation doesn't even mention that - // getNextEntry() throws an IllegalArgumentException! - // but that's what happens if the file name of the next - // entry is not valid in the current CharSet. - // -- L.A. - warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) 
Saving the file as is."; - logger.warning(warningMessage); - throw new IOException(); - } - - if (zipEntry == null) { - break; - } - // Note that some zip entries may be directories - we - // simply skip them: + || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) + || recognizedType.equals(MIME_TYPE_ZIP)) { + finalType = recognizedType; + } + } + + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); + } + + if (finalType == null) { + finalType = (suppliedContentType == null || suppliedContentType.equals("")) + ? MIME_TYPE_UNDETERMINED_DEFAULT + : suppliedContentType; + } + + // A few special cases: + + // if this is a gzipped FITS file, we'll uncompress it, and ingest it as + // a regular FITS file: + + if (finalType.equals("application/fits-gzipped")) { + + InputStream uncompressedIn = null; + String finalFileName = fileName; + // if the file name had the ".gz" extension, remove it, + // since we are going to uncompress it: + if (fileName != null && fileName.matches(".*\\.gz$")) { + finalFileName = fileName.replaceAll("\\.gz$", ""); + } + + DataFile datafile = null; + try { + uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile())); + File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit); + datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm()); + } catch (IOException | FileExceedsMaxSizeException ioex) { + datafile = null; + } finally { + if (uncompressedIn != null) { + try {uncompressedIn.close();} catch (IOException e) {} + } + } + + // If we were able to produce an uncompressed file, we'll use it + // to create and return a final DataFile; if not, we're not going + // to do anything - and then a new DataFile will be created further + // down, from the original, uncompressed file. 
+ if (datafile != null) { + // remove the compressed temp file: + try { + tempFile.toFile().delete(); + } catch (SecurityException ex) { + // (this is very non-fatal) + logger.warning("Failed to delete temporary file " + tempFile.toString()); + } + + datafiles.add(datafile); + return datafiles; + } + + // If it's a ZIP file, we are going to unpack it and create multiple + // DataFile objects from its contents: + } else if (finalType.equals("application/zip")) { + + ZipInputStream unZippedIn = null; + ZipEntry zipEntry = null; + + int fileNumberLimit = systemConfig.getZipUploadFilesLimit(); + + try { + Charset charset = null; + /* + TODO: (?) + We may want to investigate somehow letting the user specify + the charset for the filenames in the zip file... + - otherwise, ZipInputStream bails out if it encounteres a file + name that's not valid in the current charest (i.e., UTF-8, in + our case). It would be a bit trickier than what we're doing for + SPSS tabular ingests - with the lang. encoding pulldown menu - + because this encoding needs to be specified *before* we upload and + attempt to unzip the file. + -- L.A. 
4.0 beta12 + logger.info("default charset is "+Charset.defaultCharset().name()); + if (Charset.isSupported("US-ASCII")) { + logger.info("charset US-ASCII is supported."); + charset = Charset.forName("US-ASCII"); + if (charset != null) { + logger.info("was able to obtain charset for US-ASCII"); + } - if (!zipEntry.isDirectory()) { - if (datafiles.size() > fileNumberLimit) { - logger.warning("Zip upload - too many files."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + - "); please upload a zip archive with fewer files, if you want them to be ingested " + - "as individual DataFiles."; - throw new IOException(); + } + */ + + if (charset != null) { + unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); + } else { + unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); + } + + while (true) { + try { + zipEntry = unZippedIn.getNextEntry(); + } catch (IllegalArgumentException iaex) { + // Note: + // ZipInputStream documentation doesn't even mention that + // getNextEntry() throws an IllegalArgumentException! + // but that's what happens if the file name of the next + // entry is not valid in the current CharSet. + // -- L.A. + warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) 
Saving the file as is."; + logger.warning(warningMessage); + throw new IOException(); + } + + if (zipEntry == null) { + break; + } + // Note that some zip entries may be directories - we + // simply skip them: + + if (!zipEntry.isDirectory()) { + if (datafiles.size() > fileNumberLimit) { + logger.warning("Zip upload - too many files."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + + String fileEntryName = zipEntry.getName(); + logger.fine("ZipEntry, file: " + fileEntryName); + + if (fileEntryName != null && !fileEntryName.equals("")) { + + String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); + + // Check if it's a "fake" file - a zip archive entry + // created for a MacOS X filesystem element: (these + // start with "._") + if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { + // OK, this seems like an OK file entry - we'll try + // to read it and create a DataFile with it: + + File unZippedTempFile = saveInputStreamInTempFile(unZippedIn, fileSizeLimit); + DataFile datafile = createSingleDataFile(version, unZippedTempFile, null, shortName, + MIME_TYPE_UNDETERMINED_DEFAULT, + systemConfig.getFileFixityChecksumAlgorithm(), null, false); + + if (!fileEntryName.equals(shortName)) { + // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), + // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all + // the leading, trailing and duplicate slashes; then replace all the characters that + // don't pass our validation rules. 
+ String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); + directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); + // if (!"".equals(directoryName)) { + if (!StringUtil.isEmpty(directoryName)) { + logger.fine("setting the directory label to " + directoryName); + datafile.getFileMetadata().setDirectoryLabel(directoryName); + } + } + + if (datafile != null) { + // We have created this datafile with the mime type "unknown"; + // Now that we have it saved in a temporary location, + // let's try and determine its real type: + + String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); + + try { + recognizedType = determineFileType(new File(tempFileName), shortName); + logger.fine("File utility recognized unzipped file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + datafile.setContentType(recognizedType); + } + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); + } + + datafiles.add(datafile); + } + } + } + } + unZippedIn.closeEntry(); + + } + + } catch (IOException ioex) { + // just clear the datafiles list and let + // ingest default to creating a single DataFile out + // of the unzipped file. + logger.warning("Unzipping failed; rolling back to saving the file as is."); + if (warningMessage == null) { + warningMessage = "Failed to unzip the file. Saving the file as is."; + } + + datafiles.clear(); + } catch (FileExceedsMaxSizeException femsx) { + logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage()); + warningMessage = femsx.getMessage() + "; saving the zip file as is, unzipped."; + datafiles.clear(); + } finally { + if (unZippedIn != null) { + try {unZippedIn.close();} catch (Exception zEx) {} + } + } + if (datafiles.size() > 0) { + // link the data files to the dataset/version: + // (except we no longer want to do this! 
-- 4.6) + /*Iterator itf = datafiles.iterator(); + while (itf.hasNext()) { + DataFile datafile = itf.next(); + datafile.setOwner(version.getDataset()); + if (version.getFileMetadatas() == null) { + version.setFileMetadatas(new ArrayList()); } - - String fileEntryName = zipEntry.getName(); - logger.fine("ZipEntry, file: "+fileEntryName); - - if (fileEntryName != null && !fileEntryName.equals("")) { - - String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); - - // Check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - // OK, this seems like an OK file entry - we'll try - // to read it and create a DataFile with it: - - File unZippedTempFile = saveInputStreamInTempFile(unZippedIn, fileSizeLimit); - DataFile datafile = createSingleDataFile(version, unZippedTempFile, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm(), false); - - if (!fileEntryName.equals(shortName)) { - // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), - // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all - // the leading, trailing and duplicate slashes; then replace all the characters that - // don't pass our validation rules. 
- String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); - directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); - //if (!"".equals(directoryName)) { - if (!StringUtil.isEmpty(directoryName)) { - logger.fine("setting the directory label to " + directoryName); - datafile.getFileMetadata().setDirectoryLabel(directoryName); - } - } - - if (datafile != null) { - // We have created this datafile with the mime type "unknown"; - // Now that we have it saved in a temporary location, - // let's try and determine its real type: - - String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); - - try { - recognizedType = determineFileType(new File(tempFileName), shortName); - logger.fine("File utility recognized unzipped file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - datafile.setContentType(recognizedType); - } - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } - - datafiles.add(datafile); - } - } - } - } - unZippedIn.closeEntry(); + version.getFileMetadatas().add(datafile.getFileMetadata()); + datafile.getFileMetadata().setDatasetVersion(version); - } - - } catch (IOException ioex) { - // just clear the datafiles list and let - // ingest default to creating a single DataFile out - // of the unzipped file. - logger.warning("Unzipping failed; rolling back to saving the file as is."); - if (warningMessage == null) { - warningMessage = "Failed to unzip the file. Saving the file as is."; - } - - datafiles.clear(); - } catch (FileExceedsMaxSizeException femsx) { - logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. 
" + femsx.getMessage()); - warningMessage = femsx.getMessage() + "; saving the zip file as is, unzipped."; - datafiles.clear(); - } finally { - if (unZippedIn != null) { - try {unZippedIn.close();} catch (Exception zEx) {} - } - } - if (datafiles.size() > 0) { - // link the data files to the dataset/version: - // (except we no longer want to do this! -- 4.6) - /*Iterator itf = datafiles.iterator(); - while (itf.hasNext()) { - DataFile datafile = itf.next(); - datafile.setOwner(version.getDataset()); - if (version.getFileMetadatas() == null) { - version.setFileMetadatas(new ArrayList()); - } - version.getFileMetadatas().add(datafile.getFileMetadata()); - datafile.getFileMetadata().setDatasetVersion(version); - - version.getDataset().getFiles().add(datafile); - } */ - // remove the uploaded zip file: - try { - Files.delete(tempFile); - } catch (IOException ioex) { - // do nothing - it's just a temp file. - logger.warning("Could not remove temp file "+tempFile.getFileName().toString()); - } - // and return: - return datafiles; - } - - } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { - // Shape files may have to be split into multiple files, - // one zip archive per each complete set of shape files: - - //File rezipFolder = new File(this.getFilesTempDirectory()); - File rezipFolder = getShapefileUnzipTempDirectory(); - - IngestServiceShapefileHelper shpIngestHelper; - shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder); - - boolean didProcessWork = shpIngestHelper.processFile(); - if (!(didProcessWork)){ - logger.severe("Processing of zipped shapefile failed."); - return null; - } - - try { - for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) { - FileInputStream finalFileInputStream = new FileInputStream(finalFile); - finalType = determineContentType(finalFile); - if (finalType == null) { - logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'"); - continue; 
- } - - File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit); - DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, systemConfig.getFileFixityChecksumAlgorithm()); - if (new_datafile != null) { - datafiles.add(new_datafile); - } else { - logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName()); - } - finalFileInputStream.close(); - - } - } catch (FileExceedsMaxSizeException femsx) { - logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage()); - datafiles.clear(); - } - - // Delete the temp directory used for unzipping - // The try-catch is due to error encountered in using NFS for stocking file, - // cf. https://github.com/IQSS/dataverse/issues/5909 - try { - FileUtils.deleteDirectory(rezipFolder); - } catch (IOException ioex) { - // do nothing - it's a tempo folder. - logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); - } - - if (datafiles.size() > 0) { - // remove the uploaded zip file: - try { - Files.delete(tempFile); - } catch (IOException ioex) { - // do nothing - it's just a temp file. - logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); - } catch (SecurityException se) { - logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " - + se.getMessage()); - } - return datafiles; - }else{ - logger.severe("No files added from directory of rezipped shapefiles"); - } - return null; - - } + version.getDataset().getFiles().add(datafile); + } */ + // remove the uploaded zip file: + try { + Files.delete(tempFile); + } catch (IOException ioex) { + // do nothing - it's just a temp file. 
+ logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); + } + // and return: + return datafiles; + } + + } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) { + // Shape files may have to be split into multiple files, + // one zip archive per each complete set of shape files: + + // File rezipFolder = new File(this.getFilesTempDirectory()); + File rezipFolder = getShapefileUnzipTempDirectory(); + + IngestServiceShapefileHelper shpIngestHelper; + shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder); + + boolean didProcessWork = shpIngestHelper.processFile(); + if (!(didProcessWork)) { + logger.severe("Processing of zipped shapefile failed."); + return null; + } + + try { + for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) { + FileInputStream finalFileInputStream = new FileInputStream(finalFile); + finalType = determineContentType(finalFile); + if (finalType == null) { + logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'"); + continue; + } + + File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit); + DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, systemConfig.getFileFixityChecksumAlgorithm()); + if (new_datafile != null) { + datafiles.add(new_datafile); + } else { + logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName()); + } + finalFileInputStream.close(); + + } + } catch (FileExceedsMaxSizeException femsx) { + logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage()); + datafiles.clear(); + } + + // Delete the temp directory used for unzipping + // The try-catch is due to error encountered in using NFS for stocking file, + // cf. 
https://github.com/IQSS/dataverse/issues/5909 + try { + FileUtils.deleteDirectory(rezipFolder); + } catch (IOException ioex) { + // do nothing - it's a tempo folder. + logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); + } + + if (datafiles.size() > 0) { + // remove the uploaded zip file: + try { + Files.delete(tempFile); + } catch (IOException ioex) { + // do nothing - it's just a temp file. + logger.warning("Could not remove temp file " + tempFile.getFileName().toString()); + } catch (SecurityException se) { + logger.warning("Unable to delete: " + tempFile.toString() + "due to Security Exception: " + + se.getMessage()); + } + return datafiles; + } else { + logger.severe("No files added from directory of rezipped shapefiles"); + } + return null; + + } + } else { + //Remote file, trust supplier + finalType = suppliedContentType; + } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just // create and return a single DataFile: + File newFile = null; + if(tempFile!=null) { + newFile = tempFile.toFile(); + } + ChecksumType checkSumType = DataFile.ChecksumType.MD5; + if(newStorageIdentifier==null) { + checkSumType=systemConfig.getFileFixityChecksumAlgorithm(); + } - DataFile datafile = createSingleDataFile(version, tempFile.toFile(), fileName, finalType, systemConfig.getFileFixityChecksumAlgorithm()); - - if (datafile != null && tempFile.toFile() != null) { + DataFile datafile = createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, checkSumType, newCheckSum); + File f = null; + if(tempFile!=null) { + f=tempFile.toFile(); + } + if (datafile != null && ((f != null) || (newStorageIdentifier!=null))) { if (warningMessage != null) { createIngestFailureReport(datafile, warningMessage); @@ -1133,14 +1150,18 @@ private static File saveInputStreamInTempFile(InputStream inputStream, Long file * individual files, etc., and once the file 
name and mime type have already * been figured out. */ - + private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType) { - return createSingleDataFile(version, tempFile, fileName, contentType, checksumType, false); + return createSingleDataFile(version, tempFile, null, fileName, contentType, checksumType, null, false); + } + + private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum) { + return createSingleDataFile(version, tempFile, storageIdentifier, fileName, contentType, checksumType, checksum, false); } - private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType, boolean addToDataset) { + private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String storageIdentifier, String fileName, String contentType, DataFile.ChecksumType checksumType, String checksum, boolean addToDataset) { - if (tempFile == null) { + if ((tempFile == null) && (storageIdentifier == null)) { return null; } @@ -1171,20 +1192,27 @@ private static DataFile createSingleDataFile(DatasetVersion version, File tempFi fmd.setDatasetVersion(version); version.getDataset().getFiles().add(datafile); } - + if(storageIdentifier==null) { generateStorageIdentifier(datafile); if (!tempFile.renameTo(new File(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()))) { return null; } - - try { - // We persist "SHA1" rather than "SHA-1". 
- datafile.setChecksumType(checksumType); - datafile.setChecksumValue(calculateChecksum(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(), datafile.getChecksumType())); - } catch (Exception cksumEx) { - logger.warning("Could not calculate " + checksumType + " signature for the new file " + fileName); + } else { + datafile.setStorageIdentifier(storageIdentifier); } + if ((checksum !=null)&&(!checksum.isEmpty())) { + datafile.setChecksumType(checksumType); + datafile.setChecksumValue(checksum); + } else { + try { + // We persist "SHA1" rather than "SHA-1". + datafile.setChecksumType(checksumType); + datafile.setChecksumValue(calculateChecksum(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(), datafile.getChecksumType())); + } catch (Exception cksumEx) { + logger.warning("Could not calculate " + checksumType + " signature for the new file " + fileName); + } + } return datafile; } @@ -1617,10 +1645,40 @@ public static DatasetThumbnail getThumbnail(DataFile file) { public static boolean isPackageFile(DataFile dataFile) { return DataFileServiceBean.MIME_TYPE_PACKAGE_FILE.equalsIgnoreCase(dataFile.getContentType()); } + + public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { + String driverId = dataset.getDataverseContext().getEffectiveStorageDriverId(); + boolean directEnabled = Boolean.getBoolean("dataverse.files." + driverId + ".upload-redirect"); + //Should only be requested when it is allowed, but we'll log a warning otherwise + if(!directEnabled) { + logger.warning("Direct upload not supported for files in this dataset: " + dataset.getId()); + return null; + } + S3AccessIO s3io = null; + String bucket = System.getProperty("dataverse.files." 
+ driverId + ".bucket-name") + "/"; + String sid = null; + int i=0; + while (s3io==null && i<5) { + sid = bucket+ dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier(); + try { + s3io = new S3AccessIO(sid, driverId); + if(s3io.exists()) { + s3io=null; + i=i+1; + } + + } catch (Exception e) { + i=i+1; + } + } + return s3io; + } + public static String getStorageIdentifierFromLocation(String location) { int driverEnd = location.indexOf("://") + 3; int bucketEnd = driverEnd + location.substring(driverEnd).indexOf("/"); return location.substring(0,bucketEnd) + ":" + location.substring(location.lastIndexOf("/") + 1); } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 8d0cb276a93..aefb01992f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -539,14 +539,10 @@ public boolean isFilesOnDatasetPageFromSolr() { return settingsService.isTrueForKey(SettingsServiceBean.Key.FilesOnDatasetPageFromSolr, safeDefaultIfKeyNotFound); } - public Long getMaxFileUploadSize(){ - return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.MaxFileUploadSizeInBytes); + public Long getMaxFileUploadSizeForStore(String driverId){ + return settingsService.getValueForCompoundKeyAsLong(SettingsServiceBean.Key.MaxFileUploadSizeInBytes, driverId); } - public String getHumanMaxFileUploadSize(){ - return bytesToHumanReadable(getMaxFileUploadSize()); - } - public Integer getSearchHighlightFragmentSize() { String fragSize = settingsService.getValueForKey(SettingsServiceBean.Key.SearchHighlightFragmentSize); if (fragSize != null) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 35229c5162d..2f89a3742ae 100755 --- a/src/main/java/propertyFiles/Bundle.properties +++ 
b/src/main/java/propertyFiles/Bundle.properties @@ -695,7 +695,6 @@ dataverse.host.autocomplete.nomatches=No matches dataverse.identifier.title=Short name used for the URL of this dataverse. dataverse.affiliation.title=The organization with which this dataverse is affiliated. dataverse.storage.title=A storage service to be used for datasets in this dataverse. -dataverse.storage.usedefault=Use Default dataverse.category=Category dataverse.category.title=The type that most closely reflects this dataverse. dataverse.type.selectTab.top=Select one... diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index db7620bab71..d907aa5aceb 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -1363,7 +1363,6 @@
- diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 811e6c4d55f..7f7d3e5c594 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -11,7 +11,9 @@ xmlns:o="http://omnifaces.org/ui" xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs"> - + + +
    @@ -71,8 +73,8 @@ - + rendered="#{!EditDatafilesPage.isUnlimitedUploadFileSize()}"> +

    @@ -90,8 +92,10 @@ function uploadWidgetDropRemoveMsg() { $('div[id$="fileUpload"] div.ui-fileupload-content div#dragdropMsg').remove(); } + $(document).ready(function () { uploadWidgetDropMsg(); + setupDirectUpload(#{EditDatafilesPage.directUploadEnabled()}, #{EditDatafilesPage.workingVersion.dataset.id}); }); //]]> @@ -123,7 +127,7 @@ + + diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index b5c764c1e15..e6fd0a110b6 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -972,3 +972,12 @@ span.ui-autocomplete input.ui-autocomplete-input {width:100%;} #citation-banner {width:100%; height:45px; position: absolute; z-index: 999999; border-radius: 0; border-width: 0 0 1px 0;} #citation-banner a.close, #citation-banner a.close span.glyphicon {line-height:.2;} #citation-forward {position: absolute; top:45px; height: calc(100% - 65px); border:0; background:url(/resources/images/ajax-loading.gif) no-repeat 50% 50%;} + +/*Direct upload progress bar*/ +progress::-webkit-progress-bar { + background-color:white; +} + +progress::-webkit-progress-value { + background-color:green; +} diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index f26218913f3..126d5d47c87 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -1,42 +1,203 @@ +var fileList = []; +var observer2=null; +var datasetId=null; +//How many files have started being processed but aren't yet being uploaded +var filesInProgress=0; +//The # of the current file being processed (total number of files for which upload has at least started) +var curFile=0; +//The number of upload ids that have been assigned in the files table +var getUpId = (function () { + var counter = -1; + return function () {counter += 1; return counter} +})(); +//How many files are completely done +var finishFile = (function () { + var 
counter = 0; + return function () {counter += 1; return counter} +})(); + + +function setupDirectUpload(enabled, theDatasetId) { + if(enabled) { + datasetId=theDatasetId; + $('.ui-fileupload-upload').hide(); + $('.ui-fileupload-cancel').hide(); + //Catch files entered via upload dialog box. Since this 'select' widget is replaced by PF, we need to add a listener again when it is replaced + var fileInput=document.getElementById('datasetForm:fileUpload_input'); + if(fileInput !==null) { + fileInput.addEventListener('change', function(event) { + fileList=[]; + for(var i=0;i').attr('class', 'ui-progressbar ui-widget ui-widget-content ui-corner-all')); + $.ajax({ + url: url, + headers: {"x-amz-tagging":"dv-state=temp"}, + type: 'PUT', + data: file, + cache: false, + processData: false, + success: function () { + reportUpload(storageId, file) + }, + error: function(jqXHR, textStatus, errorThrown) { + + console.log('Failure: ' + jqXHR.status); + console.log('Failure: ' + errorThrown); + uploadFailure(jqXHR, thisFile); + }, + xhr: function() { + var myXhr = $.ajaxSettings.xhr(); + if(myXhr.upload) { + myXhr.upload.addEventListener('progress', function(e) { + if(e.lengthComputable) { + var doublelength = 2 * e.total; + progBar.children('progress').attr({ + value:e.loaded, + max:doublelength + }); + } + }); + } + return myXhr; + } + }); +} + +function reportUpload(storageId, file){ + console.log('S3 Upload complete for ' + file.name + ' : ' + storageId); + getMD5( + file, + prog => { + + var current = 1 + prog; + $('progress').attr({ + value:current, + max:2 + }); + } + ).then( + md5 => { + //storageId is not the location - has a : separator and no path elements from dataset + //(String uploadComponentId, String fullStorageIdentifier, String fileName, String contentType, String checksumType, String checksumValue) + handleExternalUpload([{name:'uploadComponentId', value:'datasetForm:fileUpload'}, {name:'fullStorageIdentifier', value:storageId}, {name:'fileName', 
value:file.name}, {name:'contentType', value:file.type}, {name:'checksumType', value:'MD5'}, {name:'checksumValue', value:md5}]); + }, + err => console.error(err) + ); +} + function removeErrors() { - var errors = document.getElementsByClassName("ui-fileupload-error"); + var errors = document.getElementsByClassName("ui-fileupload-error"); for(i=errors.length-1; i >=0; i--) { errors[i].parentNode.removeChild(errors[i]); } } + var observer=null; + function uploadStarted() { - // If this is not the first upload, remove error messages since - // the upload of any files that failed will be tried again. - removeErrors(); - var curId=0; - //Find the upload table body - var files = $('.ui-fileupload-files .ui-fileupload-row'); - //Add an id attribute to each entry so we can later match errors with the right entry - for(i=0;i< files.length;i++) { - files[i].setAttribute('upid', curId); - curId = curId+1; - } - //Setup an observer to watch for additional rows being added - var config={childList: true}; - var callback = function(mutations) { - //Add an id attribute to all new entries - mutations.forEach(function(mutation) { - for(i=0; i= fileSize) { + endCallback(null); + return; + } + readNext(); + }; + + reader.onerror = function(err) { + endCallback(err || {}); + }; + + function readNext() { + var fileSlice = file.slice(offset, offset + chunkSize); + reader.readAsBinaryString(fileSlice); + } + readNext(); +} + +function getMD5(blob, cbProgress) { + return new Promise((resolve, reject) => { + var md5 = CryptoJS.algo.MD5.create(); + readChunked(blob, (chunk, offs, total) => { + md5.update(CryptoJS.enc.Latin1.parse(chunk)); + if (cbProgress) { + cbProgress(offs / total); + } + }, err => { + if (err) { + reject(err); + } else { + // TODO: Handle errors + var hash = md5.finalize(); + var hashHex = hash.toString(CryptoJS.enc.Hex); + resolve(hashHex); + } + }); + }); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java 
b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 51d3a214d5d..713fb770cb0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -446,9 +446,9 @@ public void testCreatePublishDestroyDataset() { */ boolean nameRequiredForContactToAppear = true; if (nameRequiredForContactToAppear) { - assertEquals("Finch, Fiona", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact")); + assertEquals("Finch, Fiona", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.citation.distStmt.contact")); } else { - assertEquals("finch@mailinator.com", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact.@email")); + assertEquals("finch@mailinator.com", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.citation.distStmt.contact.@email")); } assertEquals(datasetPersistentId, XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.docDscr.citation.titlStmt.IDNo")); @@ -659,10 +659,10 @@ public void testExcludeEmail() { exportDatasetAsDdi.then().assertThat() .statusCode(OK.getStatusCode()); - assertEquals("Dataverse, Admin", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact")); + assertEquals("Dataverse, Admin", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.citation.distStmt.contact")); // no "sammi@sample.com" to be found https://github.com/IQSS/dataverse/issues/3443 - assertEquals("[]", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact.@email")); - assertEquals("Sample Datasets, inc.", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact.@affiliation")); + assertEquals("[]", 
XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.citation.distStmt.contact.@email")); + assertEquals("Sample Datasets, inc.", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.citation.distStmt.contact.@affiliation")); assertEquals(datasetPersistentId, XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.docDscr.citation.titlStmt.IDNo")); List datasetContactsFromNativeGet = with(getDatasetJsonAfterPublishing.body().asString()).param("datasetContact", "datasetContact") diff --git a/src/test/java/edu/harvard/iq/dataverse/datasetutility/FileSizeCheckerTest.java b/src/test/java/edu/harvard/iq/dataverse/datasetutility/FileSizeCheckerTest.java index e562e3f9e0c..824dc6794fe 100644 --- a/src/test/java/edu/harvard/iq/dataverse/datasetutility/FileSizeCheckerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/datasetutility/FileSizeCheckerTest.java @@ -7,22 +7,13 @@ import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; -import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.FileSizeResponse; import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; - import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * @@ -44,62 +35,4 @@ public void testBytesToHumanReadable() { assertEquals(expAns, ans); assertEquals(expLongAns, longAns); } - - @Test - public void testIsAllowedFileSize_throwsOnNull() { - FileSizeChecker fileSizeChecker = new FileSizeChecker(new SystemConfig() { - @Override - public Long 
getMaxFileUploadSize() { - return 1000L; - } - }); - assertThrows(NullPointerException.class, () -> { - fileSizeChecker.isAllowedFileSize(null); - }); - } - - @ParameterizedTest - @ValueSource(longs = { 0L, 999L, 1000L }) - public void testIsAllowedFileSize_allowsSmallerOrEqualFileSize(Long fileSize) { - // initialize a system config and instantiate a file size checker - // override the max file upload side to allow for testing - FileSizeChecker fileSizeChecker = new FileSizeChecker(new SystemConfig() { - @Override - public Long getMaxFileUploadSize() { - return 1000L; - } - }); - FileSizeResponse response = fileSizeChecker.isAllowedFileSize(fileSize); - assertTrue(response.fileSizeOK); - } - - @ParameterizedTest - @ValueSource(longs = { 1001L, Long.MAX_VALUE }) - public void testIsAllowedFileSize_rejectsBiggerFileSize(Long fileSize) { - // initialize a system config and instantiate a file size checker - // override the max file upload side to allow for testing - FileSizeChecker fileSizeChecker = new FileSizeChecker(new SystemConfig() { - @Override - public Long getMaxFileUploadSize() { - return 1000L; - } - }); - FileSizeResponse response = fileSizeChecker.isAllowedFileSize(fileSize); - assertFalse(response.fileSizeOK); - } - - @ParameterizedTest - @ValueSource(longs = { 0L, 1000L, Long.MAX_VALUE }) - public void testIsAllowedFileSize_allowsOnUnboundedFileSize(Long fileSize) { - // initialize a system config and instantiate a file size checker - // ensure that a max filesize is not set - FileSizeChecker unboundedFileSizeChecker = new FileSizeChecker(new SystemConfig() { - @Override - public Long getMaxFileUploadSize() { - return null; - } - }); - FileSizeResponse response = unboundedFileSizeChecker.isAllowedFileSize(fileSize); - assertTrue(response.fileSizeOK); - } } diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java index b3155e6d7ae..a76ce8475f2 100644 
--- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java @@ -35,6 +35,18 @@ public void testJson2DdiNoFiles() throws Exception { assertEquals(datasetAsDdi, result); } + @Test + public void testExportDDI() throws Exception { + File datasetVersionJson = new File("src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + File ddiFile = new File("src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml"); + String datasetAsDdi = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(ddiFile.getAbsolutePath())))); + logger.info(datasetAsDdi); + String result = DdiExportUtil.datasetDtoAsJson2ddi(datasetVersionAsJson); + logger.info(result); + assertEquals(datasetAsDdi, result); + } + @Test public void testJson2ddiHasFiles() throws Exception { /** diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json new file mode 100644 index 00000000000..8930938d1af --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -0,0 +1,1067 @@ +{ + "id": 11, + "identifier": "WKUKGV", + "persistentUrl": "https://doi.org/10.5072/FK2/WKUKGV", + "protocol": "doi", + "authority": "10.5072/FK2", + "publisher": "Root", + "publicationDate": "2020-02-19", + "datasetVersion": { + "id": 2, + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "productionDate": "Production Date", + "lastUpdateTime": "2015-09-24T17:07:57Z", + "releaseTime": "2020-02-19", + "createTime": "2015-09-24T16:47:51Z", + "license": "CC0", + "termsOfUse": "CC0 Waiver", + "termsOfAccess": "Terms of Access", + "dataAccessPlace": "Data Access Place", + 
"originalArchive": "Original Archive", + "availabilityStatus": "Availability Status", + "sizeOfCollection": "Size of Collection", + "studyCompletion": "Study Completion", + "confidentialityDeclaration": "Confidentiality Declaration", + "specialPermissions": "Special Permissions", + "restrictions": "Restrictions", + "contactForAccess": "Contact for Access", + "citationRequirements": "Citation Requirements", + "depositorRequirements": "Depositor Requirements", + "conditions": "Conditions ", + "disclaimer": "Disclaimer", + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Replication Data for: Title" + }, + { + "typeName": "subtitle", + "multiple": false, + "typeClass": "primitive", + "value": "Subtitle" + }, + { + "typeName": "alternativeTitle", + "multiple": false, + "typeClass": "primitive", + "value": "Alternative Title" + }, + { + "typeName": "otherId", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "otherIdAgency": { + "typeName": "otherIdAgency", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDAgency1" + }, + "otherIdValue": { + "typeName": "otherIdValue", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDIdentifier1" + } + }, + { + "otherIdAgency": { + "typeName": "otherIdAgency", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDAgency2" + }, + "otherIdValue": { + "typeName": "otherIdValue", + "multiple": false, + "typeClass": "primitive", + "value": "OtherIDIdentifier2" + } + } + ] + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorAffiliation1" + } + }, 
+ { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastAuthor2, FirstAuthor2" + }, + "authorAffiliation": { + "typeName": "authorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "AuthorAffiliation2" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContact1, FirstContact1" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ContactAffiliation1" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "ContactEmail1@mailinator.com" + } + }, + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContact2, FirstContact2" + }, + "datasetContactAffiliation": { + "typeName": "datasetContactAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ContactAffiliation2" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "ContactEmail2@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "DescriptionText 1" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1000-01-01" + } + }, + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "DescriptionText2" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, 
+ "typeClass": "primitive", + "value": "1000-02-02" + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, + { + "typeName": "keyword", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm1" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary1" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL1.org" + } + }, + { + "keywordValue": { + "typeName": "keywordValue", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordTerm2" + }, + "keywordVocabulary": { + "typeName": "keywordVocabulary", + "multiple": false, + "typeClass": "primitive", + "value": "KeywordVocabulary2" + }, + "keywordVocabularyURI": { + "typeName": "keywordVocabularyURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://KeywordVocabularyURL2.org" + } + } + ] + }, + { + "typeName": "publication", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation1" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "ark" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber1" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL1.org" + } + }, + { + 
"publicationCitation": { + "typeName": "publicationCitation", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationCitation2" + }, + "publicationIDType": { + "typeName": "publicationIDType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "arXiv" + }, + "publicationIDNumber": { + "typeName": "publicationIDNumber", + "multiple": false, + "typeClass": "primitive", + "value": "RelatedPublicationIDNumber2" + }, + "publicationURL": { + "typeName": "publicationURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://RelatedPublicationURL2.org" + } + } + ] + }, + { + "typeName": "notesText", + "multiple": false, + "typeClass": "primitive", + "value": "Notes1" + }, + { + "typeName": "producer", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "producerName": { + "typeName": "producerName", + "multiple": false, + "typeClass": "primitive", + "value": "LastProducer1, FirstProducer1" + }, + "producerAffiliation": { + "typeName": "producerAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAffiliation1" + }, + "producerAbbreviation": { + "typeName": "producerAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAbbreviation1" + }, + "producerURL": { + "typeName": "producerURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerURL1.org" + }, + "producerLogoURL": { + "typeName": "producerLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerLogoURL1.org" + } + }, + { + "producerName": { + "typeName": "producerName", + "multiple": false, + "typeClass": "primitive", + "value": "LastProducer2, FirstProducer2" + }, + "producerAffiliation": { + "typeName": "producerAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "ProducerAffiliation2" + }, + "producerAbbreviation": { + "typeName": "producerAbbreviation", + "multiple": false, + "typeClass": "primitive", + 
"value": "ProducerAbbreviation2" + }, + "producerURL": { + "typeName": "producerURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerURL2.org" + }, + "producerLogoURL": { + "typeName": "producerLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://ProducerLogoURL2.org" + } + } + ] + }, + { + "typeName": "productionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1003-01-01" + }, + { + "typeName": "productionPlace", + "multiple": false, + "typeClass": "primitive", + "value": "ProductionPlace" + }, + { + "typeName": "contributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Collector" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContributor1, FirstContributor1" + } + }, + { + "contributorType": { + "typeName": "contributorType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Data Curator" + }, + "contributorName": { + "typeName": "contributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastContributor2, FirstContributor2" + } + } + ] + }, + { + "typeName": "grantNumber", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantAgency1" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantNumber1" + } + }, + { + "grantNumberAgency": { + "typeName": "grantNumberAgency", + "multiple": false, + "typeClass": "primitive", + "value": "GrantInformationGrantAgency2" + }, + "grantNumberValue": { + "typeName": "grantNumberValue", + "multiple": false, + "typeClass": "primitive", + "value": 
"GrantInformationGrantNumber2" + } + } + ] + }, + { + "typeName": "distributor", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "distributorName": { + "typeName": "distributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation": { + "typeName": "distributorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAffiliation1" + }, + "distributorAbbreviation": { + "typeName": "distributorAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAbbreviation1" + }, + "distributorURL": { + "typeName": "distributorURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorURL1.org" + }, + "distributorLogoURL": { + "typeName": "distributorLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorLogoURL1.org" + } + }, + { + "distributorName": { + "typeName": "distributorName", + "multiple": false, + "typeClass": "primitive", + "value": "LastDistributor2, FirstDistributor2" + }, + "distributorAffiliation": { + "typeName": "distributorAffiliation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAffiliation2" + }, + "distributorAbbreviation": { + "typeName": "distributorAbbreviation", + "multiple": false, + "typeClass": "primitive", + "value": "DistributorAbbreviation2" + }, + "distributorURL": { + "typeName": "distributorURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorURL2.org" + }, + "distributorLogoURL": { + "typeName": "distributorLogoURL", + "multiple": false, + "typeClass": "primitive", + "value": "http://DistributorLogoURL2.org" + } + } + ] + }, + { + "typeName": "distributionDate", + "multiple": false, + "typeClass": "primitive", + "value": "1004-01-01" + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "LastDepositor, FirstDepositor" + }, + { + 
"typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "1002-01-01" + }, + { + "typeName": "timePeriodCovered", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1005-01-02" + } + }, + { + "timePeriodCoveredStart": { + "typeName": "timePeriodCoveredStart", + "multiple": false, + "typeClass": "primitive", + "value": "1005-02-01" + }, + "timePeriodCoveredEnd": { + "typeName": "timePeriodCoveredEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1005-02-02" + } + } + ] + }, + { + "typeName": "dateOfCollection", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dateOfCollectionStart": { + "typeName": "dateOfCollectionStart", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + }, + "dateOfCollectionEnd": { + "typeName": "dateOfCollectionEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1006-01-01" + } + }, + { + "dateOfCollectionStart": { + "typeName": "dateOfCollectionStart", + "multiple": false, + "typeClass": "primitive", + "value": "1006-02-01" + }, + "dateOfCollectionEnd": { + "typeName": "dateOfCollectionEnd", + "multiple": false, + "typeClass": "primitive", + "value": "1006-02-02" + } + } + ] + }, + { + "typeName": "kindOfData", + "multiple": true, + "typeClass": "primitive", + "value": [ + "KindOfData1", + "KindOfData2" + ] + }, + { + "typeName": "series", + "multiple": false, + "typeClass": "compound", + "value": { + "seriesName": { + "typeName": "seriesName", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesName" + }, + "seriesInformation": { + "typeName": "seriesInformation", + "multiple": false, + "typeClass": "primitive", + "value": "SeriesInformation" 
+ } + } + }, + { + "typeName": "software", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "softwareName": { + "typeName": "softwareName", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareName1" + }, + "softwareVersion": { + "typeName": "softwareVersion", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareVersion1" + } + }, + { + "softwareName": { + "typeName": "softwareName", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareName2" + }, + "softwareVersion": { + "typeName": "softwareVersion", + "multiple": false, + "typeClass": "primitive", + "value": "SoftwareVersion2" + } + } + ] + }, + { + "typeName": "relatedMaterial", + "multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, + { + "typeName": "relatedDatasets", + "multiple": true, + "typeClass": "primitive", + "value": [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, + { + "typeName": "otherReferences", + "multiple": true, + "typeClass": "primitive", + "value": [ + "OtherReferences1", + "OtherReferences2" + ] + }, + { + "typeName": "dataSources", + "multiple": true, + "typeClass": "primitive", + "value": [ + "DataSources1", + "DataSources2" + ] + }, + { + "typeName": "originOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "OriginOfSources" + }, + { + "typeName": "characteristicOfSources", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicOfSourcesNoted" + }, + { + "typeName": "accessToSources", + "multiple": false, + "typeClass": "primitive", + "value": "DocumentationAndAccessToSources" + } + ] + }, + "geospatial": { + "displayName": "Geospatial Metadata", + "fields": [ + { + "typeName": "geographicCoverage", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Afghanistan" + }, + "state": { + "typeName": 
"state", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageStateProvince1" + }, + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageCity1" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageOther1" + } + }, + { + "country": { + "typeName": "country", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "Albania" + }, + "state": { + "typeName": "state", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageStateProvince2" + }, + "city": { + "typeName": "city", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageCity2" + }, + "otherGeographicCoverage": { + "typeName": "otherGeographicCoverage", + "multiple": false, + "typeClass": "primitive", + "value": "GeographicCoverageOther2" + } + } + ] + }, + { + "typeName": "geographicUnit", + "multiple": true, + "typeClass": "primitive", + "value": [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, + { + "typeName": "geographicBoundingBox", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "westLongitude": { + "typeName": "westLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "10" + }, + "eastLongitude": { + "typeName": "eastLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "20" + }, + "northLongitude": { + "typeName": "northLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "30" + }, + "southLongitude": { + "typeName": "southLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "40" + } + }, + { + "westLongitude": { + "typeName": "westLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "50" + }, + "eastLongitude": { + "typeName": "eastLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "60" + }, + "northLongitude": { + 
"typeName": "northLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "70" + }, + "southLongitude": { + "typeName": "southLongitude", + "multiple": false, + "typeClass": "primitive", + "value": "80" + } + } + ] + } + ] + }, + "socialscience": { + "displayName": "Social Science and Humanities Metadata", + "fields": [ + { + "typeName": "unitOfAnalysis", + "multiple": true, + "typeClass": "primitive", + "value": [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, + { + "typeName": "universe", + "multiple": true, + "typeClass": "primitive", + "value": [ + "Universe1", + "Universe2" + ] + }, + { + "typeName": "timeMethod", + "multiple": false, + "typeClass": "primitive", + "value": "TimeMethod" + }, + { + "typeName": "dataCollector", + "multiple": false, + "typeClass": "primitive", + "value": "LastDataCollector1, FirstDataCollector1" + }, + { + "typeName": "collectorTraining", + "multiple": false, + "typeClass": "primitive", + "value": "CollectorTraining" + }, + { + "typeName": "frequencyOfDataCollection", + "multiple": false, + "typeClass": "primitive", + "value": "Frequency" + }, + { + "typeName": "samplingProcedure", + "multiple": false, + "typeClass": "primitive", + "value": "SamplingProcedure" + }, + { + "typeName": "targetSampleSize", + "multiple": false, + "typeClass": "compound", + "value": { + "targetSampleActualSize": { + "typeName": "targetSampleActualSize", + "multiple": false, + "typeClass": "primitive", + "value": "100" + }, + "targetSampleSizeFormula": { + "typeName": "targetSampleSizeFormula", + "multiple": false, + "typeClass": "primitive", + "value": "TargetSampleSizeFormula" + } + } + }, + { + "typeName": "deviationsFromSampleDesign", + "multiple": false, + "typeClass": "primitive", + "value": "MajorDeviationsForSampleDesign" + }, + { + "typeName": "collectionMode", + "multiple": false, + "typeClass": "primitive", + "value": "CollectionMode" + }, + { + "typeName": "researchInstrument", + "multiple": false, + "typeClass": 
"primitive", + "value": "TypeOfResearchInstrument" + }, + { + "typeName": "dataCollectionSituation", + "multiple": false, + "typeClass": "primitive", + "value": "CharacteristicsOfDataCollectionSituation" + }, + { + "typeName": "actionsToMinimizeLoss", + "multiple": false, + "typeClass": "primitive", + "value": "ActionsToMinimizeLosses" + }, + { + "typeName": "controlOperations", + "multiple": false, + "typeClass": "primitive", + "value": "ControlOperations" + }, + { + "typeName": "weighting", + "multiple": false, + "typeClass": "primitive", + "value": "Weighting" + }, + { + "typeName": "cleaningOperations", + "multiple": false, + "typeClass": "primitive", + "value": "CleaningOperations" + }, + { + "typeName": "datasetLevelErrorNotes", + "multiple": false, + "typeClass": "primitive", + "value": "StudyLevelErrorNotes" + }, + { + "typeName": "responseRate", + "multiple": false, + "typeClass": "primitive", + "value": "ResponseRate" + }, + { + "typeName": "samplingErrorEstimates", + "multiple": false, + "typeClass": "primitive", + "value": "EstimatesOfSamplingError" + }, + { + "typeName": "otherDataAppraisal", + "multiple": false, + "typeClass": "primitive", + "value": "OtherFormsOfDataAppraisal" + }, + { + "typeName": "socialScienceNotes", + "multiple": false, + "typeClass": "compound", + "value": { + "socialScienceNotesType": { + "typeName": "socialScienceNotesType", + "multiple": false, + "typeClass": "primitive", + "value": "NotesType" + }, + "socialScienceNotesSubject": { + "typeName": "socialScienceNotesSubject", + "multiple": false, + "typeClass": "primitive", + "value": "NotesSubject" + }, + "socialScienceNotesText": { + "typeName": "socialScienceNotesText", + "multiple": false, + "typeClass": "primitive", + "value": "NotesText" + } + } + } + ] + } + }, + "files": [], + "citation": "LastAuthor1, FirstAuthor1; LastAuthor2, FirstAuthor2, 2020, \"Replication Data for: Title\", https://doi.org/10.5072/FK2/WKUKGV, Root, V1" + } +} diff --git 
a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml index cc7ecca7f9d..79e9e363994 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml @@ -25,14 +25,18 @@ Johnny Hawk - + + Odin Raven + Jimmy Finch + Added, Depositor + Medicine, Health and Life Sciences - Keyword Value 1 - Keyword Value Two - TC Value 1 + Keyword Value 1 + Keyword Value Two + TC Value 1 Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds. @@ -41,24 +45,28 @@ 20070831 20130630 Kind of Data - Cambridge USA + Cambridge MA Other Geographic Coverage - 41.6 - 60.3 - 59.8 - 43.8 + + 41.6 + 60.3 + 59.8 + 43.8 + - - Odin Raven - - Jimmy Finch - Added, Depositor - + + + + + + + + diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml new file mode 100644 index 00000000000..d9be1217fc9 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml @@ -0,0 +1,194 @@ + + + + + + Replication Data for: Title + doi:10.5072/FK2/WKUKGV + + + Root + 2020-02-19 + + + 1 + + LastAuthor1, FirstAuthor1; LastAuthor2, FirstAuthor2, 2020, "Replication Data for: Title", https://doi.org/10.5072/FK2/WKUKGV, Root, V1 + + + + + + Replication Data for: Title + Subtitle + Alternative Title + doi:10.5072/FK2/WKUKGV + OtherIDIdentifier1 + OtherIDIdentifier2 + + + LastAuthor1, FirstAuthor1 + LastAuthor2, FirstAuthor2 + LastContributor1, FirstContributor1 + LastContributor2, FirstContributor2 + + + LastProducer1, FirstProducer1 + LastProducer2, FirstProducer2 + 1003-01-01 + ProductionPlace + SoftwareName1 + SoftwareName2 + GrantInformationGrantNumber1 + GrantInformationGrantNumber2 + + + Root + LastDistributor1, FirstDistributor1 + LastDistributor2, FirstDistributor2 + 
LastContact1, FirstContact1 + LastContact2, FirstContact2 + 1004-01-01 + LastDepositor, FirstDepositor + 1002-01-01 + + + SeriesName + SeriesInformation + + + + + Agricultural Sciences + Business and Management + Engineering + Law + KeywordTerm1 + KeywordTerm2 + + DescriptionText 1 + DescriptionText2 + + 1005-01-01 + 1005-01-02 + 1005-02-01 + 1005-02-02 + 1006-01-01 + 1006-01-01 + 1006-02-01 + 1006-02-02 + KindOfData1 + KindOfData2 + Afghanistan + GeographicCoverageCity1 + GeographicCoverageStateProvince1 + GeographicCoverageOther1 + Albania + GeographicCoverageCity2 + GeographicCoverageStateProvince2 + GeographicCoverageOther2 + + 10 + 20 + 30 + 40 + + + 80 + 70 + 60 + 50 + + GeographicUnit1 + GeographicUnit2 + UnitOfAnalysis1 + UnitOfAnalysis2 + Universe1 + Universe2 + + Notes1 + + + + TimeMethod + LastDataCollector1, FirstDataCollector1 + CollectorTraining + Frequency + SamplingProcedure + + TargetSampleSizeFormula + 100 + + MajorDeviationsForSampleDesign + + DataSources1 + DataSources2 + OriginOfSources + CharacteristicOfSourcesNoted + DocumentationAndAccessToSources + + CollectionMode + TypeOfResearchInstrument + CharacteristicsOfDataCollectionSituation + ActionsToMinimizeLosses + ControlOperations + Weighting + CleaningOperations + + + ResponseRate + EstimatesOfSamplingError + OtherFormsOfDataAppraisal + + NotesText + + + CC0 Waiver + Terms of Access + + Data Access Place + Original Archive + Availability Status + Size of Collection + Study Completion + + + Confidentiality Declaration + Special Permissions + Restrictions + Contact for Access + Citation Requirements + Depositor Requirements + Conditions + Disclaimer + + + + RelatedMaterial1 + RelatedMaterial2 + RelatedDatasets1 + RelatedDatasets2 + + + + RelatedPublicationIDNumber1 + + RelatedPublicationCitation1 + + + + + + + RelatedPublicationIDNumber2 + + RelatedPublicationCitation2 + + + + OtherReferences1 + OtherReferences2 + + StudyLevelErrorNotes + +