diff --git a/Dockerfile b/Dockerfile index f2a939d..a165185 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,103 +1,30 @@ -FROM ubuntu:latest - -MAINTAINER Xavier Garnier "xavier.garnier@irisa.fr" - -# Set Virtuoso commit SHA to Virtuoso 7.2.4 release (25/04/2016) -ENV VIRTUOSO_COMMIT 96055f6a70a92c3098a7e786592f4d8ba8aae214 - -# Prerequisites -RUN apt-get update && apt-get install -y \ - #virtuoso - build-essential \ - debhelper \ - autotools-dev \ - autoconf \ - automake \ - unzip \ - wget \ - net-tools \ - git \ - libtool \ - flex \ - bison \ - gperf \ - gawk \ - m4 \ - libssl-dev \ - libreadline-dev \ - libreadline-dev \ - openssl \ - python-pip \ - #AskOmics - build-essential \ - python3 \ - python3-pip \ - python3-venv \ - vim \ - ruby \ - npm \ - nodejs-legacy - - -## VIRTUOSO ################################################################### - -# Get Virtuoso source code from GitHub and checkout specific commit -# Make and install Virtuoso (by default in /usr/local/virtuoso-opensource) -RUN git clone https://github.com/openlink/virtuoso-opensource.git \ - && cd virtuoso-opensource \ - && git checkout ${VIRTUOSO_COMMIT} \ - && ./autogen.sh \ - && CFLAGS="-O2 -m64" && export CFLAGS && ./configure --disable-bpel-vad --enable-conductor-vad --enable-fct-vad --disable-dbpedia-vad --disable-demo-vad --disable-isparql-vad --disable-ods-vad --disable-sparqldemo-vad --disable-syncml-vad --disable-tutorial-vad --with-readline --program-transform-name="s/isql/isql-v/" \ - && make && make install \ - && ln -s /usr/local/virtuoso-opensource/var/lib/virtuoso/ /var/lib/virtuoso \ - && ln -s /var/lib/virtuoso/db /data \ - && cd .. 
\ - && rm -r /virtuoso-opensource - -# Add Virtuoso bin to the PATH -ENV PATH /usr/local/virtuoso-opensource/bin/:$PATH - -# Add Virtuoso config -ADD virtuoso/virtuoso.ini /virtuoso.ini - -# Add dump_nquads_procedure -ADD virtuoso/dump_nquads_procedure.sql /dump_nquads_procedure.sql - -# Add Virtuoso log cleaning script -ADD virtuoso/clean-logs.sh /clean-logs.sh - -# Add startup script -ADD virtuoso/virtuoso.sh /virtuoso.sh - -# Add dum template -ADD virtuoso/dump.template.nq /dump.template.nq - -ENV SPARQL_UPDATE true - -## ASKOMICS ################################################################### - -# AskOmics github repo -ENV ASKOMICS_URL https://github.com/xgaia/askomics.git -# AskOmics commit -ENV ASKOMICS_COMMIT 2b61cdef966530e2365a9d810c05c8b2662a6999 - -RUN git clone ${ASKOMICS_URL} /usr/local/askomics/ +FROM xgaia/virtuoso +MAINTAINER Xavier Garnier 'xavier.garnier@irisa.fr' + + +# Environment variables +ENV ASKOMICS="https://github.com/xgaia/askomics.git" \ + ASKOMICS_DIR="/usr/local/askomics" \ + ASKOMICS_COMMIT="9816964e5d226888f6e4fd04e968e1fd9744adfe" + +# Copy files +COPY monitor_traffic.sh /monitor_traffic.sh +COPY start.sh /start.sh + +# Install prerequisites, clone repository and install +RUN apk add --update bash make gcc g++ zlib-dev libzip-dev bzip2-dev xz-dev git python3 python3-dev nodejs nodejs-npm wget && \ + git clone ${ASKOMICS} ${ASKOMICS_DIR} && \ + cd ${ASKOMICS_DIR} && \ + git checkout ${ASKOMICS_COMMIT} && \ + npm install gulp -g && \ + npm install --production && \ + chmod +x startAskomics.sh && \ + rm -rf /usr/local/askomics/venv && \ + bash ./startAskomics.sh -b && \ + rm -rf /var/cache/apk/* && \ + chmod +x /start.sh WORKDIR /usr/local/askomics/ -RUN git checkout ${ASKOMICS_COMMIT} - -RUN npm install gulp -g -RUN npm install -RUN chmod +x startAskomics.sh - -# Delete the local venv if exist and build the new one -RUN rm -rf /usr/local/askomics/venv && \ - ./startAskomics.sh -b - -ADD monitor_traffic.sh / -ADD start.sh ./ -RUN 
chmod +x start.sh /virtuoso.sh /monitor_traffic.sh - EXPOSE 6543 -CMD ["./start.sh"] +CMD ["/start.sh"] diff --git a/README.md b/README.md index edc3042..682634f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,26 @@ -# docker-askomics-virtuoso_-ie -Docker AskOmics + Virtuoso for Galaxy Interactive Environment +# Docker AskOmics + Virtuoso (For Galaxy Interactive Environment) + +![Docker Build](https://img.shields.io/docker/pulls/xgaia/docker-askomics-virtuoso-ie.svg) +[![Build Status](https://travis-ci.org/xgaia/docker-askomics-virtuoso-ie.svg?branch=master)](https://travis-ci.org/xgaia/docker-askomics-virtuoso-ie) + +AskOmics and Virtuoso dockerized + +## Pull from dockerHub + + docker pull xgaia/askomics-ie + +## Or build + + # Clone the repo + git clone https://github.com/xgaia/docker-askomics-virtuoso-ie.git + cd docker-askomics-virtuoso-ie + docker build -t askomics-ie . + +## Run + + docker run --name myAskOmics \ + -p 6543:6543 \ + -d xgaia/askomics-ie + + +This image is for the Galaxy Interactive Environment. To use AskOmics alone, please use [this image](https://github.com/askomics/docker-askomics) or [this docker compose](https://github.com/askomics/askomics-docker-compose). diff --git a/monitor_traffic.sh b/monitor_traffic.sh index 2e8a6d9..ab5ca35 100644 --- a/monitor_traffic.sh +++ b/monitor_traffic.sh @@ -1,6 +1,6 @@ #!/bin/bash while true; do - sleep 1h # Set to 1h for testing, change to 1m in production mode + sleep 1m if [ `netstat -t | grep -v CLOSE_WAIT | grep ':6543' | wc -l` -lt 3 ] then pkill python3 diff --git a/start.sh b/start.sh index 0b73063..38009b4 100644 --- a/start.sh +++ b/start.sh @@ -28,9 +28,11 @@ mkdir -p $ASKOMICS_FILES_DIR/upload ln -s /import $ASKOMICS_FILES_DIR/upload/$username # Monitor traffic +chmod +x /monitor_traffic.sh /monitor_traffic.sh & # Start Virtuoso +chmod +x /virtuoso.sh /virtuoso.sh & # Wait for virtuoso to be up @@ -39,4 +41,4 @@ while ! 
wget -o /dev/null http://localhost:8890/conductor; do done # Start AskOmics -./startAskomics.sh -r -d dev +${ASKOMICS_DIR}/startAskomics.sh -r -d dev diff --git a/virtuoso/clean-logs.sh b/virtuoso/clean-logs.sh deleted file mode 100644 index aa380f4..0000000 --- a/virtuoso/clean-logs.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -echo "$(date) Cleaning Virtuoso HTTP logs. Only keep 5 most recent files" -find . -maxdepth 1 -type f -name 'http*.log' | xargs -x ls -t | awk 'NR>5' | xargs -L1 rm diff --git a/virtuoso/dump.template.nq b/virtuoso/dump.template.nq deleted file mode 100644 index c791035..0000000 --- a/virtuoso/dump.template.nq +++ /dev/null @@ -1,19 +0,0 @@ - . - . - . - "__USERNAME__" . - "__PASSWORD_HASH__" . - "true"^^ . - "false"^^ . - "__SALT__" . - - . - . - "__ASKOMICS_KEY_NAME__" . - "__ASKOMICS_API_KEY__" . - - - . - . - "__GALAXY_URL__" . - "__GALAXY_KEY__" . \ No newline at end of file diff --git a/virtuoso/dump_nquads_procedure.sql b/virtuoso/dump_nquads_procedure.sql deleted file mode 100644 index 7cfa542..0000000 --- a/virtuoso/dump_nquads_procedure.sql +++ /dev/null @@ -1,47 +0,0 @@ -create procedure dump_nquads (in dir varchar := 'dumps', in start_from int := 1, in file_length_limit integer := 100000000, in comp int := 1) -{ - declare inx, ses_len int; - declare file_name varchar; - declare env, ses any; - - inx := start_from; - set isolation = 'uncommitted'; - env := vector (0,0,0); - ses := string_output (10000000); - for (select * from (sparql define input:storage "" select ?s ?p ?o ?g { graph ?g { ?s ?p ?o } . 
filter ( ?g != virtrdf: ) } ) as sub option (loop)) do - { - declare exit handler for sqlstate '22023' - { - goto next; - }; - http_nquad (env, "s", "p", "o", "g", ses); - ses_len := length (ses); - if (ses_len >= file_length_limit) - { - file_name := sprintf ('%s/output%06d.nq', dir, inx); - string_to_file (file_name, ses, -2); - if (comp) - { - gz_compress_file (file_name, file_name||'.gz'); - file_delete (file_name); - } - inx := inx + 1; - env := vector (0,0,0); - ses := string_output (10000000); - } - next:; - } - if (length (ses)) - { - file_name := sprintf ('%s/output%06d.nq', dir, inx); - string_to_file (file_name, ses, -2); - if (comp) - { - gz_compress_file (file_name, file_name||'.gz'); - file_delete (file_name); - } - inx := inx + 1; - env := vector (0,0,0); - } -} -; diff --git a/virtuoso/virtuoso.ini b/virtuoso/virtuoso.ini deleted file mode 100644 index 573f996..0000000 --- a/virtuoso/virtuoso.ini +++ /dev/null @@ -1,260 +0,0 @@ -; -; virtuoso.ini -; -; Configuration file for the OpenLink Virtuoso VDBMS Server -; -; To learn more about this product, or any other product in our -; portfolio, please check out our web site at: -; -; http://virtuoso.openlinksw.com/ -; -; or contact us at: -; -; general.information@openlinksw.com -; -; If you have any technical questions, please contact our support -; staff at: -; -; technical.support@openlinksw.com -; - -; -; Database setup -; -[Database] -DatabaseFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.db -ErrorLogFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.log -LockFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.lck -TransactionFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.trx -xa_persistent_file = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.pxa -ErrorLogLevel = 7 -FileExtend = 200 -MaxCheckpointRemap = 2000 -Striping = 0 -TempStorage = TempDatabase - - -[TempDatabase] -DatabaseFile = 
/usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso-temp.db -TransactionFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso-temp.trx -MaxCheckpointRemap = 2000 -Striping = 0 - - -; -; Server parameters -; -[Parameters] -ServerPort = 1111 -LiteMode = 0 -DisableUnixSocket = 1 -DisableTcpSocket = 0 -;SSLServerPort = 2111 -;SSLCertificate = cert.pem -;SSLPrivateKey = pk.pem -;X509ClientVerify = 0 -;X509ClientVerifyDepth = 0 -;X509ClientVerifyCAFile = ca.pem -MaxClientConnections = 10 -CheckpointInterval = 60 -O_DIRECT = 0 -CaseMode = 2 -MaxStaticCursorRows = 5000 -CheckpointAuditTrail = 0 -AllowOSCalls = 0 -SchedulerInterval = 10 -DirsAllowed = ., /usr/local/virtuoso-opensource/share/virtuoso/vad -ThreadCleanupInterval = 10 -ThreadThreshold = 10 -ResourcesCleanupInterval = 10 -FreeTextBatchSize = 100000 -SingleCPU = 0 -VADInstallDir = /usr/local/virtuoso-opensource/share/virtuoso/vad/ -PrefixResultNames = 0 -RdfFreeTextRulesSize = 100 -IndexTreeMaps = 256 -MaxMemPoolSize = 200000000 -PrefixResultNames = 0 -MacSpotlight = 0 -IndexTreeMaps = 64 -MaxQueryMem = 2G ; memory allocated to query processor -VectorSize = 1000 ; initial parallel query vector (array of query operations) size -MaxVectorSize = 1000000 ; query vector size threshold. -AdjustVectorSize = 0 -ThreadsPerQuery = 4 -AsyncQueueMaxThreads = 10 -;; -;; When running with large data sets, one should configure the Virtuoso -;; process to use between 2/3 to 3/5 of free system memory and to stripe -;; storage on all available disks. 
-;; -;; Uncomment next two lines if there is 2 GB system memory free -;NumberOfBuffers = 170000 -;MaxDirtyBuffers = 130000 -;; Uncomment next two lines if there is 4 GB system memory free -;NumberOfBuffers = 340000 -; MaxDirtyBuffers = 250000 -;; Uncomment next two lines if there is 8 GB system memory free -;NumberOfBuffers = 680000 -;MaxDirtyBuffers = 500000 -;; Uncomment next two lines if there is 16 GB system memory free -;NumberOfBuffers = 1360000 -;MaxDirtyBuffers = 1000000 -;; Uncomment next two lines if there is 32 GB system memory free -;NumberOfBuffers = 2720000 -;MaxDirtyBuffers = 2000000 -;; Uncomment next two lines if there is 48 GB system memory free -;NumberOfBuffers = 4000000 -;MaxDirtyBuffers = 3000000 -;; Uncomment next two lines if there is 64 GB system memory free -;NumberOfBuffers = 5450000 -;MaxDirtyBuffers = 4000000 -;; -;; Note the default settings will take very little memory -;; but will not result in very good performance -;; -NumberOfBuffers = 10000 -MaxDirtyBuffers = 6000 - - -[HTTPServer] -ServerPort = 8890 -ServerRoot = /usr/local/virtuoso-opensource/var/lib/virtuoso/vsp -MaxClientConnections = 10 -DavRoot = DAV -EnabledDavVSP = 0 -HTTPProxyEnabled = 0 -TempASPXDir = 0 -DefaultMailServer = localhost:25 -ServerThreads = 10 -MaxKeepAlives = 10 -KeepAliveTimeout = 10 -MaxCachedProxyConnections = 10 -ProxyConnectionCacheTimeout = 15 -HTTPThreadSize = 280000 -HttpPrintWarningsInOutput = 0 -Charset = UTF-8 -;;HTTPLogFile = http.log -MaintenancePage = atomic.html -EnabledGzipContent = 1 - - -[AutoRepair] -BadParentLinks = 0 - -[Client] -SQL_PREFETCH_ROWS = 100 -SQL_PREFETCH_BYTES = 16000 -SQL_QUERY_TIMEOUT = 0 -SQL_TXN_TIMEOUT = 0 -;SQL_NO_CHAR_C_ESCAPE = 1 -;SQL_UTF8_EXECS = 0 -;SQL_NO_SYSTEM_TABLES = 0 -;SQL_BINARY_TIMESTAMP = 1 -;SQL_ENCRYPTION_ON_PASSWORD = -1 - -[VDB] -ArrayOptimization = 0 -NumArrayParameters = 10 -VDBDisconnectTimeout = 1000 -KeepConnectionOnFixedThread = 0 - -[Replication] -ServerName = db-D602566B774E -ServerEnable = 
1 -QueueMax = 50000 - - -; -; Striping setup -; -; These parameters have only effect when Striping is set to 1 in the -; [Database] section, in which case the DatabaseFile parameter is ignored. -; -; With striping, the database is spawned across multiple segments -; where each segment can have multiple stripes. -; -; Format of the lines below: -; Segment = , [, .. ] -; -; must be ordered from 1 up. -; -; The is the total size of the segment which is equally divided -; across all stripes forming the segment. Its specification can be in -; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks -; (b, the default) -; -; Note that the segment size must be a multiple of the database page size -; which is currently 8k. Also, the segment size must be divisible by the -; number of stripe files forming the segment. -; -; The example below creates a 200 meg database striped on two segments -; with two stripes of 50 meg and one of 100 meg. -; -; You can always add more segments to the configuration, but once -; added, do not change the setup. -; -[Striping] -Segment1 = 100M, db-seg1-1.db, db-seg1-2.db -Segment2 = 100M, db-seg2-1.db -;... - -;[TempStriping] -;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db -;Segment2 = 100M, db-seg2-1.db -;... - -;[Ucms] -;UcmPath = -;Ucm1 = -;Ucm2 = -;... 
- - -[Zero Config] -ServerName = virtuoso (D602566B774E) -;ServerDSN = ZDSN -;SSLServerName = -;SSLServerDSN = - - -[Mono] -;MONO_TRACE = Off -;MONO_PATH = -;MONO_ROOT = -;MONO_CFG_DIR = -;virtclr.dll = - - -[URIQA] -DynamicLocal = 0 -DefaultHost = localhost:8890 - - -[SPARQL] -;ExternalQuerySource = 1 -;ExternalXsltSource = 1 -;DefaultGraph = http://localhost:8890/dataspace -;ImmutableGraphs = http://localhost:8890/dataspace -ResultSetMaxRows = 10000 -MaxQueryCostEstimationTime = 400 ; in seconds -MaxQueryExecutionTime = 60 ; in seconds -DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100 -DeferInferenceRulesInit = 0 ; controls inference rules loading -;PingService = http://rpc.pingthesemanticweb.com/ - - -[Plugins] -LoadPath = /usr/local/virtuoso-opensource/lib/virtuoso/hosting -;Load1 = plain, wikiv -;Load2 = plain, mediawiki -;Load3 = plain, creolewiki -;Load4 = plain, im -;Load5 = plain, wbxml2 -;Load6 = plain, hslookup -;Load7 = attach, libphp5.so -;Load8 = Hosting, hosting_php.so -;Load9 = Hosting,hosting_perl.so -;Load10 = Hosting,hosting_python.so -;Load11 = Hosting,hosting_ruby.so -;Load12 = msdtc,msdtc_sample diff --git a/virtuoso/virtuoso.sh b/virtuoso/virtuoso.sh deleted file mode 100644 index a7db90a..0000000 --- a/virtuoso/virtuoso.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -cd /data - -mkdir -p dumps - -if [ ! -f ./virtuoso.ini ]; -then - mv /virtuoso.ini . 2>/dev/null -fi - -chmod +x /clean-logs.sh -mv /clean-logs.sh . 2>/dev/null - -if [ ! 
-f ".config_set" ]; -then - echo "Converting environment variables to ini file" - printenv | grep -P "^VIRT_" | while read setting - do - section=`echo "$setting" | grep -o -P "^VIRT_[^_]+" | sed 's/^.\{5\}//g'` - key=`echo "$setting" | grep -o -P "_[^_]+=" | sed 's/[_=]//g'` - value=`echo "$setting" | grep -o -P "=.*$" | sed 's/^=//g'` - echo "Registering $section[$key] to be $value" - crudini --set virtuoso.ini $section $key $value - done - echo `date +%Y-%m%-dT%H:%M:%S%:z` > .config_set - echo "Finished converting environment variables to ini file" -fi - -if [ ! -f ".dba_pwd_set" ]; -then - touch /sql-query.sql - if [ "$DBA_PASSWORD" ]; then echo "user_set_password('dba', '$DBA_PASSWORD');" >> /sql-query.sql ; fi - if [ "$SPARQL_UPDATE" = "true" ]; then echo "GRANT SPARQL_UPDATE to \"SPARQL\";" >> /sql-query.sql ; fi - virtuoso-t +wait && isql-v -U dba -P dba < /dump_nquads_procedure.sql && isql-v -U dba -P dba < /sql-query.sql - kill $(ps aux | grep '[v]irtuoso-t' | awk '{print $2}') - echo `date +%Y-%m-%dT%H:%M:%S%:z` > .dba_pwd_set -fi - -if [ ! -f ".data_loaded" -a -d "toLoad" ] ; -then - echo "Start data loading from toLoad folder" - pwd="dba" - graph="http://localhost:8890/DAV" - - if [ "$DBA_PASSWORD" ]; then pwd="$DBA_PASSWORD" ; fi - if [ "$DEFAULT_GRAPH" ]; then graph="$DEFAULT_GRAPH" ; fi - echo "ld_dir('toLoad', '*', '$graph');" >> /load_data.sql - echo "rdf_loader_run();" >> /load_data.sql - echo "exec('checkpoint');" >> /load_data.sql - echo "WAIT_FOR_CHILDREN; " >> /load_data.sql - echo "$(cat /load_data.sql)" - virtuoso-t +wait && isql-v -U dba -P "$pwd" < /load_data.sql - kill $(ps aux | grep '[v]irtuoso-t' | awk '{print $2}') - echo `date +%Y-%m-%dT%H:%M:%S%:z` > .data_loaded -fi - -exec virtuoso-t +wait +foreground