diff --git a/Dockerfile b/Dockerfile
index 3211f51..de10e18 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,7 +42,7 @@ RUN apk add --no-cache --update openssl py-pip fuse libcurl libxml2 && \
     ln -s /usr/local/virtuoso-opensource/var/lib/virtuoso/db /data
 
 COPY --from=builder /usr/local/virtuoso-opensource /usr/local/virtuoso-opensource
-COPY virtuoso.ini dump_nquads_procedure.sql clean-logs.sh virtuoso.sh /virtuoso/
+COPY virtuoso.ini dump_nquads_procedure.sql dump_one_graph_procedure.sql clean-logs.sh virtuoso.sh /virtuoso/
 
 WORKDIR /data
 EXPOSE 8890 1111
diff --git a/README.md b/README.md
index f348471..b4a724c 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,16 @@ isql-v -U dba -P $DBA_PASSWORD
 SQL> dump_nquads ('dumps', 1, 10000000, 1);
 ```
 
+You can also use the `dump_one_graph` procedure if you need to dump a specific graph:
+
+```bash
+docker exec -it my-virtuoso sh
+isql-v -U dba -P $DBA_PASSWORD
+SQL> dump_one_graph ('http://daas.openlinksw.com/data#', 'dumps', 1000000000);
+```
+
+The second argument is a file name prefix, not a directory: the call above produces `dumps000001.ttl.gz`, `dumps000002.ttl.gz`, ... and stores the graph IRI in `dumps000001.ttl.graph`.
+
 For more information, see http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VirtRDFDumpNQuad
 
 ## Loading quads in Virtuoso
diff --git a/dump_one_graph_procedure.sql b/dump_one_graph_procedure.sql
new file mode 100644
index 0000000..d70f744
--- /dev/null
+++ b/dump_one_graph_procedure.sql
@@ -0,0 +1,96 @@
+-- dump_one_graph: dumps every triple of one named graph into gzip-compressed
+-- Turtle files.
+--
+-- Parameters:
+--   srcgraph           IRI of the graph to dump.
+--   out_file           File name prefix. Part N is written to the file named
+--                      out_file followed by N as six digits and .ttl, which is
+--                      then replaced by its .gz compressed copy.
+--   file_length_limit  Buffered output length after which the current part is
+--                      closed and the next one started. It is only checked when
+--                      the buffer is flushed, so a part can exceed it.
+--
+-- The graph IRI is also saved in a file named like the first part with an
+-- additional .graph suffix.
+CREATE PROCEDURE dump_one_graph
+  ( IN  srcgraph           VARCHAR
+  , IN  out_file           VARCHAR
+  , IN  file_length_limit  INTEGER  := 1000000000
+  )
+  {
+    DECLARE  file_name     VARCHAR;
+    DECLARE  env, ses      ANY;
+    DECLARE  ses_len
+          ,  max_ses_len
+          ,  file_len
+          ,  file_idx
+          ,  part_is_open  INTEGER;
+    SET ISOLATION = 'uncommitted';
+    -- Flush the in-memory buffer to disk once it grows past this length.
+    max_ses_len  := 10000000;
+    file_len     := 0;
+    file_idx     := 1;
+    -- Becomes 1 once a triple went into the current part, meaning the part
+    -- still needs its final statement terminator before it is compressed.
+    part_is_open := 0;
+    file_name    := sprintf ('%s%06d.ttl', out_file, file_idx);
+    string_to_file ( file_name || '.graph',
+                     srcgraph,
+                     -2
+                   );
+    string_to_file ( file_name,
+                     sprintf ( '# Dump of graph <%s>, as of %s\n@base <> .\n',
+                               srcgraph,
+                               CAST (NOW() AS VARCHAR)
+                             ),
+                     -2
+                   );
+    env := vector (dict_new (16000), 0, '', '', '', 0, 0, 0, 0, 0);
+    ses := string_output ();
+    FOR (SELECT * FROM ( SPARQL DEFINE input:storage ""
+                         SELECT ?s ?p ?o { GRAPH `iri(?:srcgraph)` { ?s ?p ?o } }
+                       ) AS sub OPTION (LOOP)) DO
+      {
+        http_ttl_triple (env, "s", "p", "o", ses);
+        part_is_open := 1;
+        ses_len := length (ses);
+        IF (ses_len > max_ses_len)
+          {
+            file_len := file_len + ses_len;
+            IF (file_len > file_length_limit)
+              {
+                -- The part is full: terminate it, compress it, start the next.
+                http (' .\n', ses);
+                string_to_file (file_name, ses, -1);
+                gz_compress_file (file_name, file_name||'.gz');
+                file_delete (file_name);
+                part_is_open := 0;
+                file_len := 0;
+                file_idx := file_idx + 1;
+                file_name := sprintf ('%s%06d.ttl', out_file, file_idx);
+                string_to_file ( file_name,
+                                 sprintf ( '# Dump of graph <%s>, as of %s (part %d)\n@base <> .\n',
+                                           srcgraph,
+                                           CAST (NOW() AS VARCHAR),
+                                           file_idx),
+                                 -2
+                               );
+                env := VECTOR (dict_new (16000), 0, '', '', '', 0, 0, 0, 0, 0);
+              }
+            ELSE
+              string_to_file (file_name, ses, -1);
+            ses := string_output ();
+          }
+      }
+    -- Finish the last part. Testing part_is_open instead of the buffer length
+    -- also covers a loop that ended right after a plain flush, where the
+    -- buffer is empty but the file on disk is still unterminated.
+    IF (part_is_open)
+      {
+        http (' .\n', ses);
+        string_to_file (file_name, ses, -1);
+        gz_compress_file (file_name, file_name||'.gz');
+        file_delete (file_name);
+      }
+  }
+;
diff --git a/virtuoso.sh b/virtuoso.sh
index 2e90a0b..dbfdf20 100755
--- a/virtuoso.sh
+++ b/virtuoso.sh
@@ -35,7 +35,10 @@ echo "Updating dba password and sparql update..."
 if [ "$DBA_PASSWORD" ]; then echo "user_set_password('dba', '$DBA_PASSWORD');" >> /sql-query.sql ; fi
 if [ "$SPARQL_UPDATE" = "true" ]; then echo 'GRANT SPARQL_UPDATE to "SPARQL";' >> /sql-query.sql ; fi
 if [ "$SPARQL_UPDATE" = "true" ]; then echo 'GRANT execute on "DB.DBA.L_O_LOOK_NE" to "SPARQL";' >> /sql-query.sql ; fi
-virtuoso-t +configfile ${CONFIG_FILE} +wait && isql-v -U dba -P dba < /virtuoso/dump_nquads_procedure.sql && isql-v -U dba -P dba < /sql-query.sql
+virtuoso-t +configfile ${CONFIG_FILE} +wait
+isql-v -U dba -P dba < /virtuoso/dump_nquads_procedure.sql
+isql-v -U dba -P dba < /virtuoso/dump_one_graph_procedure.sql
+isql-v -U dba -P dba < /sql-query.sql
 kill $(ps ax | egrep '[v]irtuoso-t' | awk '{print $1}')
 
 # Make sure killing is done