diff --git a/Changes b/Changes index 4cce76f..ff85d57 100644 --- a/Changes +++ b/Changes @@ -1,7 +1,9 @@ -0.45 2022-02-24 +0.45 2022-03-04 - Due to problems installing Archive::Tar::Builder in certain environments, this is now optional, with a pure perl fallback archiver. + - Support externalLink and internalLink universally in + i5 meta data. 0.44 2022-02-17 - Improve Gingko Metadata support. diff --git a/lib/KorAP/XML/Meta/Gingko.pm b/lib/KorAP/XML/Meta/Gingko.pm index 83ac9ac..7f38de9 100644 --- a/lib/KorAP/XML/Meta/Gingko.pm +++ b/lib/KorAP/XML/Meta/Gingko.pm @@ -69,15 +69,17 @@ sub parse { $self->{S_gingko_collection_short} = $temp if $temp; }; - if ($temp = $mono->at('biblNote[n="url"]')) { - $temp = $squish->($temp->all_text); - $self->{A_external_link} = $self->korap_data_uri($temp, title => 'Gingko-Webseite an der Universität Leipzig'); - }; - - if ($temp = $mono->at('biblNote[n="url.ids"]')) { - $temp = $squish->($temp->all_text); - $self->{A_internal_link} = $self->korap_data_uri($temp, title => 'IDS webpage on Gingko in the DeReKo archive'); - }; +# if ($temp = $mono->at('biblNote[n="url"]')) { +# my $title = $temp->attr('rend') || 'Gingko-Webseite an der Universität Leipzig'; +# $temp = $squish->($temp->all_text); +# $self->{A_external_link} = $self->korap_data_uri($temp, title => $title); +# }; + +# if ($temp = $mono->at('biblNote[n="url.ids"]')) { +# my $title = $temp->attr('rend') || 'IDS webpage on Gingko in the DeReKo archive'; +# $temp = $squish->($temp->all_text); +# $self->{A_internal_link} = $self->korap_data_uri($temp, title => $title); +# }; }; }; diff --git a/lib/KorAP/XML/Meta/I5.pm b/lib/KorAP/XML/Meta/I5.pm index 928b9be..295485d 100644 --- a/lib/KorAP/XML/Meta/I5.pm +++ b/lib/KorAP/XML/Meta/I5.pm @@ -135,6 +135,18 @@ sub parse { $sub_title = $sub_title ? _squish $sub_title->all_text : undef; $author = $author ? _squish $author->all_text : undef; + if (my $temp = $analytic->at('biblNote[n="url"]')) { + my $url = _squish $temp->all_text; + my $title = $temp->attr('rend') || $url; + $self->{"A_${type}_external_link"} = $self->korap_data_uri($url, title => $title); + }; + + if (my $temp = $analytic->at('biblNote[n="url.ids"]')) { + my $url = _squish $temp->all_text; + my $title = $temp->attr('rend') || $url; + $self->{"A_${type}_internal_link"} = $self->korap_data_uri($url, title => $title); + }; + # Text meta data if ($type eq 'text') { unless ($self->{T_title} || $self->{T_sub_title}) { diff --git a/script/korapxml2krill b/script/korapxml2krill index a4a8276..5ed2af1 100644 --- a/script/korapxml2krill +++ b/script/korapxml2krill @@ -160,7 +160,7 @@ use Fcntl qw(:flock SEEK_END); # - Introduced support for Gingko # ---------------------------------------------------------- -our $LAST_CHANGE = '2022/02/24'; +our $LAST_CHANGE = '2022/03/04'; our $LOCAL = $FindBin::Bin; our $KORAL_VERSION = 0.03; our $VERSION_MSG = <<"VERSION"; diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/base/tokens.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/base/tokens.xml new file mode 100644 index 0000000..0c2d815 --- /dev/null +++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/base/tokens.xml @@ -0,0 +1,2201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml b/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml index 1c624de..1b8a4a3 100644 --- a/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml +++ b/t/real/corpus/Gingko/ATZ07/JAN/00001/struct/structure.xml @@ -32,7 +32,7 @@ head - + main diff --git a/t/real/corpus/Gingko/ATZ07/JAN/header.xml b/t/real/corpus/Gingko/ATZ07/JAN/header.xml index 89bedcd..fe5b813 100644 --- a/t/real/corpus/Gingko/ATZ07/JAN/header.xml +++ b/t/real/corpus/Gingko/ATZ07/JAN/header.xml @@ -24,8 +24,8 @@ 2007 - http://www.uni-leipzig.de/gingko/ - https://www.ids-mannheim.de/digspra/kl/projekte/korpora/archiv-1/gingko/ + http://www.uni-leipzig.de/gingko/ + https://www.ids-mannheim.de/digspra/kl/projekte/korpora/archiv-1/gingko/ diff --git a/t/real/corpus/Gingko/ATZ07/header.xml b/t/real/corpus/Gingko/ATZ07/header.xml index c1eb061..b6ff79b 100644 --- a/t/real/corpus/Gingko/ATZ07/header.xml +++ b/t/real/corpus/Gingko/ATZ07/header.xml @@ -52,8 +52,8 @@ Leipzig 2021 - http://www.uni-leipzig.de/gingko/ - https://www.ids-mannheim.de/digspra/kl/projekte/korpora/archiv-1/gingko/ + http://www.uni-leipzig.de/gingko/ + https://www.ids-mannheim.de/digspra/kl/projekte/korpora/archiv-1/gingko/ Gingko - Geschriebenes Ingenieurwissenschaftliches Korpus Gingko diff --git a/t/real/gingko.t b/t/real/gingko.t index 6895c19..b124d20 100644 --- a/t/real/gingko.t +++ b/t/real/gingko.t @@ -82,8 +82,8 @@ is($meta->{T_gingko_collection}, 'Gingko - Geschriebenes Ingenieurwissenschaftli is($meta->{S_gingko_collection_short}, 'Gingko'); is($meta->{A_gingko_article_DOI}, 'data:application/x.korap-link;title=doi%3A10.1007%2FBF03221854,https%3A%2F%2Fdoi.org%2F10.1007%2FBF03221854', 'Gingko Article DOI'); is($meta->{I_gingko_text_tokens}, '2191', 'Gingko Text Tokens'); -is($meta->{A_internal_link}, 'data:application/x.korap-link;title=IDS%20webpage%20on%20Gingko%20in%20the%20DeReKo%20archive,https%3A%2F%2Fwww.ids-mannheim.de%2Fdigspra%2Fkl%2Fprojekte%2Fkorpora%2Farchiv-1%2Fgingko%2F', 'Gingko Internal Link'); -is($meta->{A_external_link}, 'data:application/x.korap-link;title=Gingko-Webseite%20an%20der%20Universit%C3%A4t%20Leipzig,http%3A%2F%2Fwww.uni-leipzig.de%2Fgingko%2F', 'Gingko External Link'); +is($meta->{A_corpus_internal_link}, 'data:application/x.korap-link;title=IDS%20webpage%20on%20Gingko%20in%20the%20DeReKo%20archive,https%3A%2F%2Fwww.ids-mannheim.de%2Fdigspra%2Fkl%2Fprojekte%2Fkorpora%2Farchiv-1%2Fgingko%2F', 'Gingko Internal Link'); +is($meta->{A_corpus_external_link}, 'data:application/x.korap-link;title=Gingko-Webseite%20an%20der%20Universit%C3%A4t%20Leipzig,http%3A%2F%2Fwww.uni-leipzig.de%2Fgingko%2F', 'Gingko External Link'); # Tokenization