diff --git a/Changes b/Changes index e646130..c6863f2 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,16 @@ Revision history for Perl extension URI::PackageURL. +2.03 2023-11-09 + - Improved validation during encode and decode "purl" string + - Fixed CPAN repository URL + - FIX Qualifiers are case sensitive + (giterlizzi/perl-URI-PackageURL#4) + - FIX PURLs containing multiple namespaces segments parse incorrectly + (giterlizzi/perl-URI-PackageURL#5) + - FIX Incorrect parsing of PURLs that begin with "pkg:/" + (giterlizzi/perl-URI-PackageURL#6) + - Improved "t/99-official-purl-test-suite.t" test + 2.02 2023-09-22 - Added core "JSON" module prerequisite in Makefile.PL (#4) diff --git a/README.md b/README.md index 1cbca98..7710261 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,66 @@ use URI::PackageURL; # OO-interface # Encode components in PackageURL string -$purl = URI::PackageURL->new(type => cpan, namespace => 'GDT', name => 'URI-PackageURL', version => '2.02'); +$purl = URI::PackageURL->new(type => cpan, namespace => 'GDT', name => 'URI-PackageURL', version => '2.03'); -say $purl; # pkg:cpan/GDT/URI-PackageURL@2.02 +say $purl; # pkg:cpan/GDT/URI-PackageURL@2.03 # Parse PackageURL string -$purl = URI::PackageURL->from_string('pkg:cpan/GDT/URI-PackageURL@2.02'); +$purl = URI::PackageURL->from_string('pkg:cpan/GDT/URI-PackageURL@2.03'); # exported funtions -$purl = decode_purl('pkg:cpan/GDT/URI-PackageURL@2.02'); +$purl = decode_purl('pkg:cpan/GDT/URI-PackageURL@2.03'); say $purl->type; # cpan -$purl_string = encode_purl(type => cpan, namespace => 'GDT', name => 'URI::PackageURL', version => '2.02'); +$purl_string = encode_purl(type => cpan, namespace => 'GDT', name => 'URI::PackageURL', version => '2.03'); +``` + + +## purl-tool a CLI for URI::PackageURL module + +Inspect and export "purl" string in various formats (JSON, YAML, Data::Dumper, ENV): + +```console +$ purl-tool pkg:cpan/GDT/URI-PackageURL@2.03 --json | jq +{ + "name": "URI-PackageURL", + "namespace": "GDT", + "qualifiers": {}, + "subpath": null, + "type": "cpan", + "version": "2.03" +} +``` + + +Download package using "purl" string: + +```console +$ wget $(purl-tool pkg:cpan/GDT/URI-PackageURL@2.03 --download-url) +``` + + +Use "purl" string in your shell-scripts: + +```.bash +#!bash + +set -e + +PURL="pkg:cpan/GDT/URI-PackageURL@2.03" + +eval $(purl-tool "$PURL" --env) + +echo "Download $PURL_NAME $PURL_VERSION" +wget $PURL_DOWNLOAD_URL + +echo "Build and install module $PURL_NAME $PURL_VERSION" +tar xvf $PURL_NAME-$PURL_VERSION.tar.gz + +cd $PURL_NAME-$PURL_VERSION +perl Makefile.PL +make && make install ``` ## Install @@ -45,6 +92,7 @@ Using App::cpanminus: - `perldoc URI::PackageURL` - https://metacpan.org/release/URI-PackageURL + - /~https://github.com/package-url/purl-spec ## Copyright diff --git a/bin/purl-tool b/bin/purl-tool index 84cbb2d..a2df3f4 100644 --- a/bin/purl-tool +++ b/bin/purl-tool @@ -40,9 +40,9 @@ purl-tool - PackageURL tool Examples: - purl-tool pkg:cpan/GDT/URI-PackageURL@2.02 --json | jq + purl-tool pkg:cpan/GDT/URI-PackageURL@2.03 --json | jq - wget $(purl-tool pkg:cpan/GDT/URI-PackageURL@2.02 --download-url) + wget $(purl-tool pkg:cpan/GDT/URI-PackageURL@2.03 --download-url) =head1 DESCRIPTION @@ -53,7 +53,7 @@ C PackageURL tool Parse the given PackageURL string and return JSON and send the STDOUT to L: -B +B Download the package from the repository using PackageURL string: diff --git a/lib/URI/PackageURL.pm b/lib/URI/PackageURL.pm index ae366ce..20e671d 100644 --- a/lib/URI/PackageURL.pm +++ b/lib/URI/PackageURL.pm @@ -9,9 +9,11 @@ use Carp; use Exporter qw(import); use URI::PackageURL::Util qw(purl_to_urls); +use constant PURL_DEBUG => $ENV{PURL_DEBUG}; + use overload '""' => 'to_string', fallback => 1; -our $VERSION = '2.02'; +our $VERSION = '2.03'; our @EXPORT = qw(encode_purl decode_purl); my $PURL_REGEXP = qr{^pkg:[A-Za-z\\.\\-\\+][A-Za-z0-9\\.\\-\\+]*/.+}; @@ -28,6 +30,8 @@ sub new { my $qualifiers = delete $params{qualifiers} // {}; my $subpath = delete $params{subpath}; + Carp::croak "Invalid PackageURL: '$scheme' is not a valid scheme" if (!$scheme eq 'pkg'); + $type = lc $type; if (grep { $_ eq $type } qw(alpm apk bitbucket composer deb github gitlab hex npm oci pypi)) { @@ -42,6 +46,7 @@ sub new { foreach my $qualifier (keys %{$qualifiers}) { Carp::croak "Invalid PackageURL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\s/); + Carp::croak "Invalid PackageURL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\%/); } $name =~ s/_/-/g if $type eq 'pypi'; @@ -71,6 +76,25 @@ sub new { } + if ($type eq 'mlflow') { + + # The "name" case sensitivity depends on the server implementation: + # - Azure ML: it is case sensitive and must be kept as-is in the package URL. + # - Databricks: it is case insensitive and must be lowercased in the package URL. + + if (defined $qualifiers->{repository_url} && $qualifiers->{repository_url} =~ /azuredatabricks/) { + $name = lc $name; + } + + } + + if ($type eq 'huggingface') { + + # The version is the model revision Git commit hash. It is case insensitive and must be lowercased in the package URL. + $version = lc $version; + + } + my $self = { scheme => $scheme, type => $type, @@ -105,6 +129,11 @@ sub from_string { my ($class, $string) = @_; + # Strip slash / after scheme + while ($string =~ m|^pkg:/|) { + $string =~ s|^pkg:/|pkg:|; + } + if ($string !~ /$PURL_REGEXP/) { Carp::croak 'Malformed PackageURL string'; } @@ -158,7 +187,7 @@ sub from_string { $value = [split(',', $value)]; } - $components{qualifiers}->{$key} = $value; + $components{qualifiers}->{lc $key} = $value; } @@ -200,8 +229,8 @@ sub from_string { # Apply type-specific normalization to the name if needed # This is the name - my @s6 = split('/', $s5[0], 2); - $components{name} = (scalar @s6 > 1) ? _url_decode($s6[1]) : _url_decode($s6[0]); + my @s6 = split('/', $s5[0], -1); + $components{name} = _url_decode(pop @s6); # Split the remainder on '/' @@ -212,9 +241,17 @@ sub from_string { # Join segments back with a '/' # This is the namespace - if (scalar @s6 > 1) { - my @s7 = split('/', $s6[0]); - $components{namespace} = join '/', map { _url_decode($_) } @s7; + if (@s6) { + $components{namespace} = join '/', map { _url_decode($_) } @s6; + } + + if (PURL_DEBUG) { + say STDERR "-- S1: @s1"; + say STDERR "-- S2: @s2"; + say STDERR "-- S3: @s3"; + say STDERR "-- S4: @s4"; + say STDERR "-- S5: @s5"; + say STDERR "-- S6: @s6"; } return $class->new(%components); @@ -244,7 +281,7 @@ sub to_string { # Qualifiers if (my $qualifiers = $self->qualifiers) { - my @qualifiers = map { sprintf('%s=%s', $_, _url_encode($qualifiers->{$_})) } sort keys %{$qualifiers}; + my @qualifiers = map { sprintf('%s=%s', lc $_, _url_encode($qualifiers->{$_})) } sort keys %{$qualifiers}; push @purl, ('?', join('&', @qualifiers)) if (@qualifiers); } @@ -286,9 +323,13 @@ sub _url_encode { } sub _url_decode { + my $string = shift; + return unless $string; + $string =~ s/%([0-9a-fA-F]{2})/chr hex $1/ge; return $string; + } 1; @@ -309,21 +350,21 @@ URI::PackageURL - Perl extension for Package URL (aka "purl") type => cpan, namespace => 'GDT', name => 'URI-PackageURL', - version => '2.02' + version => '2.03' ); - say $purl; # pkg:cpan/GDT/URI-PackageURL@2.02 + say $purl; # pkg:cpan/GDT/URI-PackageURL@2.03 # Parse PackageURL string - $purl = URI::PackageURL->from_string('pkg:cpan/URI-PackageURL@2.02'); + $purl = URI::PackageURL->from_string('pkg:cpan/URI-PackageURL@2.03'); # exported funtions - $purl = decode_purl('pkg:cpan/GDT/URI-PackageURL@2.02'); + $purl = decode_purl('pkg:cpan/GDT/URI-PackageURL@2.03'); say $purl->type; # cpan - $purl_string = encode_purl(type => cpan, name => 'URI-PackageURL', version => '2.02'); - say $purl_string; # pkg:cpan/URI-PackageURL@2.02 + $purl_string = encode_purl(type => cpan, name => 'URI-PackageURL', version => '2.03'); + say $purl_string; # pkg:cpan/URI-PackageURL@2.03 =head1 DESCRIPTION @@ -439,7 +480,7 @@ Helper method for JSON modules (L, L, L, L use Mojo::JSON qw(encode_json); - say encode_json($purl); # {"name":"URI-PackageURL","namespace":"GDT","qualifiers":null,"subpath":null,"type":"cpan","version":"2.02"} + say encode_json($purl); # {"name":"URI-PackageURL","namespace":"GDT","qualifiers":null,"subpath":null,"type":"cpan","version":"2.03"} =back diff --git a/lib/URI/PackageURL/CLI.pm b/lib/URI/PackageURL/CLI.pm index fadcbee..e33dd58 100644 --- a/lib/URI/PackageURL/CLI.pm +++ b/lib/URI/PackageURL/CLI.pm @@ -12,7 +12,7 @@ use Data::Dumper; use URI::PackageURL; -our $VERSION = '2.02'; +our $VERSION = '2.03'; sub cli_error { my ($error) = @_; diff --git a/lib/URI/PackageURL/Util.pm b/lib/URI/PackageURL/Util.pm index 808daed..fc1e446 100644 --- a/lib/URI/PackageURL/Util.pm +++ b/lib/URI/PackageURL/Util.pm @@ -8,7 +8,7 @@ use warnings; use Carp; use Exporter qw(import); -our $VERSION = '2.02'; +our $VERSION = '2.03'; our @EXPORT = qw(purl_to_urls); sub purl_to_urls { @@ -206,7 +206,7 @@ sub _cpan_urls { $name =~ s/\:\:/-/g; # TODO - my $urls = {repository => "https://metacpan.org/pod/$name"}; + my $urls = {repository => "https://metacpan.org/dist/$name"}; if ($name && $version && $author) { diff --git a/t/99-official-purl-test-suite.t b/t/99-official-purl-test-suite.t index 0f229fe..872a64f 100644 --- a/t/99-official-purl-test-suite.t +++ b/t/99-official-purl-test-suite.t @@ -5,19 +5,11 @@ use Test::More; require_ok('URI::PackageURL'); -my $test_suite_data_json = ''; +sub test_purl_encode { -while () { - $test_suite_data_json .= $_; -} + my ($test) = @_; -my $test_suite_data = JSON::decode_json($test_suite_data_json); - -foreach my $test (@{$test_suite_data}) { - - my $is_invalid = $test->{is_invalid}; - my $expected = $test->{canonical_purl}; - my $test_name = $test->{description}; + my $test_name = $test->{description}; my $purl = eval { URI::PackageURL->new( @@ -30,14 +22,60 @@ foreach my $test (@{$test_suite_data}) { ); }; - if ($is_invalid) { - like($@, qr/Invalid PackageURL/i, $test_name); - next; + if ($test->{is_invalid}) { + like($@, qr/Invalid PackageURL/i, "ENCODE: $test_name"); + return; + } + + if (!$test->{is_invalid} && $@) { + fail("ENCODE: $test_name"); + return; + } + + if (!$test->{is_invalid}) { + is($purl->to_string, $test->{canonical_purl}, "ENCODE: $test_name"); + return; } - my $got = $purl->to_string; +} + +sub test_purl_decode { + + my ($test) = @_; + + my $test_name = $test->{description}; + + my $purl = eval { URI::PackageURL->from_string($test->{purl}) }; + + if ($test->{is_invalid}) { + like($@, qr/(Invalid|Malformed) PackageURL/i, "DECODE: $test_name"); + return; + } + + if (!$test->{is_invalid} && $@) { + fail("DECODE: $test_name"); + return; + } + + if (!$test->{is_invalid}) { + is($purl->to_string, $test->{canonical_purl}, "DECODE: $test_name"); + return; + } + +} + +my $test_suite_data_json = ''; + +while () { + $test_suite_data_json .= $_; +} + +my $test_suite_data = JSON::decode_json($test_suite_data_json); + +foreach my $test (@{$test_suite_data}) { - is($got, $expected, $test_name); + test_purl_encode($test); + test_purl_decode($test); }