From 9bdf7091693d2d7bb4b05f49c644e1877af6dd33 Mon Sep 17 00:00:00 2001 From: Giuseppe Di Terlizzi Date: Sun, 2 Jun 2024 00:57:08 +0200 Subject: [PATCH] Move "normalization" check in URI::PackageURL::Util --- lib/URI/PackageURL.pm | 107 +-------- lib/URI/PackageURL/Util.pm | 461 +++++++++++++++++++++++++------------ 2 files changed, 316 insertions(+), 252 deletions(-) diff --git a/lib/URI/PackageURL.pm b/lib/URI/PackageURL.pm index ba9cd14..4d3c135 100644 --- a/lib/URI/PackageURL.pm +++ b/lib/URI/PackageURL.pm @@ -7,13 +7,13 @@ use warnings; use Carp (); use Exporter qw(import); -use URI::PackageURL::Util qw(purl_to_urls); +use URI::PackageURL::Util qw(purl_to_urls purl_components_normalize); use constant DEBUG => $ENV{PURL_DEBUG}; use overload '""' => 'to_string', fallback => 1; -our $VERSION = '2.20_1'; +our $VERSION = '2.20_2'; our @EXPORT = qw(encode_purl decode_purl); my $PURL_REGEXP = qr{^pkg:[A-Za-z\\.\\-\\+][A-Za-z0-9\\.\\-\\+]*/.+}; @@ -30,100 +30,7 @@ sub new { my $qualifiers = delete $params{qualifiers} // {}; my $subpath = delete $params{subpath}; - Carp::croak "Invalid Package URL: '$scheme' is not a valid scheme" if (!$scheme eq 'pkg'); - - $type = lc $type; - - if (grep { $_ eq $type } qw(alpm apk bitbucket composer deb github gitlab hex npm oci pypi)) { - $name = lc $name; - } - - if ($namespace) { - if (grep { $_ eq $type } qw(alpm apk bitbucket composer deb github gitlab golang hex rpm)) { - $namespace = lc $namespace; - } - } - - foreach my $qualifier (keys %{$qualifiers}) { - Carp::croak "Invalid Package URL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\s/); - Carp::croak "Invalid Package URL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\%/); - } - - - # A PyPI package name must be lowercased and underscore "_" replaced with a dash "-". - $name =~ s/_/-/g if $type eq 'pypi'; - - if ($type eq 'cpan') { - - # To refer to a CPAN distribution name, the "namespace" MUST be present. 
In this - # case, the "namespace" is the CPAN id of the author/publisher. It MUST be - # written uppercase, followed by the distribution name in the "name" component. A - # distribution name MUST NOT contain the string "::". - - # To refer to a CPAN module, the "namespace" MUST be absent. The module name MAY - # contain zero or more "::" strings, and the module name MUST NOT contain a "-" - - $namespace = uc $namespace if ($namespace); - - if (($namespace && $name) && $namespace =~ /\:/) { - Carp::croak "Invalid Package URL: CPAN 'namespace' component must have the distribution author"; - } - - if (($namespace && $name) && $name =~ /\:/) { - Carp::croak "Invalid Package URL: CPAN 'name' component must have the distribution name"; - } - - if (!$namespace && $name =~ /\-/) { - Carp::croak "Invalid Package URL: CPAN 'name' component must have the module name"; - } - - } - - if ($type eq 'swift') { - Carp::croak "Invalid Package URL: Swift 'version' is required" unless defined $version; - Carp::croak "Invalid Package URL: Swift 'namespace' is required" unless defined $namespace; - } - - if ($type eq 'cran') { - Carp::croak "Invalid Package URL: Cran 'version' is required" unless defined $version; - } - - if ($type eq 'conan') { - - if ($namespace && $namespace ne '') { - if (!defined $qualifiers->{channel}) { - Carp::croak "Invalid Package URL: Conan 'channel' qualifier does not exist for namespace '$namespace'"; - } - } - else { - if (defined $qualifiers->{channel}) { - Carp::croak - "Invalid Package URL: Conan 'namespace' does not exist for channel '$qualifiers->{channel}'"; - } - } - - } - - if ($type eq 'mlflow') { - - # The "name" case sensitivity depends on the server implementation: - # - Azure ML: it is case sensitive and must be kept as-is in the package URL. - # - Databricks: it is case insensitive and must be lowercased in the package URL. 
- - if (defined $qualifiers->{repository_url} && $qualifiers->{repository_url} =~ /azuredatabricks/) { - $name = lc $name; - } - - } - - if ($type eq 'huggingface') { - - # The version is the model revision Git commit hash. It is case insensitive and - # must be lowercased in the package URL. - $version = lc $version; - } - - my $self = { + return bless purl_components_normalize( scheme => $scheme, type => $type, namespace => $namespace, @@ -131,9 +38,7 @@ sub new { version => $version, qualifiers => $qualifiers, subpath => $subpath - }; - - return bless $self, $class; + ), $class; } @@ -495,7 +400,7 @@ They are exported by default: =over -=item $purl_string = encode_purl(%purl_components); +=item $purl_string = encode_purl(%purl_components) Converts the given Package URL components to "purl" string. Croaks on error. @@ -503,7 +408,7 @@ This function call is functionally identical to: $purl_string = URI::PackageURL->new(%purl_components)->to_string; -=item $purl_components = decode_purl($purl_string); +=item $purl_components = decode_purl($purl_string) Converts the given "purl" string to Package URL components. Croaks on error. 
diff --git a/lib/URI/PackageURL/Util.pm b/lib/URI/PackageURL/Util.pm index 6ccfaac..39089c9 100644 --- a/lib/URI/PackageURL/Util.pm +++ b/lib/URI/PackageURL/Util.pm @@ -7,8 +7,163 @@ use warnings; use Exporter qw(import); -our $VERSION = '2.20'; -our @EXPORT = qw(purl_to_urls); +our $VERSION = '2.20_2'; +our @EXPORT = qw(purl_to_urls purl_components_normalize); + +sub purl_components_normalize { + + my (%component) = @_; + + my %TYPES = ( + conan => \&_conan_normalize, + cpan => \&_cpan_normalize, + cran => \&_cran_normalize, + huggingface => \&_huggingface_normalize, + mlflow => \&_mlflow_normalize, + pypi => \&_pypi_normalize, + swift => \&_swift_normalize, + ); + + Carp::croak "Invalid Package URL: '$component{scheme}' is not a valid scheme" unless ($component{scheme} eq 'pkg'); + + $component{type} = lc $component{type}; + + if (grep { $_ eq $component{type} } qw(alpm apk bitbucket composer deb github gitlab hex npm oci pypi)) { + $component{name} = lc $component{name}; + } + + if (defined $component{namespace}) { + if (grep { $_ eq $component{type} } qw(alpm apk bitbucket composer deb github gitlab golang hex rpm)) { + $component{namespace} = lc $component{namespace}; + } + } + + foreach my $qualifier (keys %{$component{qualifiers}}) { + Carp::croak "Invalid Package URL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\s/); + Carp::croak "Invalid Package URL: '$qualifier' is not a valid qualifier" if ($qualifier =~ /\%/); + } + + if (defined $TYPES{$component{type}}) { + return $TYPES{$component{type}}->(%component); + } + + return \%component; + +} + +sub _conan_normalize { + + my (%component) = @_; + + if (defined $component{namespace} && $component{namespace} ne '') { + if (!defined $component{qualifiers}->{channel}) { + Carp::croak + "Invalid Package URL: Conan 'channel' qualifier does not exist for namespace '$component{namespace}'"; + } + } + else { + if (defined $component{qualifiers}->{channel}) { + Carp::croak + "Invalid Package URL: Conan 
'namespace' does not exist for channel '$component{qualifiers}->{channel}'"; + } + } + + return \%component; + +} + +sub _cpan_normalize { + + my (%component) = @_; + + # To refer to a CPAN distribution name, the "namespace" MUST be present. In this + # case, the "namespace" is the CPAN id of the author/publisher. It MUST be + # written uppercase, followed by the distribution name in the "name" component. A + # distribution name MUST NOT contain the string "::". + + # To refer to a CPAN module, the "namespace" MUST be absent. The module name MAY + # contain zero or more "::" strings, and the module name MUST NOT contain a "-" + + $component{namespace} = uc $component{namespace} if (defined $component{namespace}); + + if ((defined $component{namespace} && defined $component{name}) && $component{namespace} =~ /\:/) { + Carp::croak "Invalid Package URL: CPAN 'namespace' component must have the distribution author"; + } + + if ((defined $component{namespace} && defined $component{name}) && $component{name} =~ /\:/) { + Carp::croak "Invalid Package URL: CPAN 'name' component must have the distribution name"; + } + + if (!defined $component{namespace} && $component{name} =~ /\-/) { + Carp::croak "Invalid Package URL: CPAN 'name' component must have the module name"; + } + + return \%component; + +} + +sub _cran_normalize { + + my (%component) = @_; + + Carp::croak "Invalid Package URL: Cran 'version' is required" unless defined $component{version}; + + return \%component; + +} + +sub _huggingface_normalize { + + my (%component) = @_; + + # The version is the model revision Git commit hash. It is case insensitive and + # must be lowercased in the package URL. + $component{version} = lc $component{version}; + + return \%component; + +} + +sub _mlflow_normalize { + + my (%component) = @_; + + # The "name" case sensitivity depends on the server implementation: + # - Azure ML: it is case sensitive and must be kept as-is in the package URL. 
+ # - Databricks: it is case insensitive and must be lowercased in the package URL. + + if (defined $component{qualifiers}->{repository_url} + && $component{qualifiers}->{repository_url} =~ /azuredatabricks/) + { + $component{name} = lc $component{name}; + } + + return \%component; + +} + +sub _pypi_normalize { + + my (%component) = @_; + + # A PyPI package name must be lowercased and underscore "_" replaced with a dash "-". + $component{name} =~ s/_/-/g; + + return \%component; + +} + +sub _swift_normalize { + + my (%component) = @_; + + Carp::croak "Invalid Package URL: Swift 'version' is required" unless defined $component{version}; + Carp::croak "Invalid Package URL: Swift 'namespace' is required" unless defined $component{namespace}; + + return \%component; + +} + sub purl_to_urls { @@ -50,42 +205,7 @@ sub purl_to_urls { } -sub _github_urls { - - my $purl = shift; - - my $name = $purl->name; - my $namespace = $purl->namespace; - my $version = $purl->version; - my $qualifiers = $purl->qualifiers; - my $file_ext = $qualifiers->{ext} || 'tar.gz'; - my $version_prefix = $qualifiers->{version_prefix} || ''; - - my $urls = {}; - - if ($name && $namespace) { - $urls->{repository} = "/~https://github.com/$namespace/$name"; - } - - if ($version) { - - my $is_sha1 = ($version =~ /^[a-fA-F0-9]{40}$/); - - if ($is_sha1) { - $urls->{download} = "/~https://github.com/$namespace/$name/archive/$version.$file_ext"; - } - else { - $urls->{download} - = "/~https://github.com/$namespace/$name/archive/refs/tags/$version_prefix$version.$file_ext"; - } - - } - - return $urls; - -} - -sub _gitlab_urls { +sub _bitbucket_urls { my $purl = shift; @@ -99,12 +219,11 @@ sub _gitlab_urls { my $urls = {}; if ($name && $namespace) { - $urls->{repository} = "https://gitlab.com/$namespace/$name"; + $urls->{repository} = "https://bitbucket.org/$namespace/$name"; } if ($version) { - $urls->{download} - = 
"https://gitlab.com/$namespace/$name/-/archive/$version_prefix$version/$name-$version_prefix$version.$file_ext"; + $urls->{download} = "https://bitbucket.org/$namespace/$name/get/$version_prefix$version.$file_ext"; } return $urls; @@ -129,66 +248,16 @@ sub _cargo_urls { } -sub _gem_urls { - - my $purl = shift; - - my $name = $purl->name; - my $version = $purl->version; - - if ($name && $version) { - return { - repository => "https://rubygems.org/gems/$name/versions/$version", - download => "https://rubygems.org/downloads/$name-$version.gem" - }; - } - - return {repository => "https://rubygems.org/gems/$name"}; - -} - -sub _pypi_urls { - - my $purl = shift; - - my $name = $purl->name; - my $version = $purl->version; - - if ($name && $version) { - return {repository => "https://pypi.org/project/$name/$version"}; - } - - return {repository => "https://pypi.org/project/$name"}; - -} - -sub _npm_urls { +sub _composer_urls { my $purl = shift; - my $namespace = $purl->namespace; my $name = $purl->name; - my $version = $purl->version; - - if ($namespace && $name && $version) { - return { - repository => "https://www.npmjs.com/package/$namespace/$name/v/$version", - download => "https://registry.npmjs.org/$namespace/$name/-/$name-$version.tgz" - }; - } - - if ($name && $version) { - return { - repository => "https://www.npmjs.com/package/$name/v/$version", - download => "https://registry.npmjs.org/$name/-/$name-$version.tgz" - }; - } + my $namespace = $purl->namespace; - if ($namespace && $name) { - return {repository => "https://www.npmjs.com/package/$namespace/$name"}; - } + return unless ($name && $namespace); - return {repository => "https://www.npmjs.com/package/$name"}; + return {repository => "https://packagist.org/packages/$namespace/$name"}; } @@ -225,66 +294,57 @@ sub _cpan_urls { } -sub _nuget_urls { +sub _docker_urls { my $purl = shift; - my $name = $purl->name; - my $version = $purl->version; + my $name = $purl->name; + my $namespace = $purl->namespace; + my 
$version = $purl->version; + my $qualifiers = $purl->qualifiers; + my $repository_url = $qualifiers->{repository_url} || 'https://hub.docker.com'; - if ($name && $version) { - return { - repository => "https://www.nuget.org/packages/$name/$version", - download => "https://www.nuget.org/api/v2/package/$name/$version" - }; + if ($repository_url !~ /^(http|https):\/\//) { + $repository_url = 'https://' . $repository_url; } - return {repository => "https://www.nuget.org/packages/$name"}; - -} - -sub _maven_urls { - - my $purl = shift; - - my $namespace = $purl->namespace; - my $name = $purl->name; - my $version = $purl->version; - my $qualifiers = $purl->qualifiers; - my $extension = $qualifiers->{extension} // 'jar'; - my $repo_url = $qualifiers->{repository_url} // 'repo1.maven.org/maven2'; - - if ($namespace && $name && $version) { - - (my $ns_url = $namespace) =~ s/\./\//g; + my $urls = {}; - return { - repository => "https://mvnrepository.com/artifact/$namespace/$name/$version", - download => "https://$repo_url/$ns_url/$name/$version/$name-$version.$extension" - }; + if ($repository_url !~ /hub.docker.com/) { + return $urls; + } + if (!$namespace) { + $urls->{repository} = "$repository_url/_/$name"; } - if ($namespace && $name) { - return {repository => "https://mvnrepository.com/artifact/$namespace/$name"}; + if ($name && $namespace) { + $urls->{repository} = "$repository_url/r/$namespace/$name"; } + return $urls; + } -sub _composer_urls { +sub _gem_urls { my $purl = shift; - my $name = $purl->name; - my $namespace = $purl->namespace; + my $name = $purl->name; + my $version = $purl->version; - return unless ($name && $namespace); + if ($name && $version) { + return { + repository => "https://rubygems.org/gems/$name/versions/$version", + download => "https://rubygems.org/downloads/$name-$version.gem" + }; + } - return {repository => "https://packagist.org/packages/$namespace/$name"}; + return {repository => "https://rubygems.org/gems/$name"}; } -sub 
_bitbucket_urls { +sub _github_urls { my $purl = shift; @@ -298,18 +358,28 @@ sub _bitbucket_urls { my $urls = {}; if ($name && $namespace) { - $urls->{repository} = "https://bitbucket.org/$namespace/$name"; + $urls->{repository} = "/~https://github.com/$namespace/$name"; } if ($version) { - $urls->{download} = "https://bitbucket.org/$namespace/$name/get/$version_prefix$version.$file_ext"; + + my $is_sha1 = ($version =~ /^[a-fA-F0-9]{40}$/); + + if ($is_sha1) { + $urls->{download} = "/~https://github.com/$namespace/$name/archive/$version.$file_ext"; + } + else { + $urls->{download} + = "/~https://github.com/$namespace/$name/archive/refs/tags/$version_prefix$version.$file_ext"; + } + } return $urls; } -sub _docker_urls { +sub _gitlab_urls { my $purl = shift; @@ -317,24 +387,18 @@ sub _docker_urls { my $namespace = $purl->namespace; my $version = $purl->version; my $qualifiers = $purl->qualifiers; - my $repository_url = $qualifiers->{repository_url} || 'https://hub.docker.com'; - - if ($repository_url !~ /^(http|https):\/\//) { - $repository_url = 'https://' . 
$repository_url; - } + my $file_ext = $qualifiers->{ext} || 'tar.gz'; + my $version_prefix = $qualifiers->{version_prefix} || ''; my $urls = {}; - if ($repository_url !~ /hub.docker.com/) { - return $urls; - } - - if (!$namespace) { - $urls->{repository} = "$repository_url/_/$name"; + if ($name && $namespace) { + $urls->{repository} = "https://gitlab.com/$namespace/$name"; } - if ($name && $namespace) { - $urls->{repository} = "$repository_url/r/$namespace/$name"; + if ($version) { + $urls->{download} + = "https://gitlab.com/$namespace/$name/-/archive/$version_prefix$version/$name-$version_prefix$version.$file_ext"; } return $urls; @@ -392,6 +456,97 @@ sub _luarocks_urls { } +sub _maven_urls { + + my $purl = shift; + + my $namespace = $purl->namespace; + my $name = $purl->name; + my $version = $purl->version; + my $qualifiers = $purl->qualifiers; + my $extension = $qualifiers->{extension} // 'jar'; + my $repo_url = $qualifiers->{repository_url} // 'repo1.maven.org/maven2'; + + if ($namespace && $name && $version) { + + (my $ns_url = $namespace) =~ s/\./\//g; + + return { + repository => "https://mvnrepository.com/artifact/$namespace/$name/$version", + download => "https://$repo_url/$ns_url/$name/$version/$name-$version.$extension" + }; + + } + + if ($namespace && $name) { + return {repository => "https://mvnrepository.com/artifact/$namespace/$name"}; + } + +} + +sub _npm_urls { + + my $purl = shift; + + my $namespace = $purl->namespace; + my $name = $purl->name; + my $version = $purl->version; + + if ($namespace && $name && $version) { + return { + repository => "https://www.npmjs.com/package/$namespace/$name/v/$version", + download => "https://registry.npmjs.org/$namespace/$name/-/$name-$version.tgz" + }; + } + + if ($name && $version) { + return { + repository => "https://www.npmjs.com/package/$name/v/$version", + download => "https://registry.npmjs.org/$name/-/$name-$version.tgz" + }; + } + + if ($namespace && $name) { + return {repository => 
"https://www.npmjs.com/package/$namespace/$name"}; + } + + return {repository => "https://www.npmjs.com/package/$name"}; + +} + +sub _nuget_urls { + + my $purl = shift; + + my $name = $purl->name; + my $version = $purl->version; + + if ($name && $version) { + return { + repository => "https://www.nuget.org/packages/$name/$version", + download => "https://www.nuget.org/api/v2/package/$name/$version" + }; + } + + return {repository => "https://www.nuget.org/packages/$name"}; + +} + +sub _pypi_urls { + + my $purl = shift; + + my $name = $purl->name; + my $version = $purl->version; + + if ($name && $version) { + return {repository => "https://pypi.org/project/$name/$version"}; + } + + return {repository => "https://pypi.org/project/$name"}; + +} + 1; __END__ @@ -403,7 +558,7 @@ URI::PackageURL::Util - Utility for URI::PackageURL use URI::PackageURL::Util qw(purl_to_urls); - $urls = purl_to_urls('pkg:cpan/GDT/URI-PackageURL@2.01'); + $urls = purl_to_urls('pkg:cpan/GDT/URI-PackageURL@2.20'); $filename = basename($urls->{download}); $ua->mirror($urls->{download}, "/tmp/$filename"); @@ -415,7 +570,11 @@ URL::PackageURL::Util is the utility package for URL::PackageURL. =over -=item $urls = purl_to_urls($purl_string | URI::PackageURL); +=item $normalized_purl_components = purl_components_normalize(%purl_components) + +Normalizes and validates the given Package URL components and returns them as a hash reference. Croaks on error. +=item $urls = purl_to_urls($purl_string | URI::PackageURL) Converts the given Package URL string or L<URI::PackageURL> instance and return the hash with C<repository> and/or C<download> URL.