0xV3NOMx
Linux ip-172-26-7-228 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64



Your IP : 3.133.122.95


Current Path : /proc/thread-self/root/usr/share/lintian/checks/
Upload File :
Current File : //proc/thread-self/root/usr/share/lintian/checks/source-copyright.pm

# source-copyright-file -- lintian check script -*- perl -*-

# Copyright (C) 2011 Jakub Wilk
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

package Lintian::source_copyright;

use strict;
use warnings;
use autodie;

use constant {
    WC_TYPE_REGEX => 'REGEX',
    WC_TYPE_FILE => 'FILE',
    WC_TYPE_DECENDANTS => 'DECENDANTS',
};

use List::MoreUtils qw(any none);
use Text::Levenshtein qw(distance);

use Lintian::Relation::Version qw(versions_compare);
use Lintian::Tags qw(tag);
use Lintian::Util qw(parse_dpkg_control);
use Lintian::Data;

my $BAD_SHORT_LICENSES = Lintian::Data->new(
    'source-copyright/bad-short-licenses',
    qr/\s*\~\~\s*/,
    sub {
        return {
            'regex' => qr/$_[0]/xms,
            'tag'   => $_[1],
        };
    });

my $dep5_last_normative_change = '0+svn~166';
my $dep5_last_overhaul         = '0+svn~148';
my %dep5_renamed_fields        = (
    'format-specification' => 'format',
    'maintainer'           => 'upstream-contact',
    'upstream-maintainer'  => 'upstream-contact',
    'contact'              => 'upstream-contact',
    'name'                 => 'upstream-name',
);

sub run {
    my (undef, undef, $info, undef, $group) = @_;
    my $debian_dir = $info->index_resolved_path('debian/');
    return if not $debian_dir;
    my $copyright_path = $debian_dir->child('copyright');

    if (not $copyright_path) {
        my @pkgs = $info->binaries;
        tag 'no-debian-copyright';
        if (scalar @pkgs == 1) {
            # If debian/copyright doesn't exist, and the only a single
            # binary package is built, there's a good chance that the
            # copyright file is available as
            # debian/<pkgname>.copyright.
            $copyright_path = $debian_dir->child($pkgs[0] . '.copyright');
        }
        return if not $copyright_path;
    } elsif ($copyright_path->is_symlink) {
        tag 'debian-copyright-is-symlink';
    }

    if ($copyright_path->is_open_ok) {
        _check_dep5_copyright($info, $copyright_path);
        _check_apache_notice_files($info, $group, $copyright_path);
    }
    return;
}

# The policy states, since 4.0.0, that people should use "https://" for the
# format URI. This is checked later in check_dep5_copyright.
# return undef is not dep5 and '' if unknown version
sub _find_dep5_version {
    my ($original_uri) = @_;
    my $uri = $original_uri;
    my $version;

    if ($uri =~ m,\b(?:rev=REVISION|VERSIONED_FORMAT_URL)\b,) {
        tag 'boilerplate-copyright-format-uri', $uri;
        return;
    }

    if (
        $uri =~ s{ https?://wiki\.debian\.org/
                                Proposals/CopyrightFormat\b}{}xsm
      ){
        $version = '0~wiki';
        $uri =~ m,^\?action=recall&rev=(\d+)$,
          and $version = "$version~$1";
        return $version;
    }
    if ($uri =~ m,^https?://dep\.debian\.net/deps/dep5/?$,) {
        $version = '0+svn';
        return $version;
    }
    if (
        $uri =~ s{\A https?://svn\.debian\.org/
                                  wsvn/dep/web/deps/dep5\.mdwn\b}{}xsm
      ){
        $version = '0+svn';
        $uri =~ m,^\?(?:\S+[&;])?rev=(\d+)(?:[&;]\S+)?$,
          and $version = "$version~$1";
        return $version;
    }
    if (
        $uri =~ s{ \A https?://(?:svn|anonscm)\.debian\.org/
                                    viewvc/dep/web/deps/dep5\.mdwn\b}{}xsm
      ){
        $version = '0+svn';
        $uri =~ m{\A \? (?:\S+[&;])?
                             (?:pathrev|revision|rev)=(\d+)(?:[&;]\S+)?
                          \Z}xsm
          and $version = "$version~$1";
        return $version;
    }
    if (
        $uri =~ m{ \A
                       https?://www\.debian\.org/doc/
                       (?:packaging-manuals/)?copyright-format/(\d+\.\d+)/?
                   \Z}xsm
      ){
        $version = $1;
        return $version;
    }

    tag 'unknown-copyright-format-uri', $original_uri;
    return;
}

sub _check_apache_notice_files {
    my ($info, $group, $copyright_path) = @_;

    my @procs = $group->get_processables('binary');
    return if not @procs;
    return if $copyright_path->file_contents !~ m/apache[-\s]+2\./i;

    my @notice_files = grep {
              $_->basename =~ m/^NOTICE(\.txt)?$/
          and $_->is_open_ok
          and $_->file_contents =~ m/apache/i
    } $info->sorted_index;
    return if not @notice_files;

    foreach my $binpkg (@procs) {
        my @files = map { $_->name } $binpkg->info->sorted_index;
        my $java_info = $binpkg->info->java_info;
        for my $jar_file (sort keys %{$java_info}) {
            push @files, keys %{$java_info->{$jar_file}{files}};
        }
        return if any { m{/NOTICE(\.txt)?(\.gz)?$} } @files;
    }

    tag 'missing-notice-file-for-apache-license', join(' ', @notice_files);

    return;
}

sub _check_dep5_copyright {
    my ($info, $copyright_path) = @_;
    my $contents = $copyright_path->file_contents;
    my (@dep5, @lines);

    if (    $contents =~ m/^Files-Excluded:/
        and $contents
        !~ m{^Format:.*/doc/packaging-manuals/copyright-format/1.0$}) {
        tag 'files-excluded-without-copyright-format-1.0';
    }

    if (
        $contents !~ m{
               (?:^ | \n)
               (?i: format(?: [:] |[-\s]spec) )
               (?: . | \n\s+ )*
               (?: /dep[5s]?\b | \bDEP-?5\b
                 | [Mm]achine-readable\s(?:license|copyright)
                 | /copyright-format/ | CopyrightFormat
                 | VERSIONED_FORMAT_URL
               ) }x
      ){

        tag 'no-dep5-copyright';
        return;
    }

    # Before trying to parse the copyright as Debian control file, try to
    # determine the format URI.
    my $first_para = $contents;
    $first_para =~ s,^#.*,,mg;
    $first_para =~ s,[ \t]+$,,mg;
    $first_para =~ s,^\n+,,g;
    $first_para =~ s,\n\n.*,\n,s;    #;; hi emacs
    $first_para =~ s,\n?[ \t]+, ,g;
    $first_para =~ m,^Format(?:-Specification)?:\s*(.*),mi;
    my $uri = $1;
    $uri =~ s/^([^#\s]+)#/$1/
      if defined $uri;               # strip fragment identifier

    if (!defined $uri) {
        tag 'unknown-copyright-format-uri';
        return;
    }

    my $version = _find_dep5_version($uri);

    return if !defined($version);
    if ($version =~ m,wiki,) {
        tag 'wiki-copyright-format-uri', $uri;
    }elsif ($version =~ m,svn$,) {
        tag 'unversioned-copyright-format-uri', $uri;
    }elsif (versions_compare $version, '<<', $dep5_last_normative_change) {
        tag 'out-of-date-copyright-format-uri', $uri;
    }elsif ($uri =~ m,^http://www\.debian\.org/,) {
        tag 'insecure-copyright-format-uri', $uri;
    }

    if (versions_compare $version, '<<', $dep5_last_overhaul) {
        return;
    }

    # We are reasonably certain that we're dealing
    # with an up-to-date DEP-5 format. Let's try to do
    # more strict checks.
    eval {
        open(my $fd, '<', \$contents);
        @dep5 = parse_dpkg_control($fd, 0, \@lines);
        close($fd);
    };
    if ($@) {
        chomp $@;
        $@ =~ s/^syntax error at //;
        tag 'syntax-error-in-dep5-copyright', $@;
        return;
    }

    return if (!@dep5);

    _parse_dep5($info, \@dep5, \@lines);

    return;
}

sub _parse_dep5 {
    my ($info, $dep5ref, $linesref) = @_;
    my @dep5       = @$dep5ref;
    my @lines      = @$linesref;
    my $first_para = shift @dep5;
    my %standalone_licenses;
    my %required_standalone_licenses;
    my %short_licenses_seen;
    my %full_licenses_seen;

    for my $field (keys %{$first_para}) {
        my $renamed_to = $dep5_renamed_fields{$field};
        if (defined $renamed_to) {
            tag 'obsolete-field-in-dep5-copyright', $field,
              $renamed_to, "(line $lines[0]{$field})";
        }
    }
    check_files_excluded($info, $first_para->{'files-excluded'} // '');

    tag 'missing-field-in-dep5-copyright', 'format',
      "(line $lines[0]{'format'})"
      if none { defined $first_para->{$_} } qw(format format-specification);
    tag 'missing-explanation-for-contrib-or-non-free-package'
      if $info->source_field('section', '') =~ m{^(contrib|non-free)(/.+)?$}
      and none { defined $first_para->{$_} } qw(comment disclaimer);
    tag 'missing-explanation-for-repacked-upstream-tarball'
      if $info->repacked
      and none { defined $first_para->{$_} } qw(comment files-excluded);

    my (undef, $full_license_field, undef,@short_licenses_field)
      =parse_license($first_para->{'license'}, 1);
    for my $short_license (@short_licenses_field) {
        $required_standalone_licenses{$short_license} = 0
          if not defined($full_license_field);
        $short_licenses_seen{$short_license}          = 1;
    }
    if(defined($full_license_field)) {
        for (@short_licenses_field) {
            $standalone_licenses{$_} = -1;
            $full_licenses_seen{$_} = 1;
        }
    }

    my (@commas_in_files, %file_para_coverage);
    my %file_coverage = map { $_ => 0 } get_all_files($info);
    my $i = 0;
    my $current_line = 0;
    my $commas_in_files = any { m/,/xsm } $info->sorted_index;
    for my $para (@dep5) {
        $i++;
        $current_line = $lines[$i]{'START-OF-PARAGRAPH'};
        my ($files_fname, $files)
          =get_field($para, 'files', $lines[$i]);
        my $license   = get_field($para, 'license',   $lines[$i]);
        my $copyright = get_field($para, 'copyright', $lines[$i]);

        if (    not defined $files
            and defined $license
            and defined $copyright){
            tag 'ambiguous-paragraph-in-dep5-copyright',
              "paragraph at line $current_line";

            # If it is the first paragraph, it might be an instance of
            # the (no-longer) optional "first Files-field".
            $files = '*' if $i == 1;
        }

        if (defined $license and not defined $files) {
            my (undef, $full_license, $short_license,@short_licenses)
              = parse_license($license, $current_line);

            # Standalone license paragraph
            if (defined($short_license) and $short_license =~ /\s++\|\s++/) {
                tag 'pipe-symbol-used-as-license-disjunction', $short_license,
                  "(paragraph at line $current_line)";
            }
            if (not defined($full_license)) {
                tag 'missing-license-text-in-dep5-copyright', $license,
                  "(paragraph at line $current_line)";
            }else {
                for (@short_licenses) {
                    if(defined($full_licenses_seen{$_})
                        and $_ ne 'public-domain') {
                        tag 'dep5-copyright-license-name-not-unique',
                          "(paragraph at line $current_line)";
                    } else {
                        $standalone_licenses{$_} = $i;
                        $full_licenses_seen{$_} = $current_line;
                    }
                    $short_licenses_seen{$_} = $i;
                }
            }
        }elsif (defined $files) {
            if ($files =~ m/\A\s*\Z/mxs) {
                tag 'missing-field-in-dep5-copyright', 'files',
                  '(empty field,',
                  "paragraph at line $current_line)";
            }

            tag 'global-files-wildcard-not-first-paragraph-in-dep5-copyright',
              "(paragraph at line $current_line)"
              if $files eq '*' and $i > 1;

            # Files paragraph
            if (not @commas_in_files and $files =~ /,/) {
                @commas_in_files = ($i, $files_fname);
            }

            # only attempt to evaluate globbing if commas could be legal
            if (not @commas_in_files or $commas_in_files) {
                my @wildcards = split /[\n\t ]+/, $files;
                for my $wildcard (@wildcards) {
                    $wildcard =~ s/^\s+|\s+$//g;
                    if ($wildcard eq '') {
                        next;
                    }
                    my ($wc_value, $wc_type, $wildcard_error)
                      = parse_wildcard($wildcard);
                    if (defined $wildcard_error) {
                        tag 'invalid-escape-sequence-in-dep5-copyright',
                          substr($wildcard_error, 0, 2)
                          . " (paragraph at line $current_line)";
                        next;
                    }

                    my $used = 0;
                    $file_para_coverage{$current_line} = 0;
                    if ($wc_type eq WC_TYPE_FILE) {
                        if (exists($file_coverage{$wc_value})) {
                            $used = 1;
                            $file_coverage{$wildcard} = $current_line;
                        }
                    } elsif ($wc_type eq WC_TYPE_DECENDANTS) {
                        my @wlist;
                        if (my $dir = $info->index($wc_value)) {
                            if ($wc_value eq q{}) {
                                # Special-case => Files: *
                                push(@wlist, get_all_files($info));
                            } else {
                                push(@wlist,
                                    grep { $_->is_file }
                                      $dir->children('breadth-first'));
                            }
                            $used = 1 if @wlist;
                        }
                        for my $entry (@wlist) {
                            $file_coverage{$entry->name} = $current_line;
                        }
                    } else {
                        for my $srcfile (%file_coverage) {
                            if ($srcfile =~ $wc_value) {
                                $used = 1;
                                $file_coverage{$srcfile} = $current_line;
                            }
                        }
                    }
                    if ($used) {
                        $file_para_coverage{$current_line} = 1;
                    } elsif (not $used) {
                        tag 'wildcard-matches-nothing-in-dep5-copyright',
                          "$wildcard (paragraph at line $current_line)";
                    }
                }
            }

            my ($found_license, $full_license, $short_license, @short_licenses)
              = parse_license($license, $current_line);
            if (defined($short_license) and $short_license =~ /\s++\|\s++/) {
                tag 'pipe-symbol-used-as-license-disjunction', $short_license,
                  "(paragraph at line $current_line)";
            }
            if ($found_license) {
                for (@short_licenses) {
                    $short_licenses_seen{$_} = $i;
                    if (not defined($full_license)) {
                        $required_standalone_licenses{$_} = $i;
                    } else {
                        if(defined($full_licenses_seen{$_})
                            and $_ ne 'public-domain') {
                            tag 'dep5-copyright-license-name-not-unique',
                              "(paragraph at line $current_line)";
                        } else {
                            $full_licenses_seen{$_} = $current_line;
                        }
                    }
                }
            }else {
                tag 'missing-field-in-dep5-copyright', 'license',
                  "(paragraph at line $current_line)";
            }

            if (not defined $copyright) {
                tag 'missing-field-in-dep5-copyright', 'copyright',
                  "(paragraph at line $current_line)";
            }elsif ($copyright =~ m/\A\s*\Z/mxs) {
                tag 'missing-field-in-dep5-copyright', 'copyright',
                  '(empty field,',
                  "paragraph at line $current_line)";
            }

        }else {
            tag 'unknown-paragraph-in-dep5-copyright', 'paragraph at line',
              $current_line;
        }
    }
    if (@commas_in_files and not $commas_in_files) {
        my ($paragraph_no, $field_name) = @commas_in_files;
        tag 'comma-separated-files-in-dep5-copyright',
          'paragraph at line',
          $lines[$paragraph_no]{$field_name};
    } else {
        foreach my $srcfile (sort keys %file_coverage) {
            my $i = $file_coverage{$srcfile};
            if ($srcfile =~ '^\.pc/') {
                next;
            }
            if (not $i) {
                tag 'file-without-copyright-information', $srcfile;
            }
            delete $file_para_coverage{$i};
        }
        foreach my $i (sort keys %file_para_coverage) {
            tag 'unused-file-paragraph-in-dep5-copyright',
              "paragraph at line $i";
        }
    }
    while ((my $license, $i) = each %required_standalone_licenses) {
        if (not defined $standalone_licenses{$license}) {
            tag 'missing-license-paragraph-in-dep5-copyright', $license,
              "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
        } elsif ($standalone_licenses{$license} == -1) {
            tag 'dep5-file-paragraph-references-header-paragraph', $license,
              "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
        }

    }
    while ((my $license, $i) = each %standalone_licenses) {
        if (not defined $required_standalone_licenses{$license}) {
            tag 'unused-license-paragraph-in-dep5-copyright', $license,
              "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
        }
    }
  LICENSE:
    while ((my $license, $i) = each %short_licenses_seen) {
        if ($license =~ m,\s,) {
            if($license =~ m,[^ ]+ \s+ with \s+ (.*),x) {
                my $exceptiontext = $1;
                unless ($exceptiontext =~ m,[^ ]+ \s+ exception,x) {
                    tag 'bad-exception-format-in-dep5-copyright', $license,
                      "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
                }
            }else {
                tag 'space-in-std-shortname-in-dep5-copyright', $license,
                  "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
            }
        }
        foreach my $bad_short_license ($BAD_SHORT_LICENSES->all) {
            my $value = $BAD_SHORT_LICENSES->value($bad_short_license);
            my $regex = $value->{'regex'};
            if ($license =~ m/$regex/x) {
                tag $value->{'tag'}, $license,
                  "(paragraph at line $lines[$i]{'START-OF-PARAGRAPH'})";
                next LICENSE;
            }
        }
    }
    return;
}

# parse a license block
sub parse_license {
    my ($license_block, $line) = @_;
    my ($full_license, $short_license);
    return 0 unless defined($license_block);
    if ($license_block =~ m/\n/) {
        ($short_license, $full_license) = split /\n/, $license_block, 2;
    }else {
        $short_license = $license_block;
    }
    $short_license =~ s/[(),]/ /g;
    if ($short_license =~ m/\A\s*\Z/) {
        tag 'empty-short-license-in-dep5-copyright',
          "(paragraph at line $line)";
        return 1, $full_license, '';
    }
    $short_license = lc($short_license);
    my @licenses
      =map { "\L$_" } (split(m/\s++(?:and|or)\s++/, $short_license));
    return 1, $full_license, $short_license, @licenses;
}

sub get_field {
    my ($para, $field, $line) = @_;
    if (exists $para->{$field}) {
        return $para->{$field} unless wantarray;
        return ($field, $para->{$field});
    }

    # Fall back to a "likely misspelling" of the field.
    foreach my $f (sort keys %$para) {
        if (distance($field, $f) < 3) {
            tag 'field-name-typo-in-dep5-copyright', $f, '->', $field,
              "(line $line->{$f})";
            return $para->{$f} unless wantarray;
            return ($f, $para->{$f});
        }
    }
    return;
}

sub dequote_backslashes {
    my ($string) = @_;
    my $error;
    eval {
        $string =~ s{
            ([^\\]+) |
            (\\[\\]) |
            (.+)
        }{
            if (defined $1) {
                quotemeta($1);
            } elsif (defined $2) {
                $2;
            } else {
                $error = $3;
                die;
            }
        }egx;
    };
    if ($@) {
        return (undef, $error);
    } else {
        return ($string, undef);
    }
}

sub parse_wildcard {
    my ($regex_src) = @_;
    my ($error);
    $regex_src =~ s,^\./+,,;
    $regex_src =~ s,//+,/,g;
    if ($regex_src eq '*') {
        return ('', WC_TYPE_DECENDANTS, undef);
    }
    if (index($regex_src, '?') == -1) {
        my $star_index = index($regex_src, '*');
        my $bslash_index = index($regex_src, '\\');
        if ($star_index == -1) {
            # Regular file-match, dequote "\\" if any and stop here.
            if ($bslash_index > -1) {
                ($regex_src, $error) = dequote_backslashes($regex_src);
            }
            return ($regex_src, WC_TYPE_FILE, $error);
        }
        if (length($regex_src) - 1 == $star_index
            and substr($regex_src, -2) eq '/*') {
            # Files: some-dir/*
            $regex_src = substr($regex_src, 0, -1);
            if ($bslash_index > -1) {
                ($regex_src, $error) = dequote_backslashes($regex_src);
            }
            return ($regex_src, WC_TYPE_DECENDANTS, $error);
        }
    }

    eval {
        $regex_src =~ s{
            (\*) |
            (\?) |
            ([^*?\\]+) |
            (\\[\\*?]) |
            (.+)
        }{
            if (defined $1) {
                '.*';
            } elsif (defined $2) {
                '.'
            } elsif (defined $3) {
                quotemeta($3);
            } elsif (defined $4) {
                $4;
            } else {
                $error = $5;
                die;
            }
        }egx;
    };
    if ($@) {
        return (undef, undef, $error);
    } else {
        return (qr/^(?:$regex_src)$/, WC_TYPE_REGEX, undef);
    }
}

sub get_all_files {
    my ($info) = @_;
    return grep { $_->is_file } $info->sorted_index;
}

sub check_files_excluded {
    my ($info, $excluded) = @_;
    my @files = grep { $_->is_file } $info->sorted_orig_index;
    my @wildcards = split /[\n\t ]+/, $excluded;
    for my $wildcard (@wildcards) {
        $wildcard =~ s/^\s+|\s+$//g;
        if ($wildcard eq '') {
            next;
        }
        my ($wc_value, $wc_type, $wildcard_error)= parse_wildcard($wildcard);
        if (defined $wildcard_error) {
            tag 'invalid-escape-sequence-in-dep5-copyright', $wildcard_error;
            next;
        }
        if ($wc_type eq WC_TYPE_FILE) {
            # Also match "dir/filename" for "Files-Excluded: dir"
            $wc_value = qr/^${wc_value}(?:\/|$)/;
        }
        for my $srcfile (@files) {
            next if $srcfile =~ m/^(?:debian|\.pc)\//;
            tag 'source-includes-file-in-files-excluded', $srcfile
              if $srcfile =~ qr/^$wc_value/;
        }
    }

    return;
}

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et