lib/dependabot/git_metadata_fetcher.rb



# typed: strict
# frozen_string_literal: true

require "excon"
require "open3"
require "ostruct"
require "sorbet-runtime"
require "tmpdir"
require "dependabot/errors"
require "dependabot/git_ref"
require "dependabot/git_tag_with_detail"
require "dependabot/credential"

module Dependabot
  class GitMetadataFetcher
    extend T::Sig

    # Hosts whose failures are surfaced as real errors instead of being
    # reported as an unreachable git dependency. Every dot is escaped so that
    # look-alike host names (e.g. "gitlabxcom") are not treated as known.
    KNOWN_HOSTS = /github\.com|bitbucket\.org|gitlab\.com/i

    sig do
      params(
        url: String,
        credentials: T::Array[Dependabot::Credential]
      )
        .void
    end
    def initialize(url:, credentials:)
      # Only stores the repository URL and the credentials; nothing is fetched
      # until one of the reader methods below is called.
      @url = url
      @credentials = credentials
    end

    sig { returns(T.nilable(String)) }
    def upload_pack
      # Memoised result of fetch_upload_pack_for(url).
      @upload_pack ||= T.let(fetch_upload_pack_for(url), T.nilable(String))
    rescue Octokit::ClientError
      # Octokit client errors are reported as an unreachable git dependency.
      raise Dependabot::GitDependenciesNotReachable, [url]
    end

    sig { returns(T::Array[GitRef]) }
    def tags
      # Without an upload pack there is nothing to list.
      return [] unless upload_pack

      # Memoised list of the tag refs, rebuilt as GitRef objects that carry the
      # name, the ref's own SHA (as tag_sha) and the commit SHA.
      @tags ||= T.let(
        tags_for_upload_pack.map do |ref|
          GitRef.new(
            name: ref.name,
            tag_sha: ref.ref_sha,
            commit_sha: ref.commit_sha
          )
        end,
        T.nilable(T::Array[GitRef])
      )
    end

    sig { returns(T::Array[GitRef]) }
    def tags_for_upload_pack
      # Memoised subset of refs_for_upload_pack whose ref_type is RefType::Tag.
      @tags_for_upload_pack ||= T.let(
        refs_for_upload_pack.select { |ref| ref.ref_type == RefType::Tag },
        T.nilable(T::Array[GitRef])
      )
    end

    sig { returns(T::Array[GitRef]) }
    def refs_for_upload_pack
      # Memoised result of parse_refs_for_upload_pack.
      @refs_for_upload_pack ||= T.let(parse_refs_for_upload_pack, T.nilable(T::Array[GitRef]))
    end

    sig { returns(T::Array[String]) }
    def ref_names
      # Names of all refs returned by refs_for_upload_pack.
      refs_for_upload_pack.map(&:name)
    end

    sig { params(ref: String).returns(T.nilable(String)) }
    def head_commit_for_ref(ref)
      # Returns the commit SHA advertised for `ref` (a ref name as produced by
      # refs_for_upload_pack, or the literal "HEAD"), or nil when the upload
      # pack does not list it.
      if ref == "HEAD"
        # Remove the opening clause of the upload pack as this isn't always
        # followed by a line break. When it isn't (e.g., with Bitbucket) it
        # causes problems for our `sha_for_update_pack_line` logic. The format
        # of this opening clause is documented at
        # https://git-scm.com/docs/http-protocol#_smart_server_response
        #
        # The SHA and "HEAD" are separated by a space in the smart HTTP
        # response but by a tab in `git ls-remote` output (which is what the
        # upload pack contains when the git fallback was used), so accept both.
        line = T.must(upload_pack).gsub(/^[0-9a-f]{4}# service=git-upload-pack/, "")
                .lines.find { |l| l.match?(/[ \t]HEAD/) }
        return sha_for_update_pack_line(line) if line
      end

      refs_for_upload_pack
        .find { |r| r.name == ref }
        &.commit_sha
    end

    sig { params(ref: String).returns(T.nilable(String)) }
    def head_commit_for_ref_sha(ref)
      # Looks a ref up by its own SHA (ref_sha) instead of by name and returns
      # the commit SHA recorded for it; nil when no ref has that SHA.
      matching_ref = refs_for_upload_pack.find { |candidate| candidate.ref_sha == ref }
      matching_ref&.commit_sha
    end

    sig { returns(T::Array[GitTagWithDetail]) }
    def refs_for_tag_with_detail
      # Memoised result of parse_refs_for_tag_with_detail.
      @refs_for_tag_with_detail ||= T.let(parse_refs_for_tag_with_detail,
                                          T.nilable(T::Array[GitTagWithDetail]))
    end

    sig { returns(T::Array[GitTagWithDetail]) }
    def parse_refs_for_tag_with_detail
      # Converts the "<tag> <detail>" listing from upload_tag_with_detail into
      # GitTagWithDetail objects (the detail is stored as release_date).
      # Returns an empty array when no listing is available.
      raw_listing = upload_tag_with_detail
      return [] if raw_listing.nil?

      raw_listing.lines.filter_map do |entry|
        # Split on the first run of whitespace only, keeping the rest intact.
        tag, detail = entry.split(/\s+/, 2)
        next unless tag && detail

        GitTagWithDetail.new(tag: tag.strip, release_date: detail.strip)
      end
    end

    sig { params(uri: String).returns(String) }
    def fetch_tags_with_detail(uri)
      # Returns the body produced by fetch_tags_with_detail_from_git_for when
      # it reports status 200. Otherwise raises:
      # - GitDependenciesNotReachable for hosts outside KNOWN_HOSTS, and for
      #   known hosts when none of the more specific cases below applies;
      # - RuntimeError ("Unexpected response") for a non-200 status below 400;
      # - an Octokit error for github.com URIs when the response carries a
      #   `data` payload to build it from;
      # - RuntimeError ("Server error") for status 500 and above.
      # Excon socket/timeout errors are re-raised for known hosts and turned
      # into GitDependenciesNotReachable for all others.
      response_with_git = fetch_tags_with_detail_from_git_for(uri)
      return response_with_git.body if response_with_git.status == 200

      raise Dependabot::GitDependenciesNotReachable, [uri] unless uri.match?(KNOWN_HOSTS)

      if response_with_git.status < 400
        raise "Unexpected response: #{response_with_git.status} - #{response_with_git.body}"
      end

      if uri.match?(/github\.com/i)
        response = response_with_git.data
        # The git-based response in this file is an OpenStruct with only body
        # and status, so `data` is nil here. Only build an Octokit error when
        # there really is a payload; otherwise fall through so the git failure
        # is reported through the generic errors below instead of handing nil
        # to Octokit::Error.from_response.
        unless response.nil?
          response[:response_headers] = response[:headers]
          raise Octokit::Error.from_response(response)
        end
      end

      raise "Server error at #{uri}: #{response_with_git.body}" if response_with_git.status >= 500

      raise Dependabot::GitDependenciesNotReachable, [uri]
    rescue Excon::Error::Socket, Excon::Error::Timeout
      raise if uri.match?(KNOWN_HOSTS)

      raise Dependabot::GitDependenciesNotReachable, [uri]
    end

    sig { params(ref: String).returns(Excon::Response) }
    def ref_details_for_pinned_ref(ref)
      # Performs a GET through Dependabot::RegistryClient against the URL built
      # by provider_url(ref) and returns the response.
      # NOTE(review): provider_url only rewrites github.com URLs, so this
      # presumably only works for GitHub-hosted repositories - confirm.
      Dependabot::RegistryClient.get(url: provider_url(ref))
    end

    private

    # Repository URL passed to the constructor.
    sig { returns(String) }
    attr_reader :url

    # Credentials passed to the constructor; read by uri_with_auth.
    sig { returns(T::Array[Dependabot::Credential]) }
    attr_reader :credentials

    sig { params(uri: String).returns(String) }
    def fetch_upload_pack_for(uri)
      # Tries the HTTP endpoint first and `git ls-remote` second; the body of
      # whichever answers with status 200 is returned. When both fail, the
      # error raised is derived from the HTTP response only.
      http_response = fetch_raw_upload_pack_for(uri)
      return http_response.body if http_response.status == 200

      git_response = fetch_raw_upload_pack_with_git_for(uri)
      return git_response.body if git_response.status == 200

      # Failures on hosts we don't know are always "not reachable".
      raise Dependabot::GitDependenciesNotReachable, [uri] unless uri.match?(KNOWN_HOSTS)

      raise "Unexpected response: #{http_response.status} - #{http_response.body}" if http_response.status < 400

      if uri.match?(/github\.com/i)
        # Octokit reads the headers from :response_headers, Excon stores them
        # under :headers.
        error_payload = http_response.data
        error_payload[:response_headers] = error_payload[:headers]
        raise Octokit::Error.from_response(error_payload)
      end

      raise "Server error at #{uri}: #{http_response.body}" if http_response.status >= 500

      raise Dependabot::GitDependenciesNotReachable, [uri]
    rescue Excon::Error::Socket, Excon::Error::Timeout
      # Connection problems are only surfaced as-is for known hosts.
      raise if uri.match?(KNOWN_HOSTS)

      raise Dependabot::GitDependenciesNotReachable, [uri]
    end

    sig { params(uri: String).returns(Excon::Response) }
    def fetch_raw_upload_pack_for(uri)
      # Requests the info/refs URL built by service_pack_uri via Excon.
      # Everything before the last "@" is the part that may hold credentials;
      # any further "@" inside it is percent-encoded so that only the final
      # one is left as a literal separator.
      userinfo, separator, remainder = service_pack_uri(uri).rpartition("@")
      request_url = userinfo.gsub("@", "%40") + separator + remainder

      Excon.get(request_url, idempotent: true, **excon_defaults)
    end

    sig { params(uri: String).returns(T.untyped) }
    def fetch_raw_upload_pack_with_git_for(uri)
      # Runs `git ls-remote` against the repository and packages the command
      # result like an HTTP response (an object with body and status) so the
      # caller's error handling remains unchanged: stdout with status 200 on
      # success, stderr (or the exception message) with status 500 otherwise.
      needs_git_suffix = !uri.end_with?(".git") && !skip_git_suffix(uri)
      remote = needs_git_suffix ? uri + ".git" : uri

      command = SharedHelpers.escape_command("git ls-remote #{remote}")
      # GIT_TERMINAL_PROMPT=0 is set so git is not left waiting for interactive input.
      env = { "PATH" => ENV.fetch("PATH", nil), "GIT_TERMINAL_PROMPT" => "0" }

      begin
        stdout, stderr, process = Open3.capture3(env, command)
      rescue Errno::ENOENT => e # thrown when `git` isn't installed...
        return OpenStruct.new(body: e.message, status: 500)
      end

      return OpenStruct.new(body: stdout, status: 200) if process.success?

      OpenStruct.new(body: stderr, status: 500)
    end

    sig { returns(T::Array[GitRef]) }
    def parse_refs_for_upload_pack
      # Builds one GitRef per branch (refs/heads/*) and tag (refs/tags/*)
      # listed in the upload pack. The ref name is stored without its
      # refs/heads/ or refs/tags/ prefix. Works for both the space-delimited
      # smart HTTP response and the tab-delimited `git ls-remote` output, since
      # every field is obtained by splitting on arbitrary whitespace.
      peeled_lines = []

      result = T.must(upload_pack).lines.each_with_object({}) do |line, res|
        full_ref_name = line.split.last
        # Whitespace-only lines have no fields at all; ignore them.
        next if full_ref_name.nil?
        next unless full_ref_name.start_with?("refs/tags", "refs/heads")

        # Peeled entries ("<ref>^{}") are applied in a second pass below.
        (peeled_lines << line) && next if line.strip.end_with?("^{}")

        ref_name = full_ref_name.sub(%r{^refs/(tags|heads)/}, "").strip
        sha = sha_for_update_pack_line(line)

        res[ref_name] = GitRef.new(
          name: ref_name,
          ref_sha: sha,
          ref_type: full_ref_name.start_with?("refs/tags") ? RefType::Tag : RefType::Head,
          commit_sha: sha
        )
      end

      # Loop through the peeled lines, updating the commit_sha for any
      # matching tags in our results hash. The name is taken from the last
      # whitespace-separated field (as in the first pass) so that it does not
      # matter whether the SHA and the ref are separated by a space or a tab.
      peeled_lines.each do |line|
        ref_name = T.must(line.split.last)
                    .sub(%r{^refs/(tags|heads)/}, "")
                    .delete_suffix("^{}")
        next unless result[ref_name]

        result[ref_name].commit_sha = sha_for_update_pack_line(line)
      end

      result.values
    end

    sig { params(uri: String).returns(String) }
    def service_pack_uri(uri)
      # Builds the info/refs URL for the git-upload-pack service: the URI is
      # sanitised, given credentials (uri_with_auth), stripped of a trailing
      # slash and, unless skip_git_suffix says otherwise, suffixed with ".git".
      sanitized = uri_sanitize(uri)
      base = uri_with_auth(sanitized).gsub(%r{/$}, "")

      needs_git_suffix = !base.end_with?(".git") && !skip_git_suffix(sanitized)
      base += ".git" if needs_git_suffix

      "#{base}/info/refs?service=git-upload-pack"
    end

    sig { params(uri: String).returns(T::Boolean) }
    def skip_git_suffix(uri)
      # TODO: As of 2023-01-18 Azure DevOps, unlike the other providers
      # (GitHub, GitLab, BitBucket), does not support the ".git" suffix and
      # answers with a 404, so the suffix is skipped for URIs that look like
      # Azure DevOps.
      # The source object isn't accessible here, and even if it were, the URI
      # may belong to a dependency hosted elsewhere, so the URI itself is
      # checked. An unfortunate consequence is that URLs pointing to Azure
      # DevOps Server will not work.
      # The only alternative is to stop adding ".git" altogether, since the
      # other providers work with or without it. That change has other
      # ramifications, so it'd be better if Azure started supporting ".git"
      # like everyone else.
      standard_uri = SharedHelpers.scp_to_standard(uri_sanitize(uri))
      host = URI(standard_uri).hostname.to_s

      host == "dev.azure.com" || host.end_with?(".visualstudio.com")
    end

    # Add in username and password if present in credentials.
    # Credentials are never present for production Dependabot.
    sig { params(uri: String).returns(String) }
    def uri_with_auth(uri)
      # Returns the URI as a string with its scheme forced to https (plain
      # http is left alone) and, when the URI carries no password of its own,
      # with the username/password of the matching "git_source" credential.
      parsed = URI(SharedHelpers.scp_to_standard(uri))
      cred = credentials.find { |c| c["type"] == "git_source" && parsed.host == c["host"] }

      parsed.scheme = "https" unless parsed.scheme == "http"

      if !parsed.password && cred&.fetch("username", nil) && cred.fetch("password", nil)
        # URI doesn't have authentication details, but we have credentials
        parsed.user = URI.encode_www_form_component(cred["username"])
        parsed.password = URI.encode_www_form_component(cred["password"])
      end

      parsed.to_s
    end

    sig { params(uri: String).returns(String) }
    def uri_sanitize(uri)
      # Removes leading and trailing whitespace from the URI.
      uri.strip.to_s
    end

    sig { params(line: String).returns(String) }
    def sha_for_update_pack_line(line)
      # The SHA is the last 40 characters of the line's first
      # whitespace-separated field; anything in front of it within that field
      # is dropped. Shorter fields are returned whole.
      first_field = T.must(line.split.first)
      first_field[-40, 40] || first_field
    end

    sig { returns(T::Hash[Symbol, T.untyped]) }
    def excon_defaults
      # Excon options obtained from SharedHelpers.excon_defaults, asking for a
      # read timeout of 20.
      # Some git hosts are slow when returning a large number of tags
      SharedHelpers.excon_defaults(read_timeout: 20)
    end

    sig { returns(T.nilable(String)) }
    def upload_tag_with_detail
      # Memoised result of fetch_tags_with_detail(url).
      @upload_tag_detail ||= T.let(fetch_tags_with_detail(url), T.nilable(String))
    rescue Octokit::ClientError
      # Octokit client errors are reported as an unreachable git dependency.
      raise Dependabot::GitDependenciesNotReachable, [url]
    end

    # Fetches tags with their creation dates from a git repository. The repository (which may
    # live on a private registry) is bare-cloned and its tags are then listed with their creation dates.
    sig { params(uri: String).returns(T.untyped) }
    def fetch_tags_with_detail_from_git_for(uri)
      # Bare-clones the repository into a temporary directory, lists its tags
      # with their creator dates and returns an object with body and status:
      # status 200 with one "<tag> <date>" line per tag, sorted by date, or
      # status 500 with the error output when a git command fails.
      needs_git_suffix = !uri.end_with?(".git") && !skip_git_suffix(uri)
      clone_source = needs_git_suffix ? uri + ".git" : uri
      env = { "PATH" => ENV.fetch("PATH", nil), "GIT_TERMINAL_PROMPT" => "0" }

      Dir.mktmpdir do |dir|
        # Step 1: clone into the temporary directory
        clone_command = SharedHelpers.escape_command("git clone --bare #{clone_source} #{dir}")
        _clone_stdout, clone_stderr, clone_status = Open3.capture3(env, clone_command)
        return OpenStruct.new(body: clone_stderr, status: 500) unless clone_status.success?

        # Step 2: list the tags from inside the clone
        Dir.chdir(dir) do
          list_command = 'git for-each-ref --format="%(refname:short) %(creatordate:short)" refs/tags'
          listing, list_stderr, list_status = Open3.capture3(env, list_command)
          return OpenStruct.new(body: list_stderr, status: 500) unless list_status.success?

          # Step 3: [tag, date] pairs, ordered by date, rendered one per line
          dated_tags = listing.lines.map { |line| line.strip.split(" ", 2) }
          body = dated_tags
                 .sort_by { |_tag, date| date }
                 .map { |tag, date| "#{tag} #{date}" }
                 .join("\n")
          return OpenStruct.new(body: body, status: 200)
        end
      end
    rescue Errno::ENOENT => e # Thrown when `git` isn't installed
      OpenStruct.new(body: e.message, status: 500)
    end

    sig do
      params(ref: String).returns(String)
    end
    def provider_url(ref)
      # Builds the commits-listing API URL (100 per page, filtered by `ref`)
      # from the repository URL: a trailing ".git" is dropped and "github.com"
      # is replaced with "api.github.com/repos". Only GitHub is handled.
      repository_url = url.gsub(/\.git$/, "")
      github_api_url = repository_url.gsub("github.com", "api.github.com/repos")

      "#{github_api_url}/commits?per_page=100&sha=#{ref}"
    end
  end
end