class Mindee::Input::Source::UrlInputSource
Loads a remote document from a file URL.
def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
Returns:
- (BytesInputSource) - The downloaded file wrapped as a local input source.
Parameters:
- max_redirects (Integer) -- Maximum number of redirects to follow.
- token (String, nil) -- Optional token for JWT-based authentication.
- password (String, nil) -- Optional password for authentication.
- username (String, nil) -- Optional username for authentication.
- filename (String, nil) -- Optional name to give to the file.
# Downloads the remote file and wraps its content in a local input source.
#
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [BytesInputSource] Input source built from the downloaded content and the resolved name.
def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  # The file name is resolved before the download is attempted.
  resolved_name = fill_filename(filename)
  content = fetch_file_content(
    username: username,
    password: password,
    token: token,
    max_redirects: max_redirects
  )
  BytesInputSource.new(StringIO.new(content).read, resolved_name)
end
def extract_filename_from_url(uri)
# Extracts the last path segment of a URI so it can be used as a file name.
#
# @param uri [URI::Generic] Parsed URL to take the name from.
# @return [String] The last path segment, or an empty string when the URL has no usable one.
def extract_filename_from_url(uri)
  # `to_s` guards against URIs whose path is nil.
  filename = File.basename(uri.path.to_s)
  # File.basename returns the separator itself for a root path, which is not a usable file name.
  filename == '/' ? '' : filename
end
def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
Returns:
- (String) - The downloaded file content.
Parameters:
- max_redirects (Integer) -- Maximum number of redirects to follow.
- token (String, nil) -- Optional token for JWT-based authentication.
- password (String, nil) -- Optional password for authentication.
- username (String, nil) -- Optional username for authentication.
# Downloads the content of the file located at the source URL.
#
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The downloaded file content.
# @raise [RuntimeError] If the final response status is outside the 200-299 range.
def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
  target = URI.parse(@url)
  get_request = Net::HTTP::Get.new(target)
  get_request['Authorization'] = "Bearer #{token}" if token
  # Basic credentials are only applied when both parts are provided; they are set after the bearer header.
  get_request.basic_auth(username, password) if username && password

  response = make_request(target, get_request, max_redirects)
  status = response.code.to_i
  raise "Failed to download file: HTTP status code #{response.code}" if status > 299
  raise "Failed to download file: Invalid response code #{response.code}." if status < 200

  response.body
end
def fill_filename(filename)
# Resolves the name to give to the downloaded file.
#
# Falls back to the name found in the source URL when none is given, and to a
# generated name when the candidate is empty or has no extension.
#
# @param filename [String, nil] Name requested by the caller, if any.
# @return [String] The file name to use.
def fill_filename(filename)
  candidate = filename || extract_filename_from_url(URI.parse(@url))
  return candidate unless candidate.empty? || File.extname(candidate).empty?

  generate_file_name(extension: get_file_extension(candidate))
end
def generate_file_name(extension: nil)
# Builds a temporary file name from the current time and a random suffix.
#
# @param extension [String, nil] Extension to append as-is; '.tmp' is used when nil.
# @return [String] A name shaped like "mindee_temp_<timestamp>_<8 random base-36 characters><extension>".
def generate_file_name(extension: nil)
  suffix = extension || '.tmp'
  random_part = (1..8).map { rand(36).to_s(36) }.join
  timestamp = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
  "mindee_temp_#{timestamp}_#{random_part}#{suffix}"
end
def get_file_extension(filename)
# Returns the lower-cased extension of a file name.
#
# @param filename [String] File name to inspect.
# @return [String, nil] The extension including its leading dot, or nil when there is none.
def get_file_extension(filename)
  extension = File.extname(filename)
  return nil if extension.empty?

  extension.downcase
end
def initialize(url)
# Stores the URL of the remote document.
#
# @param url [String] URL of the file; it must start with 'https://'.
# @raise [RuntimeError] If the URL does not start with 'https://'.
def initialize(url)
  secure = url.start_with?('https://')
  raise 'URL must be HTTPS' unless secure

  @url = url
end
def make_request(uri, request, max_redirects)
# Performs the request over SSL and follows redirections up to a maximum count.
#
# @param uri [URI::Generic] URL the request is sent to.
# @param request [Net::HTTPRequest] Request to send.
# @param max_redirects [Integer] Number of redirections that may still be followed.
# @return [Net::HTTPResponse] The last response received; this is a redirection
#   response when the redirect budget is exhausted.
# @raise [RuntimeError] If a followed redirection response has no Location header.
def make_request(uri, request, max_redirects)
  # Called without a block, Net::HTTP#request reads the whole body, so the
  # connection can be closed before any redirection is followed.
  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(request) }
  return response unless response.is_a?(Net::HTTPRedirection) && max_redirects.positive?

  location = response['location']
  raise 'No location in redirection header.' if location.nil?

  # Location may be relative, so it is resolved against the URL that was just requested.
  new_uri = uri.merge(location)
  # The follow-up request is a fresh GET: headers of the original request are not carried over.
  make_request(new_uri, Net::HTTP::Get.new(new_uri), max_redirects - 1)
end
def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
Returns:
- (String) - The full path of the saved file.
Parameters:
- max_redirects (Integer) -- Maximum number of redirects to follow.
- token (String, nil) -- Optional token for JWT-based authentication.
- password (String, nil) -- Optional password for authentication.
- username (String, nil) -- Optional username for authentication.
- filename (String, nil) -- Optional name to give to the file.
- path (String) -- Path to save the file to.
# Downloads the remote file and saves it into the given directory.
#
# @param path [String] Path to save the file to.
# @param filename [String, nil] Optional name to give to the file.
# @param username [String, nil] Optional username for authentication.
# @param password [String, nil] Optional password for authentication.
# @param token [String, nil] Optional token for JWT-based authentication.
# @param max_redirects [Integer] Maximum amount of redirects to follow.
# @return [String] The full path of the saved file.
def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
  response_body = fetch_file_content(
    username: username,
    password: password,
    token: token,
    max_redirects: max_redirects
  )
  filename = fill_filename(filename)
  full_path = File.join(path.chomp('/'), filename)
  # Written in binary mode so the downloaded bytes are stored untouched
  # (text mode may translate newlines on some platforms).
  File.binwrite(full_path, response_body)
  full_path
end