# frozen_string_literal: true
module Asciidoctor
# Public: Handles all operations for resolving, cleaning and joining paths.
# This class includes operations for handling both web paths (request URIs) and
# system paths.
#
# The main emphasis of the class is on creating clean and secure paths. Clean
# paths are void of duplicate parent and current directory references in the
# path name. Secure paths are paths which are restricted from accessing
# directories outside of a jail path, if specified.
#
# Since joining two paths can result in an insecure path, this class also
# handles the task of joining a parent (start) and child (target) path.
#
# This class makes no use of path utilities from the Ruby libraries. Instead,
# it handles all aspects of path manipulation. The main benefit of
# internalizing these operations is that the class is able to handle both posix
# and windows paths independent of the operating system on which it runs. This
# makes the class both deterministic and easier to test.
#
# Examples
#
# resolver = PathResolver.new
#
# # Web Paths
#
# resolver.web_path('images')
# => 'images'
#
# resolver.web_path('./images')
# => './images'
#
# resolver.web_path('/images')
# => '/images'
#
# resolver.web_path('./images/../assets/images')
# => './assets/images'
#
# resolver.web_path('/../images')
# => '/images'
#
# resolver.web_path('images', 'assets')
# => 'assets/images'
#
# resolver.web_path('tiger.png', '../assets/images')
# => '../assets/images/tiger.png'
#
# # System Paths
#
# resolver.working_dir
# => '/path/to/docs'
#
# resolver.system_path('images')
# => '/path/to/docs/images'
#
# resolver.system_path('../images')
# => '/path/to/images'
#
# resolver.system_path('/etc/images')
# => '/etc/images'
#
# resolver.system_path('images', '/etc')
# => '/etc/images'
#
# resolver.system_path('', '/etc/images')
# => '/etc/images'
#
# resolver.system_path(nil, nil, '/path/to/docs')
# => '/path/to/docs'
#
# resolver.system_path('..', nil, '/path/to/docs')
# => '/path/to/docs'
#
# resolver.system_path('../../../css', nil, '/path/to/docs')
# => '/path/to/docs/css'
#
# resolver.system_path('../../../css', '../../..', '/path/to/docs')
# => '/path/to/docs/css'
#
# resolver.system_path('..', 'C:\\data\\docs\\assets', 'C:\\data\\docs')
# => 'C:/data/docs'
#
# resolver.system_path('..\\..\\css', 'C:\\data\\docs\\assets', 'C:\\data\\docs')
# => 'C:/data/docs/css'
#
# begin
# resolver.system_path('../../../css', '../../..', '/path/to/docs', recover: false)
# rescue SecurityError => e
# puts e.message
# end
# => 'path ../../../../../../css refers to location outside jail: /path/to/docs (disallowed in safe mode)'
#
# resolver.system_path('/path/to/docs/images', nil, '/path/to/docs')
# => '/path/to/docs/images'
#
# begin
# resolver.system_path('images', '/etc', '/path/to/docs', recover: false)
# rescue SecurityError => e
# puts e.message
# end
# => start path /etc is outside of jail: /path/to/docs'
#
class PathResolver
include Logging
DOT = '.'
DOT_DOT = '..'
DOT_SLASH = './'
SLASH = '/'
BACKSLASH = '\\'
DOUBLE_SLASH = '//'
WindowsRootRx = /^(?:[a-zA-Z]:)?[\\\/]/
attr_accessor :file_separator
attr_accessor :working_dir
# Public: Construct a new instance of PathResolver, optionally specifying the
# file separator (to override the system default) and the working directory
# (to override the present working directory). The working directory will be
# expanded to an absolute path inside the constructor.
#
# file_separator - the String file separator to use for path operations
# (optional, default: File::ALT_SEPARATOR or File::SEPARATOR)
# working_dir - the String working directory (optional, default: Dir.pwd)
#
def initialize file_separator = nil, working_dir = nil
@file_separator = file_separator || ::File::ALT_SEPARATOR || ::File::SEPARATOR
@working_dir = working_dir ? ((root? working_dir) ? (posixify working_dir) : (::File.expand_path working_dir)) : ::Dir.pwd
@_partition_path_sys = {}
@_partition_path_web = {}
end
# Public: Check whether the specified path is an absolute path.
#
# This operation considers both posix paths and Windows paths. The path does
# not have to be posixified beforehand. This operation does not handle URIs.
#
# Unix absolute paths start with a slash. UNC paths can start with a slash or
# backslash. Windows roots can start with a drive letter.
#
# path - the String path to check
#
# returns a Boolean indicating whether the path is an absolute root path
def absolute_path? path
(path.start_with? SLASH) || (@file_separator == BACKSLASH && (WindowsRootRx.match? path))
end
# Public: Check if the specified path is an absolute root path (or, in the
# browser environment, an absolute URI as well)
#
# This operation considers both posix paths and Windows paths. If the JavaScript IO
# module is xmlhttprequest, this operation also considers absolute URIs.
#
# Unix absolute paths and UNC paths start with slash. Windows roots can
# start with a drive letter. When the IO module is xmlhttprequest (Opal
# runtime only), an absolute (qualified) URI (starts with file://, http://,
# or https://) is also considered to be an absolute path.
#
# path - the String path to check
#
# returns a Boolean indicating whether the path is an absolute root path (or
# an absolute URI when the JavaScript IO module is xmlhttprequest)
if RUBY_ENGINE == 'opal' && ::JAVASCRIPT_IO_MODULE == 'xmlhttprequest'
def root? path
(absolute_path? path) || (path.start_with? 'file://', 'http://', 'https://')
end
else
alias root? absolute_path?
end
# Public: Determine if the path is a UNC (root) path
#
# path - the String path to check
#
# returns a Boolean indicating whether the path is a UNC path
def unc? path
path.start_with? DOUBLE_SLASH
end
# Public: Determine if the path is an absolute (root) web path
#
# path - the String path to check
#
# returns a Boolean indicating whether the path is an absolute (root) web path
def web_root? path
path.start_with? SLASH
end
# Public: Determine whether path descends from base.
#
# If path equals base, or base is a parent of path, return true.
#
# path - The String path to check. Can be relative.
# base - The String base path to check against. Can be relative.
#
# returns If path descends from base, return the offset, otherwise false.
def descends_from? path, base
if base == path
0
elsif base == SLASH
(path.start_with? SLASH) && 1
else
(path.start_with? base + SLASH) && (base.length + 1)
end
end
# Public: Calculate the relative path to this absolute path from the specified base directory
#
# If neither path or base are absolute paths, the path is not contained
# within the base directory, or the relative path cannot be computed, the
# original path is returned work is done.
#
# path - [String] an absolute filename.
# base - [String] an absolute base directory.
#
# Return the [String] relative path of the specified path calculated from the base directory.
def relative_path path, base
if root? path
if (offset = descends_from? path, base)
path.slice offset, path.length
else
begin
(Pathname.new path).relative_path_from(Pathname.new base).to_s
rescue
path
end
end
else
path
end
end
# Public: Normalize path by converting any backslashes to forward slashes
#
# path - the String path to normalize
#
# returns a String path with any backslashes replaced with forward slashes
def posixify path
if path
@file_separator == BACKSLASH && (path.include? BACKSLASH) ? (path.tr BACKSLASH, SLASH) : path
else
''
end
end
alias posixfy posixify
# Public: Expand the specified path by converting the path to a posix path, resolving parent
# references (..), and removing self references (.).
#
# path - the String path to expand
#
# returns a String path as a posix path with parent references resolved and self references removed.
# The result will be relative if the path is relative and absolute if the path is absolute.
def expand_path path
path_segments, path_root = partition_path path
if path.include? DOT_DOT
resolved_segments = []
path_segments.each do |segment|
segment == DOT_DOT ? resolved_segments.pop : resolved_segments << segment
end
join_path resolved_segments, path_root
else
join_path path_segments, path_root
end
end
# Public: Partition the path into path segments and remove self references (.) and the trailing
# slash, if present. Prior to being partitioned, the path is converted to a posix path.
#
# Parent references are not resolved by this method since the consumer often needs to handle this
# resolution in a certain context (checking for the breach of a jail, for instance).
#
# path - the String path to partition
# web - a Boolean indicating whether the path should be handled
# as a web path (optional, default: false)
#
# Returns a 2-item Array containing the Array of String path segments and the
# path root (e.g., '/', './', 'c:/', or '//'), which is nil unless the path is absolute.
def partition_path path, web = nil
if (result = (cache = web ? @_partition_path_web : @_partition_path_sys)[path])
return result
end
posix_path = posixify path
if web
# ex. /sample/path
if web_root? posix_path
root = SLASH
# ex. ./sample/path
elsif posix_path.start_with? DOT_SLASH
root = DOT_SLASH
# else ex. sample/path
end
elsif root? posix_path
# ex. //sample/path
if unc? posix_path
root = DOUBLE_SLASH
# ex. /sample/path
elsif posix_path.start_with? SLASH
root = SLASH
# ex. C:/sample/path (or file:///sample/path in browser environment)
else
root = posix_path.slice 0, (posix_path.index SLASH) + 1
end
# ex. ./sample/path
elsif posix_path.start_with? DOT_SLASH
root = DOT_SLASH
# else ex. sample/path
end
path_segments = (root ? (posix_path.slice root.length, posix_path.length) : posix_path).split SLASH
# strip out all dot entries
path_segments.delete DOT
cache[path] = [path_segments, root]
end
# Public: Join the segments using the posix file separator (since Ruby knows
# how to work with paths specified this way, regardless of OS). Use the root,
# if specified, to construct an absolute path. Otherwise join the segments as
# a relative path.
#
# segments - a String Array of path segments
# root - a String path root (optional, default: nil)
#
# returns a String path formed by joining the segments using the posix file
# separator and prepending the root, if specified
def join_path segments, root = nil
root ? %(#{root}#{segments.join SLASH}) : (segments.join SLASH)
end
# Public: Securely resolve a system path
#
# Resolve a system path from the target relative to the start path, jail path, or working
# directory (specified in the constructor), in that order. If a jail path is specified, enforce
# that the resolved path descends from the jail path. If a jail path is not provided, the resolved
# path may be any location on the system. If the resolved path is absolute, use it as is (unless
# it breaches the jail path). Expand all parent and self references in the resolved path.
#
# target - the String target path
# start - the String start path from which to resolve a relative target; falls back to jail, if
# specified, or the working directory specified in the constructor (default: nil)
# jail - the String jail path to which to confine the resolved path, if specified; must be an
# absolute path (default: nil)
# opts - an optional Hash of options to control processing (default: {}):
# * :recover is used to control whether the processor should
# automatically recover when an illegal path is encountered
# * :target_name is used in messages to refer to the path being resolved
#
# returns a String path relative to the start path, if specified, and confined to the jail path,
# if specified. The path is posixified and all parent and self references in the path are expanded.
def system_path target, start = nil, jail = nil, opts = {}
if jail
raise ::SecurityError, %(Jail is not an absolute path: #{jail}) unless root? jail
#raise ::SecurityError, %(Jail is not a canonical path: #{jail}) if jail.include? DOT_DOT
jail = posixify jail
end
if target
if root? target
target_path = expand_path target
if jail && !(descends_from? target_path, jail)
if opts.fetch :recover, true
logger.warn %(#{opts[:target_name] || 'path'} is outside of jail; recovering automatically)
target_segments, _ = partition_path target_path
jail_segments, jail_root = partition_path jail
return join_path jail_segments + target_segments, jail_root
else
raise ::SecurityError, %(#{opts[:target_name] || 'path'} #{target} is outside of jail: #{jail} (disallowed in safe mode))
end
end
return target_path
else
target_segments, _ = partition_path target
end
else
target_segments = []
end
if target_segments.empty?
if start.nil_or_empty?
return jail || @working_dir
elsif root? start
if jail
start = posixify start
else
return expand_path start
end
else
target_segments, _ = partition_path start
start = jail || @working_dir
end
elsif start.nil_or_empty?
start = jail || @working_dir
elsif root? start
start = posixify start if jail
else
#start = system_path start, jail, jail, opts
start = %(#{(jail || @working_dir).chomp '/'}/#{start})
end
# both jail and start have been posixified at this point if jail is set
if jail && (recheck = !(descends_from? start, jail)) && @file_separator == BACKSLASH
start_segments, start_root = partition_path start
jail_segments, jail_root = partition_path jail
if start_root != jail_root
if opts.fetch :recover, true
logger.warn %(start path for #{opts[:target_name] || 'path'} is outside of jail root; recovering automatically)
start_segments = jail_segments
recheck = false
else
raise ::SecurityError, %(start path for #{opts[:target_name] || 'path'} #{start} refers to location outside jail root: #{jail} (disallowed in safe mode))
end
end
else
start_segments, jail_root = partition_path start
end
if (resolved_segments = start_segments + target_segments).include? DOT_DOT
unresolved_segments, resolved_segments = resolved_segments, []
if jail
jail_segments, _ = partition_path jail unless jail_segments
warned = false
unresolved_segments.each do |segment|
if segment == DOT_DOT
if resolved_segments.size > jail_segments.size
resolved_segments.pop
elsif opts.fetch :recover, true
unless warned
logger.warn %(#{opts[:target_name] || 'path'} has illegal reference to ancestor of jail; recovering automatically)
warned = true
end
else
raise ::SecurityError, %(#{opts[:target_name] || 'path'} #{target} refers to location outside jail: #{jail} (disallowed in safe mode))
end
else
resolved_segments << segment
end
end
else
unresolved_segments.each do |segment|
segment == DOT_DOT ? resolved_segments.pop : resolved_segments << segment
end
end
end
if recheck
target_path = join_path resolved_segments, jail_root
if descends_from? target_path, jail
target_path
elsif opts.fetch :recover, true
logger.warn %(#{opts[:target_name] || 'path'} is outside of jail; recovering automatically)
jail_segments, _ = partition_path jail unless jail_segments
join_path jail_segments + target_segments, jail_root
else
raise ::SecurityError, %(#{opts[:target_name] || 'path'} #{target} is outside of jail: #{jail} (disallowed in safe mode))
end
else
join_path resolved_segments, jail_root
end
end
# Public: Resolve a web path from the target and start paths.
# The main function of this operation is to resolve any parent
# references and remove any self references.
#
# The target is assumed to be a path, not a qualified URI.
# That check should happen before this method is invoked.
#
# target - the String target path
# start - the String start (i.e., parent) path
#
# returns a String path that joins the target path with the
# start path with any parent references resolved and self
# references removed
def web_path target, start = nil
target = posixify target
start = posixify start
uri_prefix = nil
unless start.nil_or_empty? || (web_root? target)
target = (start.end_with? SLASH) ? %(#{start}#{target}) : %(#{start}#{SLASH}#{target})
if (uri_prefix = Helpers.uri_prefix target)
target = target[uri_prefix.length..-1]
end
end
# use this logic instead if we want to normalize target if it contains a URI
#unless web_root? target
# if preserve_uri_target && (uri_prefix = Helpers.uri_prefix target)
# target = target[uri_prefix.length..-1]
# elsif !start.nil_or_empty?
# target = %(#{start}#{SLASH}#{target})
# if (uri_prefix = Helpers.uri_prefix target)
# target = target[uri_prefix.length..-1]
# end
# end
#end
target_segments, target_root = partition_path target, true
resolved_segments = []
target_segments.each do |segment|
if segment == DOT_DOT
if resolved_segments.empty?
resolved_segments << segment unless target_root && target_root != DOT_SLASH
elsif resolved_segments[-1] == DOT_DOT
resolved_segments << segment
else
resolved_segments.pop
end
else
resolved_segments << segment
# checking for empty would eliminate repeating forward slashes
#resolved_segments << segment unless segment.empty?
end
end
if (resolved_path = join_path resolved_segments, target_root).include? ' '
resolved_path = resolved_path.gsub ' ', '%20'
end
uri_prefix ? %(#{uri_prefix}#{resolved_path}) : resolved_path
end
end
end