class Sidekiq::JobRetry

Automatically retry jobs that fail in Sidekiq.
Sidekiq's retry support assumes a typical development lifecycle:

  0. Push some code changes with a bug in it.
  1. Bug causes job processing to fail, Sidekiq's middleware captures
     the job and pushes it onto a retry queue.
  2. Sidekiq retries jobs in the retry queue multiple times with
     an exponential delay, the job continues to fail.
  3. After a few days, a developer deploys a fix. The job is
     reprocessed successfully.
  4. Once retries are exhausted, Sidekiq will give up and move the
     job to the Dead Job Queue (aka morgue) where it must be dealt with
     manually in the Web UI.
  5. After 6 months on the DJQ, Sidekiq will discard the job.

A job looks like:

  { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => true }

The 'retry' option also accepts a number (in place of 'true'):

  { 'class' => 'HardJob', 'args' => [1, 2, 'foo'], 'retry' => 5 }

The job will be retried this number of times before giving up. (If simply
'true', Sidekiq retries 25 times.)

Relevant options for job retries:

  * 'queue' - the queue for the initial job
  * 'retry_queue' - if job retries should be pushed to a different (e.g. lower priority) queue
  * 'retry_count' - number of times we've retried so far
  * 'error_message' - the message from the exception
  * 'error_class' - the exception class
  * 'failed_at' - the first time it failed
  * 'retried_at' - the last time it was retried
  * 'backtrace' - the number of lines of error backtrace to store

We don't store the backtrace by default as that can add a lot of overhead
to the job, and everyone is using an error service, right?

The default number of retries is 25, which works out to about 3 weeks of
retries with the exponential backoff. You can change the default maximum
number of retries in your initializer:

  Sidekiq.default_configuration[:max_retries] = 7

or limit the number of retries for a particular job and send retries to
a low priority queue with:

  class MyJob
    include Sidekiq::Job
    sidekiq_options retry: 10, retry_queue: 'low'
  end
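These retry options can also be supplied per push through the low-level client API. The following is a minimal sketch; HardJob is a hypothetical job class, and the hash keys mirror the job payload shown above.

  require 'sidekiq'

  # Hypothetical job class used only for this example.
  class HardJob
    include Sidekiq::Job
  end

  # Cap retries at 5 and send any retries to the 'low' queue.
  Sidekiq::Client.push(
    'class' => HardJob,
    'args' => [1, 2, 'foo'],
    'retry' => 5,
    'retry_queue' => 'low'
  )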
def compress_backtrace(backtrace)
def compress_backtrace(backtrace)
  serialized = Sidekiq.dump_json(backtrace)
  compressed = Zlib::Deflate.deflate(serialized)
  Base64.encode64(compressed)
end
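The compressed value is what process_retry stores in the job's 'error_backtrace' field. For debugging, the transformation can be reversed. This is a sketch, not part of Sidekiq's public API; decompress_backtrace is a hypothetical helper name.

  require 'base64'
  require 'zlib'

  # Hypothetical helper: reverse of compress_backtrace above.
  def decompress_backtrace(encoded)
    compressed = Base64.decode64(encoded)
    serialized = Zlib::Inflate.inflate(compressed)
    Sidekiq.load_json(serialized)
  end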
def delay_for(jobinst, count, exception, msg)
def delay_for(jobinst, count, exception, msg)
  rv = begin
    # sidekiq_retry_in can return two different things:
    # 1. When to retry next, as an integer of seconds
    # 2. A symbol which re-routes the job elsewhere, e.g. :discard, :kill, :default
    jobinst&.sidekiq_retry_in_block&.call(count, exception, msg)
  rescue Exception => e
    handle_exception(e, {context: "Failure scheduling retry using the defined `sidekiq_retry_in` in #{jobinst.class.name}, falling back to default"})
    nil
  end

  rv = rv.to_i if rv.respond_to?(:to_i)
  delay = (count**4) + 15
  if Integer === rv && rv > 0
    delay = rv
  elsif rv == :discard
    return [:discard, nil] # do nothing, job goes poof
  elsif rv == :kill
    return [:kill, nil]
  end

  [:default, delay]
end
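delay_for first consults an optional per-class sidekiq_retry_in block, passing it the retry count, the exception, and the job hash. A minimal sketch of such a block follows; FlakyApiJob is a hypothetical job class.

  require 'sidekiq'

  class FlakyApiJob
    include Sidekiq::Job

    # Returning an Integer overrides the default (count**4) + 15 delay;
    # returning :discard or :kill re-routes the job as handled in delay_for.
    sidekiq_retry_in do |count, exception, _jobhash|
      if exception.is_a?(ArgumentError)
        :discard           # bad arguments will never succeed, drop the job
      else
        30 * (count + 1)   # linear backoff: 30s, 60s, 90s, ...
      end
    end

    def perform(url)
      # ...
    end
  end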
def exception_caused_by_shutdown?(e, checked_causes = [])
def exception_caused_by_shutdown?(e, checked_causes = [])
  return false unless e.cause

  # Handle circular causes
  checked_causes << e.object_id
  return false if checked_causes.include?(e.cause.object_id)

  e.cause.instance_of?(Sidekiq::Shutdown) ||
    exception_caused_by_shutdown?(e.cause, checked_causes)
end
def exception_message(exception)
Extract message from exception.
def exception_message(exception)
  # App code can stuff all sorts of crazy binary data into the error message
  # that won't convert to JSON.
  exception.message.to_s[0, 10_000]
rescue
  +"!!! ERROR MESSAGE THREW AN ERROR !!!"
end
def global(jobstr, queue)
The global retry handler requires only the barest of data.
We want to be able to retry as much as possible so we don't
require the job to be instantiated.
def global(jobstr, queue)
  yield
rescue Handled => ex
  raise ex
rescue Sidekiq::Shutdown => ey
  # ignore, will be pushed back onto queue during hard_shutdown
  raise ey
rescue Exception => e
  # ignore, will be pushed back onto queue during hard_shutdown
  raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)

  msg = Sidekiq.load_json(jobstr)
  if msg["retry"]
    process_retry(nil, msg, queue, e)
  else
    @capsule.config.death_handlers.each do |handler|
      handler.call(msg, e)
    rescue => handler_ex
      handle_exception(handler_ex, {context: "Error calling death handler", job: msg})
    end
  end

  raise Handled
end
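When a job is not configured to retry, global hands the job hash and the exception to each configured death handler. A minimal sketch of registering one in an initializer, assuming the standard Sidekiq.configure_server API:

  require 'sidekiq'

  Sidekiq.configure_server do |config|
    # Each handler receives the job hash and the exception,
    # matching handler.call(msg, e) above.
    config.death_handlers << ->(job, ex) do
      Sidekiq.logger.warn "#{job['class']} #{job['jid']} died: #{ex.message}"
    end
  end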
def initialize(capsule)
def initialize(capsule)
  @config = @capsule = capsule
  @max_retries = Sidekiq.default_configuration[:max_retries] || DEFAULT_MAX_RETRY_ATTEMPTS
  @backtrace_cleaner = Sidekiq.default_configuration[:backtrace_cleaner]
end
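Both values read here can be tuned in an initializer. The max_retries line is shown in the overview above; the backtrace_cleaner assignment is an assumption that the option accepts any callable taking and returning an array of backtrace lines, as it is used in process_retry below.

  # Sketch of an initializer (e.g. config/initializers/sidekiq.rb).
  Sidekiq.default_configuration[:max_retries] = 7

  # Assumed: :backtrace_cleaner may be any callable over backtrace lines.
  Sidekiq.default_configuration[:backtrace_cleaner] = ->(backtrace) do
    backtrace.reject { |line| line.include?("/gems/") }
  end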
def local(jobinst, jobstr, queue)
The local retry support means that any errors that occur within
this block can be associated with the given job instance.
This is required to support the `sidekiq_retries_exhausted` block.

Note that any exception from the block is wrapped in the Skip
exception so the global block does not reprocess the error. The
Skip exception is unwrapped within Sidekiq::Processor#process before
calling the handle_exception handlers.
def local(jobinst, jobstr, queue)
  yield
rescue Handled => ex
  raise ex
rescue Sidekiq::Shutdown => ey
  # ignore, will be pushed back onto queue during hard_shutdown
  raise ey
rescue Exception => e
  # ignore, will be pushed back onto queue during hard_shutdown
  raise Sidekiq::Shutdown if exception_caused_by_shutdown?(e)

  msg = Sidekiq.load_json(jobstr)
  if msg["retry"].nil?
    msg["retry"] = jobinst.class.get_sidekiq_options["retry"]
  end

  raise e unless msg["retry"]
  process_retry(jobinst, msg, queue, e)
  # We've handled this error associated with this job, don't
  # need to handle it at the global level
  raise Skip
end
def process_retry(jobinst, msg, queue, exception)
Note that +jobinst+ can be nil here if an error is raised before we can
instantiate the job instance. All access must be guarded and
best effort.
def process_retry(jobinst, msg, queue, exception)
  max_retry_attempts = retry_attempts_from(msg["retry"], @max_retries)

  msg["queue"] = (msg["retry_queue"] || queue)

  m = exception_message(exception)
  if m.respond_to?(:scrub!)
    m.force_encoding("utf-8")
    m.scrub!
  end

  msg["error_message"] = m
  msg["error_class"] = exception.class.name
  count = if msg["retry_count"]
    msg["retried_at"] = Time.now.to_f
    msg["retry_count"] += 1
  else
    msg["failed_at"] = Time.now.to_f
    msg["retry_count"] = 0
  end

  if msg["backtrace"]
    backtrace = @backtrace_cleaner.call(exception.backtrace)
    lines = if msg["backtrace"] == true
      backtrace
    else
      backtrace[0...msg["backtrace"].to_i]
    end

    msg["error_backtrace"] = compress_backtrace(lines)
  end

  # Goodbye dear message, you (re)tried your best I'm sure.
  return retries_exhausted(jobinst, msg, exception) if count >= max_retry_attempts

  strategy, delay = delay_for(jobinst, count, exception, msg)
  case strategy
  when :discard
    return # poof!
  when :kill
    return retries_exhausted(jobinst, msg, exception)
  end

  # Logging here can break retries if the logging device raises ENOSPC #3979
  # logger.debug { "Failure! Retry #{count} in #{delay} seconds" }
  jitter = rand(10) * (count + 1)
  retry_at = Time.now.to_f + delay + jitter
  payload = Sidekiq.dump_json(msg)
  redis do |conn|
    conn.zadd("retry", retry_at.to_s, payload)
  end
end
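Jobs scheduled here land in the 'retry' sorted set, scored by the time they should run again. They can be inspected from Ruby with the Sidekiq API; a minimal sketch:

  require 'sidekiq/api'

  # Sidekiq::RetrySet wraps the 'retry' sorted set written above.
  Sidekiq::RetrySet.new.each do |job|
    puts "#{job['class']} retry ##{job['retry_count']} due at #{Time.at(job.score)}"
  end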
def retries_exhausted(jobinst, msg, exception)
def retries_exhausted(jobinst, msg, exception)
  begin
    block = jobinst&.sidekiq_retries_exhausted_block
    block&.call(msg, exception)
  rescue => e
    handle_exception(e, {context: "Error calling retries_exhausted", job: msg})
  end

  send_to_morgue(msg) unless msg["dead"] == false

  @capsule.config.death_handlers.each do |handler|
    handler.call(msg, exception)
  rescue => e
    handle_exception(e, {context: "Error calling death handler", job: msg})
  end
end
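A job class can hook into this step with a sidekiq_retries_exhausted block, which receives the job hash and the exception exactly as called above, and can opt out of the morgue with dead: false. SyncJob below is a hypothetical example.

  require 'sidekiq'

  class SyncJob
    include Sidekiq::Job
    # dead: false makes retries_exhausted skip send_to_morgue.
    sidekiq_options retry: 5, dead: false

    sidekiq_retries_exhausted do |msg, exception|
      Sidekiq.logger.warn "Giving up on #{msg['class']} #{msg['jid']}: #{exception&.message}"
    end

    def perform(account_id)
      # ...
    end
  end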
def retry_attempts_from(msg_retry, default)
def retry_attempts_from(msg_retry, default)
  if msg_retry.is_a?(Integer)
    msg_retry
  else
    default
  end
end
def send_to_morgue(msg)
def send_to_morgue(msg)
  logger.info { "Adding dead #{msg["class"]} job #{msg["jid"]}" }
  payload = Sidekiq.dump_json(msg)
  now = Time.now.to_f

  redis do |conn|
    conn.multi do |xa|
      xa.zadd("dead", now.to_s, payload)
      xa.zremrangebyscore("dead", "-inf", now - @capsule.config[:dead_timeout_in_seconds])
      xa.zremrangebyrank("dead", 0, - @capsule.config[:dead_max_jobs])
    end
  end
end
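Dead jobs sit in the 'dead' sorted set, trimmed by the dead_timeout_in_seconds and dead_max_jobs settings used above, and can be inspected or resurrected from Ruby as well as from the Web UI. A minimal sketch; 'HardJob' is a hypothetical class name:

  require 'sidekiq/api'

  dead = Sidekiq::DeadSet.new
  puts "#{dead.size} jobs in the morgue"

  # Push matching dead jobs back onto their queue.
  dead.each do |job|
    job.retry if job['class'] == 'HardJob'
  end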