Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions app/jobs/blobs/compute_sha256_checksum_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Blobs
  # Background job that computes and stores the SHA256 checksum of a single
  # ActiveStorage blob by delegating to Blobs::Sha256Calculator.
  #
  # The job is enqueued from an after_create_commit hook on the blob. Active
  # Storage commits the blob row before the file reaches the storage service
  # (create_and_upload! uploads after the record is saved, and direct uploads
  # arrive later still), so a fast worker can pick the job up while the file
  # is not there yet. Downloading then raises ActiveStorage::FileNotFoundError;
  # instead of failing permanently on the first attempt, retry with backoff.
  class ComputeSha256ChecksumJob < ApplicationJob
    queue_as :default

    # Bounded retry: after the last attempt the error is re-raised to the
    # queue backend so a genuinely missing file is still surfaced.
    retry_on ActiveStorage::FileNotFoundError, wait: :polynomially_longer, attempts: 5

    # @param blob_id [Integer] primary key of the ActiveStorage::Blob to process
    # @return [String, nil] the digest returned by the calculator, or nil when
    #   the blob no longer exists or already has a checksum
    def perform(blob_id)
      blob = ActiveStorage::Blob.find_by(id: blob_id)
      # The blob may have been purged between enqueue and execution.
      return unless blob
      # Another job or a synchronous ensure_sha256_checksum call got there first.
      return if blob.sha256_checksum.present?

      Sha256Calculator.new(blob).call
    end
  end
end
21 changes: 21 additions & 0 deletions app/models/concerns/active_storage_blob_sha256.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Concern mixed into ActiveStorage::Blob that maintains a SHA256 digest in the
# blob's sha256_checksum column.
module ActiveStorageBlobSha256
  extend ActiveSupport::Concern

  # Queue the checksum calculation once the newly created row is committed.
  included { after_create_commit :enqueue_sha256_checksum_calculation }

  # Gives back the blob's SHA256 digest. A stored value is returned as-is;
  # when none is present, Blobs::Sha256Calculator is run inline and its
  # result (which it also persists) is returned.
  def ensure_sha256_checksum
    sha256_checksum.presence || Blobs::Sha256Calculator.new(self).call
  end

  private

  # Callback target: schedules Blobs::ComputeSha256ChecksumJob with this blob's id.
  def enqueue_sha256_checksum_calculation
    Blobs::ComputeSha256ChecksumJob.perform_later(id)
  end
end
2 changes: 1 addition & 1 deletion app/services/beacons/manifest_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def build_file(topic, document)
id: blob.id,
filename: filename,
path: "providers/#{topic.provider_id}/topics/#{topic.id}/#{filename}",
checksum: blob.checksum,
checksum: "sha256:#{blob.ensure_sha256_checksum}",
size_bytes: blob.byte_size,
content_type: blob.content_type,
updated_at: blob.created_at.iso8601,
Expand Down
23 changes: 23 additions & 0 deletions app/services/blobs/sha256_calculator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module Blobs
  # Service object that hashes a blob's content with SHA256 and records the
  # hex digest in the blob's sha256_checksum column.
  class Sha256Calculator
    # @param blob [#download, #update_column] the blob whose content is hashed
    def initialize(blob)
      @blob = blob
    end

    # Computes the digest, writes it with update_column (overwriting whatever
    # was stored before), and hands the digest back to the caller.
    #
    # @return [String] hex-encoded SHA256 digest
    def call
      compute_digest.tap { |hex| blob.update_column(:sha256_checksum, hex) }
    end

    private

    attr_reader :blob

    # Feeds each chunk yielded by blob.download into the hasher, so the
    # digest is built incrementally chunk by chunk.
    def compute_digest
      hasher = OpenSSL::Digest::SHA256.new
      blob.download { |piece| hasher << piece }
      hasher.hexdigest
    end
  end
end
3 changes: 3 additions & 0 deletions config/initializers/active_storage_blob_sha256.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Mix the SHA256 checksum concern into ActiveStorage::Blob from a to_prepare
# hook (presumably so the include is re-applied after code reloads; confirm).
Rails.application.config.to_prepare { ActiveStorage::Blob.include(ActiveStorageBlobSha256) }
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds a string column, sha256_checksum, to the active_storage_blobs table.
class AddSha256ChecksumToActiveStorageBlobs < ActiveRecord::Migration[8.0]
  # No default or null constraint is given, so rows that exist when the
  # migration runs start out with a NULL checksum.
  def change
    add_column(:active_storage_blobs, :sha256_checksum, :string)
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions lib/tasks/blob_checksums.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace :blobs do
  desc "Enqueue SHA256 checksum calculation for blobs missing one"
  task backfill_sha256: :environment do
    # Only blobs whose sha256_checksum column is NULL need a job.
    missing = ActiveStorage::Blob.where(sha256_checksum: nil)

    # Walk the relation in batches, queueing one job per blob and tallying
    # how many were queued along the way.
    enqueued = missing.find_each.reduce(0) do |total, blob|
      Blobs::ComputeSha256ChecksumJob.perform_later(blob.id)
      total + 1
    end

    puts "Enqueued SHA256 backfill job for #{enqueued} blob(s)"
  end
end
44 changes: 44 additions & 0 deletions spec/jobs/blobs/compute_sha256_checksum_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
require "rails_helper"

# Exercises the job's guard clauses and its delegation to the calculator.
# Blobs::Sha256Calculator is stubbed, so no digest is actually computed here.
RSpec.describe Blobs::ComputeSha256ChecksumJob, type: :job do
  describe "#perform" do
    let(:calculator) { instance_double(Blobs::Sha256Calculator, call: "digest") }
    let(:blob) do
      ActiveStorage::Blob.create_and_upload!(
        io: StringIO.new("job test content"),
        filename: "job.txt",
        content_type: "text/plain",
      )
    end

    # Install the stub before the blob is first referenced (and therefore
    # created), then clear the column so each example starts without a digest.
    before do
      allow(Blobs::Sha256Calculator).to receive(:new).and_return(calculator)
      blob.update_column(:sha256_checksum, nil)
    end

    # Runs the job inline for the given blob id.
    def run_job(id)
      described_class.perform_now(id)
    end

    it "delegates calculation to Blobs::Sha256Calculator" do
      run_job(blob.id)

      expect(Blobs::Sha256Calculator).to have_received(:new).with(blob)
      expect(calculator).to have_received(:call)
    end

    context "when the blob does not exist" do
      it "returns without invoking the calculator" do
        run_job(-1)

        expect(Blobs::Sha256Calculator).not_to have_received(:new)
      end
    end

    context "when the blob already has a checksum" do
      before do
        blob.update_column(:sha256_checksum, "already-there")
      end

      it "skips calculation" do
        run_job(blob.id)

        expect(Blobs::Sha256Calculator).not_to have_received(:new)
      end
    end
  end
end
35 changes: 35 additions & 0 deletions spec/models/concerns/active_storage_blob_sha256_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
require "rails_helper"

# Covers #ensure_sha256_checksum on a real uploaded blob: the cached path
# (no recomputation) and the missing path (compute, persist, return).
RSpec.describe ActiveStorageBlobSha256 do
  let(:blob) do
    ActiveStorage::Blob.create_and_upload!(
      io: StringIO.new("concern content"),
      filename: "concern.txt",
      content_type: "text/plain",
    )
  end

  describe "#ensure_sha256_checksum" do
    context "when the checksum is already stored" do
      # Seed the column first, then spy on the calculator so any attempt to
      # build one would be recorded.
      before do
        blob.update_column(:sha256_checksum, "cached-digest")
        allow(Blobs::Sha256Calculator).to receive(:new)
      end

      it "returns the stored value without recomputing" do
        returned = blob.ensure_sha256_checksum

        expect(returned).to eq("cached-digest")
        expect(Blobs::Sha256Calculator).not_to have_received(:new)
      end
    end

    context "when the checksum is missing" do
      before do
        blob.update_column(:sha256_checksum, nil)
      end

      it "computes, stores, and returns the SHA256 digest" do
        returned = blob.ensure_sha256_checksum
        stored = blob.reload.sha256_checksum

        expect(returned).to match(/\A[a-f0-9]{64}\z/)
        expect(stored).to eq(returned)
      end
    end
  end
end
2 changes: 1 addition & 1 deletion spec/services/beacons/manifest_builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
blob = topic.documents.first.blob

expect(file_data[:id]).to eq(blob.id)
expect(file_data[:checksum]).to eq(blob.checksum)
expect(file_data[:checksum]).to match(/\Asha256:[a-f0-9]{64}\z/)
expect(file_data[:size_bytes]).to eq(blob.byte_size)
expect(file_data[:content_type]).to eq(blob.content_type)
expect(file_data[:path]).to start_with("providers/#{provider.id}/topics/#{topic.id}/")
Expand Down
41 changes: 41 additions & 0 deletions spec/services/blobs/sha256_calculator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
require "rails_helper"

# Verifies the calculator against a real uploaded blob: the returned digest,
# its persistence, its format, and that an existing value is replaced.
RSpec.describe Blobs::Sha256Calculator do
  subject(:calculator) { described_class.new(blob) }

  let(:content) { "hello skillrx checksum world" }
  let(:blob) do
    ActiveStorage::Blob.create_and_upload!(
      io: StringIO.new(content),
      filename: "sample.txt",
      content_type: "text/plain",
    )
  end
  # Reference digest computed directly from the uploaded string.
  let(:expected_digest) { OpenSSL::Digest::SHA256.hexdigest(content) }

  # Start every example from a blob with no stored digest.
  before do
    blob.update_column(:sha256_checksum, nil)
  end

  describe "#call" do
    it "computes the SHA256 digest of the blob's content" do
      returned = calculator.call

      expect(returned).to eq(expected_digest)
    end

    it "persists the digest on the blob" do
      calculator.call
      stored = blob.reload.sha256_checksum

      expect(stored).to eq(expected_digest)
    end

    it "returns a 64-character lowercase hex string" do
      returned = calculator.call

      expect(returned).to match(/\A[a-f0-9]{64}\z/)
    end

    it "overwrites any stale digest" do
      blob.update_column(:sha256_checksum, "stale")

      calculator.call
      stored = blob.reload.sha256_checksum

      expect(stored).to eq(expected_digest)
    end
  end
end