From 8df720d893d52fbb30829c9f62e76c44fd001d9e Mon Sep 17 00:00:00 2001
From: dmitrytrager
Date: Thu, 23 Apr 2026 10:58:12 +0200
Subject: [PATCH] Add SHA256 checksum storage for ActiveStorage blobs

Device sync protocol requires SHA256-prefixed per-file checksums so
Clinical Edge can verify downloaded files against the manifest. Adds a
sha256_checksum column to active_storage_blobs, async calculation via
Blobs::ComputeSha256ChecksumJob on blob creation, a lazy fallback for
legacy blobs, and a backfill rake task. ManifestBuilder now emits
"sha256:"-prefixed checksums per the data-exchange spec.

The job retries on ActiveStorage::FileNotFoundError because it is
enqueued when the blob row is committed, which can be before the file
has been uploaded to the storage service.

Co-Authored-By: Claude Opus 4
---
 app/jobs/blobs/compute_sha256_checksum_job.rb | 22 ++++++++++
 .../concerns/active_storage_blob_sha256.rb    | 21 +++++++++
 app/services/beacons/manifest_builder.rb      |  2 +-
 app/services/blobs/sha256_calculator.rb       | 28 ++++++++++++
 .../active_storage_blob_sha256.rb             |  3 ++
 ...sha256_checksum_to_active_storage_blobs.rb |  5 +++
 db/schema.rb                                  |  3 +-
 lib/tasks/blob_checksums.rake                 | 13 ++++++
 .../blobs/compute_sha256_checksum_job_spec.rb | 44 +++++++++++++++++++
 .../active_storage_blob_sha256_spec.rb        | 35 +++++++++++++++
 .../services/beacons/manifest_builder_spec.rb |  2 +-
 spec/services/blobs/sha256_calculator_spec.rb | 41 +++++++++++++++++
 12 files changed, 216 insertions(+), 3 deletions(-)
 create mode 100644 app/jobs/blobs/compute_sha256_checksum_job.rb
 create mode 100644 app/models/concerns/active_storage_blob_sha256.rb
 create mode 100644 app/services/blobs/sha256_calculator.rb
 create mode 100644 config/initializers/active_storage_blob_sha256.rb
 create mode 100644 db/migrate/20260203100009_add_sha256_checksum_to_active_storage_blobs.rb
 create mode 100644 lib/tasks/blob_checksums.rake
 create mode 100644 spec/jobs/blobs/compute_sha256_checksum_job_spec.rb
 create mode 100644 spec/models/concerns/active_storage_blob_sha256_spec.rb
 create mode 100644 spec/services/blobs/sha256_calculator_spec.rb

diff --git a/app/jobs/blobs/compute_sha256_checksum_job.rb b/app/jobs/blobs/compute_sha256_checksum_job.rb
new file mode 100644
index 00000000..e48889f1
--- /dev/null
+++ b/app/jobs/blobs/compute_sha256_checksum_job.rb
@@ -0,0 +1,22 @@
+module Blobs
+  # Computes and stores the SHA256 checksum of a single ActiveStorage blob.
+  #
+  # Does nothing when the blob cannot be found or already has a checksum.
+  class ComputeSha256ChecksumJob < ApplicationJob
+    queue_as :default
+
+    # The job is enqueued from an after_create_commit hook on the blob, which
+    # can fire before the file has been uploaded to the storage service
+    # (create_and_upload! and direct uploads both create the row first), so
+    # retry while the file is still missing instead of failing outright.
+    retry_on ActiveStorage::FileNotFoundError, wait: :polynomially_longer, attempts: 5
+
+    def perform(blob_id)
+      blob = ActiveStorage::Blob.find_by(id: blob_id)
+      return unless blob
+      return if blob.sha256_checksum.present?
+
+      Sha256Calculator.new(blob).call
+    end
+  end
+end
diff --git a/app/models/concerns/active_storage_blob_sha256.rb b/app/models/concerns/active_storage_blob_sha256.rb
new file mode 100644
index 00000000..b773415d
--- /dev/null
+++ b/app/models/concerns/active_storage_blob_sha256.rb
@@ -0,0 +1,21 @@
+module ActiveStorageBlobSha256
+  extend ActiveSupport::Concern
+
+  included do
+    after_create_commit :enqueue_sha256_checksum_calculation
+  end
+
+  # Returns the SHA256 digest of the blob's content, computing and
+  # persisting it synchronously if it has not yet been calculated.
+  def ensure_sha256_checksum
+    return sha256_checksum if sha256_checksum.present?
+
+    Blobs::Sha256Calculator.new(self).call
+  end
+
+  private
+
+  def enqueue_sha256_checksum_calculation
+    Blobs::ComputeSha256ChecksumJob.perform_later(id)
+  end
+end
diff --git a/app/services/beacons/manifest_builder.rb b/app/services/beacons/manifest_builder.rb
index b525850c..72f15b11 100644
--- a/app/services/beacons/manifest_builder.rb
+++ b/app/services/beacons/manifest_builder.rb
@@ -104,7 +104,7 @@ def build_file(topic, document)
         id: blob.id,
         filename: filename,
         path: "providers/#{topic.provider_id}/topics/#{topic.id}/#{filename}",
-        checksum: blob.checksum,
+        checksum: "sha256:#{blob.ensure_sha256_checksum}",
         size_bytes: blob.byte_size,
         content_type: blob.content_type,
         updated_at: blob.created_at.iso8601,
diff --git a/app/services/blobs/sha256_calculator.rb b/app/services/blobs/sha256_calculator.rb
new file mode 100644
index 00000000..e2116526
--- /dev/null
+++ b/app/services/blobs/sha256_calculator.rb
@@ -0,0 +1,28 @@
+module Blobs
+  # Computes the SHA256 hex digest of a blob's content and stores it in the
+  # blob's sha256_checksum column.
+  class Sha256Calculator
+    def initialize(blob)
+      @blob = blob
+    end
+
+    # Computes the digest, writes it with update_column (overwriting any
+    # existing value) and returns it.
+    def call
+      digest = compute_digest
+      blob.update_column(:sha256_checksum, digest)
+      digest
+    end
+
+    private
+
+    attr_reader :blob
+
+    # Feeds the downloaded content into the digest chunk by chunk.
+    def compute_digest
+      sha = OpenSSL::Digest::SHA256.new
+      blob.download { |chunk| sha.update(chunk) }
+      sha.hexdigest
+    end
+  end
+end
diff --git a/config/initializers/active_storage_blob_sha256.rb b/config/initializers/active_storage_blob_sha256.rb
new file mode 100644
index 00000000..d21e0581
--- /dev/null
+++ b/config/initializers/active_storage_blob_sha256.rb
@@ -0,0 +1,3 @@
+Rails.application.config.to_prepare do
+  ActiveStorage::Blob.include(ActiveStorageBlobSha256)
+end
diff --git a/db/migrate/20260203100009_add_sha256_checksum_to_active_storage_blobs.rb b/db/migrate/20260203100009_add_sha256_checksum_to_active_storage_blobs.rb
new file mode 100644
index 00000000..1bc65aba
--- /dev/null
+++ b/db/migrate/20260203100009_add_sha256_checksum_to_active_storage_blobs.rb
@@ -0,0 +1,5 @@
+class AddSha256ChecksumToActiveStorageBlobs < ActiveRecord::Migration[8.0]
+  def change
+    add_column :active_storage_blobs, :sha256_checksum, :string
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index c89e0c89..f518e029 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[8.1].define(version: 2026_02_03_100007) do
+ActiveRecord::Schema[8.1].define(version: 2026_02_03_100009) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pg_catalog.plpgsql"
 
@@ -33,6 +33,7 @@
     t.string "key", null: false
     t.text "metadata"
     t.string "service_name", null: false
+    t.string "sha256_checksum"
     t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true
   end
 
diff --git a/lib/tasks/blob_checksums.rake b/lib/tasks/blob_checksums.rake
new file mode 100644
index 00000000..d549c468
--- /dev/null
+++ b/lib/tasks/blob_checksums.rake
@@ -0,0 +1,13 @@
+namespace :blobs do
+  desc "Enqueue SHA256 checksum calculation for blobs missing one"
+  task backfill_sha256: :environment do
+    enqueued = 0
+
+    ActiveStorage::Blob.where(sha256_checksum: nil).find_each do |blob|
+      Blobs::ComputeSha256ChecksumJob.perform_later(blob.id)
+      enqueued += 1
+    end
+
+    puts "Enqueued SHA256 backfill job for #{enqueued} blob(s)"
+  end
+end
diff --git a/spec/jobs/blobs/compute_sha256_checksum_job_spec.rb b/spec/jobs/blobs/compute_sha256_checksum_job_spec.rb
new file mode 100644
index 00000000..ea71d957
--- /dev/null
+++ b/spec/jobs/blobs/compute_sha256_checksum_job_spec.rb
@@ -0,0 +1,44 @@
+require "rails_helper"
+
+RSpec.describe Blobs::ComputeSha256ChecksumJob, type: :job do
+  describe "#perform" do
+    let(:blob) do
+      ActiveStorage::Blob.create_and_upload!(
+        io: StringIO.new("job test content"),
+        filename: "job.txt",
+        content_type: "text/plain",
+      )
+    end
+    let(:calculator) { instance_double(Blobs::Sha256Calculator, call: "digest") }
+
+    before do
+      allow(Blobs::Sha256Calculator).to receive(:new).and_return(calculator)
+      blob.update_column(:sha256_checksum, nil)
+    end
+
+    it "delegates calculation to Blobs::Sha256Calculator" do
+      described_class.perform_now(blob.id)
+
+      expect(Blobs::Sha256Calculator).to have_received(:new).with(blob)
+      expect(calculator).to have_received(:call)
+    end
+
+    context "when the blob does not exist" do
+      it "returns without invoking the calculator" do
+        described_class.perform_now(-1)
+
+        expect(Blobs::Sha256Calculator).not_to have_received(:new)
+      end
+    end
+
+    context "when the blob already has a checksum" do
+      before { blob.update_column(:sha256_checksum, "already-there") }
+
+      it "skips calculation" do
+        described_class.perform_now(blob.id)
+
+        expect(Blobs::Sha256Calculator).not_to have_received(:new)
+      end
+    end
+  end
+end
diff --git a/spec/models/concerns/active_storage_blob_sha256_spec.rb b/spec/models/concerns/active_storage_blob_sha256_spec.rb
new file mode 100644
index 00000000..c0015906
--- /dev/null
+++ b/spec/models/concerns/active_storage_blob_sha256_spec.rb
@@ -0,0 +1,35 @@
+require "rails_helper"
+
+RSpec.describe ActiveStorageBlobSha256 do
+  let(:blob) do
+    ActiveStorage::Blob.create_and_upload!(
+      io: StringIO.new("concern content"),
+      filename: "concern.txt",
+      content_type: "text/plain",
+    )
+  end
+
+  describe "#ensure_sha256_checksum" do
+    context "when the checksum is already stored" do
+      before { blob.update_column(:sha256_checksum, "cached-digest") }
+
+      it "returns the stored value without recomputing" do
+        allow(Blobs::Sha256Calculator).to receive(:new)
+
+        expect(blob.ensure_sha256_checksum).to eq("cached-digest")
+        expect(Blobs::Sha256Calculator).not_to have_received(:new)
+      end
+    end
+
+    context "when the checksum is missing" do
+      before { blob.update_column(:sha256_checksum, nil) }
+
+      it "computes, stores, and returns the SHA256 digest" do
+        digest = blob.ensure_sha256_checksum
+
+        expect(digest).to match(/\A[a-f0-9]{64}\z/)
+        expect(blob.reload.sha256_checksum).to eq(digest)
+      end
+    end
+  end
+end
diff --git a/spec/services/beacons/manifest_builder_spec.rb b/spec/services/beacons/manifest_builder_spec.rb
index 43688c08..96707683 100644
--- a/spec/services/beacons/manifest_builder_spec.rb
+++ b/spec/services/beacons/manifest_builder_spec.rb
@@ -60,7 +60,7 @@
       blob = topic.documents.first.blob
 
       expect(file_data[:id]).to eq(blob.id)
-      expect(file_data[:checksum]).to eq(blob.checksum)
+      expect(file_data[:checksum]).to match(/\Asha256:[a-f0-9]{64}\z/)
       expect(file_data[:size_bytes]).to eq(blob.byte_size)
       expect(file_data[:content_type]).to eq(blob.content_type)
       expect(file_data[:path]).to start_with("providers/#{provider.id}/topics/#{topic.id}/")
diff --git a/spec/services/blobs/sha256_calculator_spec.rb b/spec/services/blobs/sha256_calculator_spec.rb
new file mode 100644
index 00000000..bc183cf3
--- /dev/null
+++ b/spec/services/blobs/sha256_calculator_spec.rb
@@ -0,0 +1,41 @@
+require "rails_helper"
+
+RSpec.describe Blobs::Sha256Calculator do
+  subject(:calculator) { described_class.new(blob) }
+
+  let(:content) { "hello skillrx checksum world" }
+  let(:expected_digest) { OpenSSL::Digest::SHA256.hexdigest(content) }
+  let(:blob) do
+    ActiveStorage::Blob.create_and_upload!(
+      io: StringIO.new(content),
+      filename: "sample.txt",
+      content_type: "text/plain",
+    )
+  end
+
+  before { blob.update_column(:sha256_checksum, nil) }
+
+  describe "#call" do
+    it "computes the SHA256 digest of the blob's content" do
+      expect(calculator.call).to eq(expected_digest)
+    end
+
+    it "persists the digest on the blob" do
+      calculator.call
+
+      expect(blob.reload.sha256_checksum).to eq(expected_digest)
+    end
+
+    it "returns a 64-character lowercase hex string" do
+      expect(calculator.call).to match(/\A[a-f0-9]{64}\z/)
+    end
+
+    it "overwrites any stale digest" do
+      blob.update_column(:sha256_checksum, "stale")
+
+      calculator.call
+
+      expect(blob.reload.sha256_checksum).to eq(expected_digest)
+    end
+  end
+end