diff --git a/Gemfile b/Gemfile index 875416db6..7a09c15ab 100644 --- a/Gemfile +++ b/Gemfile @@ -13,6 +13,7 @@ gem "govuk_personalisation" gem "govuk_publishing_components" gem "nokogiri" gem "plek" +gem "rack-utf8_sanitizer" gem "redis" gem "sprockets-rails" gem "terser" diff --git a/Gemfile.lock b/Gemfile.lock index 3fde3eb49..f8ce57a83 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -449,6 +449,8 @@ GEM rack (>= 3.0.0) rack-test (2.1.0) rack (>= 1.3) + rack-utf8_sanitizer (1.9.1) + rack (>= 1.0, < 4.0) rackup (2.1.0) rack (>= 3) webrick (~> 1.8) @@ -613,6 +615,7 @@ DEPENDENCIES nokogiri plek pry + rack-utf8_sanitizer rails (= 7.2.1.1) redis rubocop-govuk diff --git a/config/application.rb b/config/application.rb index 148f89c08..c5db4fcc9 100644 --- a/config/application.rb +++ b/config/application.rb @@ -14,6 +14,7 @@ require "active_support/time" # require "action_cable/engine" require "rails/test_unit/railtie" +require_relative "../lib/sanitiser/strategy" # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. @@ -27,7 +28,7 @@ class Application < Rails::Application # Please, add to the `ignore` list any other `lib` subdirectories that do # not contain `.rb` files, or that should not be reloaded or eager loaded. # Common ones are `templates`, `generators`, or `middleware`, for example. - config.autoload_lib(ignore: %w[assets tasks]) + config.autoload_lib(ignore: %w[assets tasks sanitiser]) # Settings in config/environments/* take precedence over those specified here. # Application configuration can go into files in config/initializers @@ -44,5 +45,15 @@ class Application < Rails::Application # Use temporary directory for page cache if filesystem is read-only config.action_controller.page_cache_directory = File.join(ENV["TMPDIR"], "page_cache") if ENV.fetch("USE_TMPDIR_PAGE_CACHE", "false") == "true" + + # Protect from "invalid byte sequence in UTF-8" errors, + # when a query or a cookie is a string with incorrect UTF-8 encoding. + config.middleware.insert_before( + 0, + Rack::UTF8Sanitizer, + sanitizable_content_types: [], + only: %w[QUERY_STRING], + strategy: Sanitiser::Strategy, + ) end end diff --git a/lib/sanitiser/strategy.rb b/lib/sanitiser/strategy.rb new file mode 100644 index 000000000..fc2414343 --- /dev/null +++ b/lib/sanitiser/strategy.rb @@ -0,0 +1,18 @@ +module Sanitiser + class Strategy + class SanitisingError < StandardError; end + + class << self + def call(input, sanitize_null_bytes: false) + input + .force_encoding(Encoding::ASCII_8BIT) + .encode!(Encoding::UTF_8) + if sanitize_null_bytes && Rack::UTF8Sanitizer::NULL_BYTE_REGEX.match?(input) + raise NullByteInString + end + rescue StandardError + raise SanitisingError, "Non-UTF-8 (or null) character in the query or in the cookie" + end + end + end +end diff --git a/test/integration/sanitiser_test.rb b/test/integration/sanitiser_test.rb new file mode 100644 index 000000000..f36e6c779 --- /dev/null +++ b/test/integration/sanitiser_test.rb @@ -0,0 +1,84 @@ +require "test_helper" + +class SanitiserTest < ActiveSupport::TestCase + include Rack::Test::Methods + + def app + Rails.application + end + + test "with query being correct percent-encoded UTF-8 string does not raise exception" do + get "/templates/scheduled_maintenance.html.erb?%41" + assert last_response.successful? + end + + test "with query being incorrect percent-encoded UTF-8 string raises SanitisingError" do + assert_raises(Sanitiser::Strategy::SanitisingError) do + get "/templates/scheduled_maintenance.html.erb?%AD" + end + end + + test "with cookie key being correct UTF-8 does not raise exception" do + header("Cookie", "\x41=value") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end + + test "with cookie key being incorrect UTF-8 raises exception" do + header("Cookie", "\xAD=value") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/templates/scheduled_maintenance.html.erb" + end + end + + test "with cookie value being correct UTF-8 does not raise exception" do + header("Cookie", "key=\x41") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end + + test "with cookie value being incorrect UTF-8 raises exception" do + header("Cookie", "key=\xAD") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/templates/scheduled_maintenance.html.erb" + end + end + + test "with cookie path being correct UTF-8 does not raise exception" do + header("Cookie", "key=value; Path=/\x41") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end + + test "with cookie path being incorrect UTF-8 raises exception" do + header("Cookie", "key=value; Path=/\xAD") + assert_raises(ArgumentError, match: "invalid byte sequence in UTF-8") do + get "/templates/scheduled_maintenance.html.erb" + end + end + + test "with cookie path being correct percent-encoded UTF-8 does not raise exception" do + header("Cookie", "key=value; Path=/%41") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end + + test "with cookie path being incorrect percent-encoded UTF-8 raises SanitisingError" do + header("Cookie", "key=value; Path=/%AD") + assert_raises(Sanitiser::Strategy::SanitisingError) do + get "/templates/scheduled_maintenance.html.erb" + end + end + + test "with referrer headerbeing correct percent-encoded UTF-8 does not raise exception" do + header("Referer", "http://example.com/?%41") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end + + test "with referrer header being incorrect percent-encoded UTF-8 does not raise exception" do + header("Referer", "http://example.com/?%AD") + get "/templates/scheduled_maintenance.html.erb" + assert last_response.successful? + end +end