Skip to content

Commit

Permalink
Fix validity checks for char (#3853)
Browse files Browse the repository at this point in the history
Ensure value validity checks for `char` in Kani matches the Rust
documentation:

> A char value must not be a surrogate (i.e., must not be in the range
  0xD800..=0xDFFF) and must be equal to or less than char::MAX.

The existing code was relying on the compiler ABI information, which can
only express one continuous value range for value validity. But `char`
is a special case, and the Rust compiler understands the gap in valid
`char`. This change makes Kani compiler aware of this gap too.

Resolves #3241

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 and MIT licenses.
  • Loading branch information
celinval authored Jan 25, 2025
1 parent 3e93311 commit e09095b
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 24 deletions.
130 changes: 106 additions & 24 deletions kani-compiler/src/kani_middle/transform/check_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use stable_mir::mir::{
Statement, StatementKind, Terminator, TerminatorKind,
};
use stable_mir::target::{MachineInfo, MachineSize};
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Ty, TyKind, UintTy};
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Span, Ty, TyKind, UintTy};
use std::fmt::Debug;
use strum_macros::AsRefStr;
use tracing::{debug, trace};
Expand Down Expand Up @@ -164,7 +164,18 @@ pub struct ValidValueReq {
/// Size of this requirement.
size: MachineSize,
/// The range restriction is represented by a Scalar.
valid_range: WrappingRange,
valid_range: ValidityRange,
}

#[derive(Clone, Debug, Eq, PartialEq, Hash)]
enum ValidityRange {
/// The value validity fits in a single value range.
/// This includes cases where the full range is covered.
Single(WrappingRange),
/// The validity includes more than one value range.
/// Currently, this is only the case for `char`, which has two ranges.
/// If more cases come up, we could turn this into a vector instead.
Multiple([WrappingRange; 2]),
}

// TODO: Optimize checks by merging requirements whenever possible.
Expand All @@ -180,44 +191,88 @@ impl ValidValueReq {
/// It's not possible to define a `rustc_layout_scalar_valid_range_*` to any other structure.
/// Note that this annotation only applies to the first scalar in the layout.
pub fn try_from_ty(machine_info: &MachineInfo, ty: Ty) -> Option<ValidValueReq> {
let shape = ty.layout().unwrap().shape();
match shape.abi {
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
Some(ValidValueReq { offset: 0, size: value.size(machine_info), valid_range })
if ty.kind().is_char() {
Some(ValidValueReq {
offset: 0,
size: MachineSize::from_bits(size_of::<char>() * 8),
valid_range: ValidityRange::Multiple([
WrappingRange { start: 0, end: 0xD7FF },
WrappingRange { start: 0xE000, end: char::MAX.into() },
]),
})
} else {
let shape = ty.layout().unwrap().shape();
match shape.abi {
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
Some(ValidValueReq {
offset: 0,
size: value.size(machine_info),
valid_range: ValidityRange::Single(valid_range),
})
}
ValueAbi::Scalar(_)
| ValueAbi::ScalarPair(_, _)
| ValueAbi::Uninhabited
| ValueAbi::Vector { .. }
| ValueAbi::Aggregate { .. } => None,
}
ValueAbi::Scalar(_)
| ValueAbi::ScalarPair(_, _)
| ValueAbi::Uninhabited
| ValueAbi::Vector { .. }
| ValueAbi::Aggregate { .. } => None,
}
}

/// Check if range is full.
pub fn is_full(&self) -> bool {
self.valid_range.is_full(self.size).unwrap()
if let ValidityRange::Single(valid_range) = self.valid_range {
valid_range.is_full(self.size).unwrap()
} else {
false
}
}

/// Check if this range contains `other` range.
///
/// I.e., `scalar_2` ⊆ `scalar_1`
pub fn contains(&self, other: &ValidValueReq) -> bool {
assert_eq!(self.size, other.size);
match (self.valid_range.wraps_around(), other.valid_range.wraps_around()) {
(true, true) | (false, false) => {
self.valid_range.start <= other.valid_range.start
&& self.valid_range.end >= other.valid_range.end
match (&self.valid_range, &other.valid_range) {
(ValidityRange::Single(this_range), ValidityRange::Single(other_range)) => {
range_contains(this_range, other_range, self.size)
}
(ValidityRange::Multiple(this_ranges), ValidityRange::Single(other_range)) => {
range_contains(&this_ranges[0], other_range, self.size)
|| range_contains(&this_ranges[1], other_range, self.size)
}
(ValidityRange::Single(this_range), ValidityRange::Multiple(other_ranges)) => {
range_contains(this_range, &other_ranges[0], self.size)
&& range_contains(this_range, &other_ranges[1], self.size)
}
(true, false) => {
self.valid_range.start <= other.valid_range.start
|| self.valid_range.end >= other.valid_range.end
(ValidityRange::Multiple(this_ranges), ValidityRange::Multiple(other_ranges)) => {
let contains = (range_contains(&this_ranges[0], &other_ranges[0], self.size)
|| range_contains(&this_ranges[1], &other_ranges[0], self.size))
&& (range_contains(&this_ranges[0], &other_ranges[1], self.size)
|| range_contains(&this_ranges[1], &other_ranges[1], self.size));
// Multiple today only cover `char` case.
debug_assert!(
contains,
"Expected validity of `char` for Multiple ranges. Found: {self:?}, {other:?}"
);
contains
}
(false, true) => self.is_full(),
}
}
}

/// Check if range `r1` contains range `r2`.
///
/// I.e., `r2` ⊆ `r1`
fn range_contains(r1: &WrappingRange, r2: &WrappingRange, sz: MachineSize) -> bool {
match (r1.wraps_around(), r2.wraps_around()) {
(true, true) | (false, false) => r1.start <= r2.start && r1.end >= r2.end,
(true, false) => r1.start <= r2.start || r1.end >= r2.end,
(false, true) => r1.is_full(sz).unwrap(),
}
}

#[derive(AsRefStr, Clone, Debug)]
enum SourceOp {
/// Validity checks are done on a byte level when the Rvalue can generate invalid value.
Expand Down Expand Up @@ -763,8 +818,6 @@ pub fn build_limits(
let span = source.span(body.blocks());
debug!(?req, ?rvalue_ptr, ?span, "build_limits");
let primitive_ty = uint_ty(req.size.bytes());
let start_const = body.new_uint_operand(req.valid_range.start, primitive_ty, span);
let end_const = body.new_uint_operand(req.valid_range.end, primitive_ty, span);
let orig_ptr = if req.offset != 0 {
let start_ptr =
move_local(body.insert_assignment(rvalue_ptr, source, InsertPosition::Before));
Expand Down Expand Up @@ -799,6 +852,35 @@ pub fn build_limits(
InsertPosition::Before,
);
let value = Operand::Copy(Place { local: value_ptr, projection: vec![ProjectionElem::Deref] });
match &req.valid_range {
ValidityRange::Single(range) => {
build_single_limit(body, range, source, span, primitive_ty, value)
}
ValidityRange::Multiple([range1, range2]) => {
// Build `let valid = range1.contains(value) || range2.contains(value);
let cond1 = build_single_limit(body, range1, source, span, primitive_ty, value.clone());
let cond2 = build_single_limit(body, range2, source, span, primitive_ty, value);
body.insert_binary_op(
BinOp::BitOr,
move_local(cond1),
move_local(cond2),
source,
InsertPosition::Before,
)
}
}
}

fn build_single_limit(
body: &mut MutableBody,
range: &WrappingRange,
source: &mut SourceInstruction,
span: Span,
primitive_ty: UintTy,
value: Operand,
) -> Local {
let start_const = body.new_uint_operand(range.start, primitive_ty, span);
let end_const = body.new_uint_operand(range.end, primitive_ty, span);
let start_result = body.insert_binary_op(
BinOp::Ge,
value.clone(),
Expand All @@ -808,7 +890,7 @@ pub fn build_limits(
);
let end_result =
body.insert_binary_op(BinOp::Le, value, end_const, source, InsertPosition::Before);
if req.valid_range.wraps_around() {
if range.wraps_around() {
// valid >= start || valid <= end
body.insert_binary_op(
BinOp::BitOr,
Expand Down
37 changes: 37 additions & 0 deletions tests/expected/valid-value-checks/char_validity.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Checking harness check_invalid_char_unit_wrapper_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char wrapper detection""
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, ()>`
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<(), char>`
VERIFICATION:- FAILED

Checking harness check_invalid_char_nonzero_wrapper_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char / NonZero detection""
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, std::num::NonZero<u32>>`
VERIFICATION:- FAILED

Checking harness check_invalid_char_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char detection""
Failed Checks: Undefined Behavior: Invalid value of type `char`
VERIFICATION:- FAILED


Checking harness check_valid_mixed_wrapper...
VERIFICATION:- SUCCESSFUL

Checking harness check_valid_char_wrappers...
VERIFICATION:- SUCCESSFUL

Checking harness cannot_dereference_invalid_char...
VERIFICATION:- SUCCESSFUL

Checking harness check_char_ok...
VERIFICATION:- SUCCESSFUL

Summary:
Verification failed for - check_invalid_char_unit_wrapper_should_fail
Verification failed for - check_invalid_char_nonzero_wrapper_should_fail
Verification failed for - check_invalid_char_should_fail
Complete - 4 successfully verified harnesses, 3 failures, 7 total.
112 changes: 112 additions & 0 deletions tests/expected/valid-value-checks/char_validity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT
// kani-flags: -Z valid-value-checks -Z mem-predicates
//! Check that Kani can correctly identify value validity of `char` and structures with `char`.
//! Note that we use `black_box` hint to ensure the logic doesn't get removed as dead code.
use std::num::NonZeroU32;

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct OneField<T>(T);

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct TwoFields<T, U>(T, U);

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct ThreeFields<T, U, V>(T, U, V);

/// Check that valid u32's are all identified as valid.
#[kani::proof]
fn check_char_ok() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
assert!(kani::mem::can_dereference(&val as *const _ as *const char));
let c1: char = unsafe { std::mem::transmute(val) };
let c2 = unsafe { char::from_u32_unchecked(val) };
let c3 = char::from_u32(val).unwrap();
assert_eq!(c1, c2);
assert_eq!(c2, c3);
}

/// Check that all invalid u32's identified as invalid.
#[kani::proof]
fn cannot_dereference_invalid_char() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
assert!(!kani::mem::can_dereference(&val as *const _ as *const char));
}

/// Check that transmuting from invalid u32's trigger a UB check.
#[kani::proof]
fn check_invalid_char_should_fail() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
let _ = if kani::any() {
unsafe { char::from_u32_unchecked(val) }
} else {
unsafe { std::mem::transmute(val) }
};
assert!(false, "Unreachable code: Expected invalid char detection");
}

#[kani::proof]
fn check_valid_char_wrappers() {
let v1 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let v2 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let v3 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
assert!(kani::mem::can_dereference(&OneField(v1) as *const _ as *const OneField<char>));
assert!(kani::mem::can_dereference(
&TwoFields(v1, v2) as *const _ as *const TwoFields<char, char>
));
assert!(kani::mem::can_dereference(
&ThreeFields(v1, v2, v3) as *const _ as *const ThreeFields<char, char, char>
));
}

/// Ensure that we correctly identify validity of a structure with fields with different
/// requirements.
#[kani::proof]
fn check_valid_mixed_wrapper() {
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let non_zero = kani::any_where(|v: &u32| *v != 0);
assert!(kani::mem::can_dereference(
&TwoFields(unicode, non_zero) as *const _ as *const TwoFields<char, NonZeroU32>
));
assert!(kani::mem::can_dereference(
&TwoFields(non_zero, unicode) as *const _ as *const TwoFields<NonZeroU32, char>
));
assert!(kani::mem::can_dereference(
&TwoFields((), unicode) as *const _ as *const TwoFields<(), char>
));
}

/// Check that transmuting from invalid wrappers trigger UB check failure.
#[kani::proof]
fn check_invalid_char_nonzero_wrapper_should_fail() {
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
let non_zero = kani::any_where(|v: &u32| *v != 0);
let var: TwoFields<char, NonZeroU32> = if kani::any() {
unsafe { std::mem::transmute(TwoFields(non_unicode, non_zero)) }
} else {
unsafe { std::mem::transmute(TwoFields(unicode, 0)) }
};
// Ensure the condition above does not get pruned.
std::hint::black_box(var);
assert!(false, "Unreachable code: Expected invalid char / NonZero detection");
}

/// Check that transmuting from invalid wrappers trigger UB check failure independent
/// on the position of the unit field.
#[kani::proof]
fn check_invalid_char_unit_wrapper_should_fail() {
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
if kani::any() {
let var: TwoFields<char, ()> = unsafe { std::mem::transmute(TwoFields(non_unicode, ())) };
std::hint::black_box(var);
} else {
let var: TwoFields<(), char> = unsafe { std::mem::transmute(TwoFields((), non_unicode)) };
std::hint::black_box(var);
}
assert!(false, "Unreachable code: Expected invalid char wrapper detection");
}

0 comments on commit e09095b

Please sign in to comment.