Skip to content

Commit 99ba556

Browse files
committed
Auto merge of #144081 - RalfJung:const-ptr-fragments, r=oli-obk
const-eval: full support for pointer fragments This fixes rust-lang/const-eval#72 and makes `swap_nonoverlapping` fully work in const-eval by enhancing per-byte provenance tracking to also record *which* of the bytes of the pointer each byte is. Later, if we see all the same bytes in the exact same order, we can treat it like a whole pointer again without ever risking a leak of the data bytes (that encode the offset into the allocation). This lifts the limitation that was discussed quite a bit in #137280. For a concrete piece of code that used to fail and now works properly, consider this example doing a byte-for-byte memcpy in const without using intrinsics: ```rust use std::{mem::{self, MaybeUninit}, ptr}; type Byte = MaybeUninit<u8>; const unsafe fn memcpy(dst: *mut Byte, src: *const Byte, n: usize) { let mut i = 0; while i < n { *dst.add(i) = *src.add(i); i += 1; } } const _MEMCPY: () = unsafe { let ptr = &42; let mut ptr2 = ptr::null::<i32>(); // Copy from ptr to ptr2. memcpy(&mut ptr2 as *mut _ as *mut _, &ptr as *const _ as *const _, mem::size_of::<&i32>()); assert!(*ptr2 == 42); }; ``` What makes this code tricky is that pointers are "opaque blobs" in const-eval: we cannot just let people look at the individual bytes since *we don't know what those bytes look like* -- that depends on the absolute address the pointed-to object will be placed at. The code above "breaks apart" a pointer into individual bytes, and then puts them back together in the same order elsewhere. This PR implements the logic to properly track how those individual bytes relate to the original pointer, and to recognize when they are in the right order again. We still reject constants where the final value contains a not-fully-put-together pointer: I have no idea how one could construct an LLVM global where one byte is defined as "the 3rd byte of a pointer to that other global over there" -- and even if LLVM supports this somehow, we can leave implementing that to a future PR. 
It seems unlikely to me that anyone would even want this, but who knows.^^ This also changes the behavior of Miri, by tracking the order of bytes with provenance and only considering a pointer to have valid provenance if all bytes are in the original order again. This is related to rust-lang/unsafe-code-guidelines#558. It means one cannot implement XOR linked lists with strict provenance any more, which is however only of theoretical interest. Practically, I am curious if anyone will show up with any code that Miri now complains about - that would be interesting data. Cc `@rust-lang/opsem`
2 parents 2e2642e + ba5b6b9 commit 99ba556

36 files changed

+542
-358
lines changed

compiler/rustc_const_eval/messages.ftl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ const_eval_const_context = {$kind ->
5757
}
5858
5959
const_eval_const_heap_ptr_in_final = encountered `const_allocate` pointer in final value that was not made global
60-
.note = use `const_make_global` to make allocated pointers immutable before returning
60+
.note = use `const_make_global` to turn allocated pointers into immutable globals before returning
6161
6262
const_eval_const_make_global_ptr_already_made_global = attempting to call `const_make_global` twice on the same allocation {$alloc}
6363
@@ -231,6 +231,9 @@ const_eval_mutable_borrow_escaping =
231231
232232
const_eval_mutable_ptr_in_final = encountered mutable pointer in final value of {const_eval_intern_kind}
233233
234+
const_eval_partial_pointer_in_final = encountered partial pointer in final value of {const_eval_intern_kind}
235+
.note = while pointers can be broken apart into individual bytes during const-evaluation, only complete pointers (with all their bytes in the right order) are supported in the final value
236+
234237
const_eval_nested_static_in_thread_local = #[thread_local] does not support implicit nested statics, please create explicit static items and refer to them instead
235238
236239
const_eval_non_const_await =
@@ -299,10 +302,8 @@ const_eval_panic = evaluation panicked: {$msg}
299302
300303
const_eval_panic_non_str = argument to `panic!()` in a const context must have type `&str`
301304
302-
const_eval_partial_pointer_copy =
303-
unable to copy parts of a pointer from memory at {$ptr}
304-
const_eval_partial_pointer_overwrite =
305-
unable to overwrite parts of a pointer in memory at {$ptr}
305+
const_eval_partial_pointer_read =
306+
unable to read parts of a pointer from memory at {$ptr}
306307
const_eval_pointer_arithmetic_overflow =
307308
overflowing pointer arithmetic: the total offset in bytes does not fit in an `isize`
308309

compiler/rustc_const_eval/src/const_eval/eval_queries.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,13 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>(
117117
ecx.tcx.dcx().emit_err(errors::ConstHeapPtrInFinal { span: ecx.tcx.span }),
118118
)));
119119
}
120+
Err(InternError::PartialPointer) => {
121+
throw_inval!(AlreadyReported(ReportedErrorInfo::non_const_eval_error(
122+
ecx.tcx
123+
.dcx()
124+
.emit_err(errors::PartialPtrInFinal { span: ecx.tcx.span, kind: intern_kind }),
125+
)));
126+
}
120127
}
121128

122129
interp_ok(R::make_result(ret, ecx))

compiler/rustc_const_eval/src/errors.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ pub(crate) struct ConstHeapPtrInFinal {
5151
pub span: Span,
5252
}
5353

54+
#[derive(Diagnostic)]
55+
#[diag(const_eval_partial_pointer_in_final)]
56+
#[note]
57+
pub(crate) struct PartialPtrInFinal {
58+
#[primary_span]
59+
pub span: Span,
60+
pub kind: InternKind,
61+
}
62+
5463
#[derive(Diagnostic)]
5564
#[diag(const_eval_unstable_in_stable_exposed)]
5665
pub(crate) struct UnstableInStableExposed {
@@ -836,8 +845,7 @@ impl ReportErrorExt for UnsupportedOpInfo {
836845
UnsupportedOpInfo::Unsupported(s) => s.clone().into(),
837846
UnsupportedOpInfo::ExternTypeField => const_eval_extern_type_field,
838847
UnsupportedOpInfo::UnsizedLocal => const_eval_unsized_local,
839-
UnsupportedOpInfo::OverwritePartialPointer(_) => const_eval_partial_pointer_overwrite,
840-
UnsupportedOpInfo::ReadPartialPointer(_) => const_eval_partial_pointer_copy,
848+
UnsupportedOpInfo::ReadPartialPointer(_) => const_eval_partial_pointer_read,
841849
UnsupportedOpInfo::ReadPointerAsInt(_) => const_eval_read_pointer_as_int,
842850
UnsupportedOpInfo::ThreadLocalStatic(_) => const_eval_thread_local_static,
843851
UnsupportedOpInfo::ExternStatic(_) => const_eval_extern_static,
@@ -848,7 +856,7 @@ impl ReportErrorExt for UnsupportedOpInfo {
848856
use UnsupportedOpInfo::*;
849857

850858
use crate::fluent_generated::*;
851-
if let ReadPointerAsInt(_) | OverwritePartialPointer(_) | ReadPartialPointer(_) = self {
859+
if let ReadPointerAsInt(_) | ReadPartialPointer(_) = self {
852860
diag.help(const_eval_ptr_as_bytes_1);
853861
diag.help(const_eval_ptr_as_bytes_2);
854862
}
@@ -860,7 +868,7 @@ impl ReportErrorExt for UnsupportedOpInfo {
860868
| UnsupportedOpInfo::ExternTypeField
861869
| Unsupported(_)
862870
| ReadPointerAsInt(_) => {}
863-
OverwritePartialPointer(ptr) | ReadPartialPointer(ptr) => {
871+
ReadPartialPointer(ptr) => {
864872
diag.arg("ptr", ptr);
865873
}
866874
ThreadLocalStatic(did) | ExternStatic(did) => rustc_middle::ty::tls::with(|tcx| {

compiler/rustc_const_eval/src/interpret/intern.rs

Lines changed: 57 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@ use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
1919
use rustc_hir as hir;
2020
use rustc_hir::definitions::{DefPathData, DisambiguatorState};
2121
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
22-
use rustc_middle::mir::interpret::{ConstAllocation, CtfeProvenance, InterpResult};
22+
use rustc_middle::mir::interpret::{
23+
AllocBytes, ConstAllocation, CtfeProvenance, InterpResult, Provenance,
24+
};
2325
use rustc_middle::query::TyCtxtAt;
2426
use rustc_middle::span_bug;
27+
use rustc_middle::ty::TyCtxt;
2528
use rustc_middle::ty::layout::TyAndLayout;
2629
use rustc_span::def_id::LocalDefId;
2730
use tracing::{instrument, trace};
@@ -52,39 +55,30 @@ impl HasStaticRootDefId for const_eval::CompileTimeMachine<'_> {
5255
}
5356
}
5457

55-
/// Intern an allocation. Returns `Err` if the allocation does not exist in the local memory.
56-
///
57-
/// `mutability` can be used to force immutable interning: if it is `Mutability::Not`, the
58-
/// allocation is interned immutably; if it is `Mutability::Mut`, then the allocation *must be*
59-
/// already mutable (as a sanity check).
60-
///
61-
/// Returns an iterator over all relocations referred to by this allocation.
62-
fn intern_shallow<'tcx, M: CompileTimeMachine<'tcx>>(
63-
ecx: &mut InterpCx<'tcx, M>,
64-
alloc_id: AllocId,
58+
fn prepare_alloc<'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
59+
tcx: TyCtxt<'tcx>,
60+
kind: MemoryKind<const_eval::MemoryKind>,
61+
alloc: &mut Allocation<Prov, Extra, Bytes>,
6562
mutability: Mutability,
66-
disambiguator: Option<&mut DisambiguatorState>,
67-
) -> Result<impl Iterator<Item = CtfeProvenance> + 'tcx, InternError> {
68-
trace!("intern_shallow {:?}", alloc_id);
69-
// remove allocation
70-
// FIXME(#120456) - is `swap_remove` correct?
71-
let Some((kind, mut alloc)) = ecx.memory.alloc_map.swap_remove(&alloc_id) else {
72-
return Err(InternError::DanglingPointer);
73-
};
74-
63+
) -> Result<(), InternError> {
7564
match kind {
7665
MemoryKind::Machine(const_eval::MemoryKind::Heap { was_made_global }) => {
7766
if !was_made_global {
7867
// Attempting to intern a `const_allocate`d pointer that was not made global via
79-
// `const_make_global`. We want to error here, but we have to first put the
80-
// allocation back into the `alloc_map` to keep things in a consistent state.
81-
ecx.memory.alloc_map.insert(alloc_id, (kind, alloc));
68+
// `const_make_global`.
69+
tcx.dcx().delayed_bug("non-global heap allocation in const value");
8270
return Err(InternError::ConstAllocNotGlobal);
8371
}
8472
}
8573
MemoryKind::Stack | MemoryKind::CallerLocation => {}
8674
}
8775

76+
if !alloc.provenance_merge_bytes(&tcx) {
77+
// Per-byte provenance is not supported by backends, so we cannot accept it here.
78+
tcx.dcx().delayed_bug("partial pointer in const value");
79+
return Err(InternError::PartialPointer);
80+
}
81+
8882
// Set allocation mutability as appropriate. This is used by LLVM to put things into
8983
// read-only memory, and also by Miri when evaluating other globals that
9084
// access this one.
@@ -97,6 +91,36 @@ fn intern_shallow<'tcx, M: CompileTimeMachine<'tcx>>(
9791
assert_eq!(alloc.mutability, Mutability::Mut);
9892
}
9993
}
94+
Ok(())
95+
}
96+
97+
/// Intern an allocation. Returns `Err` if the allocation does not exist in the local memory.
98+
///
99+
/// `mutability` can be used to force immutable interning: if it is `Mutability::Not`, the
100+
/// allocation is interned immutably; if it is `Mutability::Mut`, then the allocation *must be*
101+
/// already mutable (as a sanity check).
102+
///
103+
/// Returns an iterator over all relocations referred to by this allocation.
104+
fn intern_shallow<'tcx, M: CompileTimeMachine<'tcx>>(
105+
ecx: &mut InterpCx<'tcx, M>,
106+
alloc_id: AllocId,
107+
mutability: Mutability,
108+
disambiguator: Option<&mut DisambiguatorState>,
109+
) -> Result<impl Iterator<Item = CtfeProvenance> + 'tcx, InternError> {
110+
trace!("intern_shallow {:?}", alloc_id);
111+
// remove allocation
112+
// FIXME(#120456) - is `swap_remove` correct?
113+
let Some((kind, mut alloc)) = ecx.memory.alloc_map.swap_remove(&alloc_id) else {
114+
return Err(InternError::DanglingPointer);
115+
};
116+
117+
if let Err(err) = prepare_alloc(*ecx.tcx, kind, &mut alloc, mutability) {
118+
// We want to error here, but we have to first put the
119+
// allocation back into the `alloc_map` to keep things in a consistent state.
120+
ecx.memory.alloc_map.insert(alloc_id, (kind, alloc));
121+
return Err(err);
122+
}
123+
100124
// link the alloc id to the actual allocation
101125
let alloc = ecx.tcx.mk_const_alloc(alloc);
102126
if let Some(static_id) = ecx.machine.static_def_id() {
@@ -166,6 +190,7 @@ pub enum InternError {
166190
BadMutablePointer,
167191
DanglingPointer,
168192
ConstAllocNotGlobal,
193+
PartialPointer,
169194
}
170195

171196
/// Intern `ret` and everything it references.
@@ -221,21 +246,18 @@ pub fn intern_const_alloc_recursive<'tcx, M: CompileTimeMachine<'tcx>>(
221246
let mut todo: Vec<_> = if is_static {
222247
// Do not steal the root allocation, we need it later to create the return value of `eval_static_initializer`.
223248
// But still change its mutability to match the requested one.
224-
let alloc = ecx.memory.alloc_map.get_mut(&base_alloc_id).unwrap();
225-
alloc.1.mutability = base_mutability;
226-
alloc.1.provenance().ptrs().iter().map(|&(_, prov)| prov).collect()
249+
let (kind, alloc) = ecx.memory.alloc_map.get_mut(&base_alloc_id).unwrap();
250+
prepare_alloc(*ecx.tcx, *kind, alloc, base_mutability)?;
251+
alloc.provenance().ptrs().iter().map(|&(_, prov)| prov).collect()
227252
} else {
228-
intern_shallow(ecx, base_alloc_id, base_mutability, Some(&mut disambiguator))
229-
.unwrap()
230-
.collect()
253+
intern_shallow(ecx, base_alloc_id, base_mutability, Some(&mut disambiguator))?.collect()
231254
};
232255
// We need to distinguish "has just been interned" from "was already in `tcx`",
233256
// so we track this in a separate set.
234257
let mut just_interned: FxHashSet<_> = std::iter::once(base_alloc_id).collect();
235258
// Whether we encountered a bad mutable pointer.
236259
// We want to first report "dangling" and then "mutable", so we need to delay reporting these
237260
// errors.
238-
let mut result = Ok(());
239261
let mut found_bad_mutable_ptr = false;
240262

241263
// Keep interning as long as there are things to intern.
@@ -310,28 +332,23 @@ pub fn intern_const_alloc_recursive<'tcx, M: CompileTimeMachine<'tcx>>(
310332
// okay with losing some potential for immutability here. This can anyway only affect
311333
// `static mut`.
312334
just_interned.insert(alloc_id);
313-
match intern_shallow(ecx, alloc_id, inner_mutability, Some(&mut disambiguator)) {
314-
Ok(nested) => todo.extend(nested),
315-
Err(err) => {
316-
ecx.tcx.dcx().delayed_bug("error during const interning");
317-
result = Err(err);
318-
}
319-
}
335+
let next = intern_shallow(ecx, alloc_id, inner_mutability, Some(&mut disambiguator))?;
336+
todo.extend(next);
320337
}
321-
if found_bad_mutable_ptr && result.is_ok() {
338+
if found_bad_mutable_ptr {
322339
// We found a mutable pointer inside a const where inner allocations should be immutable,
323340
// and there was no other error. This should usually never happen! However, this can happen
324341
// in unleash-miri mode, so report it as a normal error then.
325342
if ecx.tcx.sess.opts.unstable_opts.unleash_the_miri_inside_of_you {
326-
result = Err(InternError::BadMutablePointer);
343+
return Err(InternError::BadMutablePointer);
327344
} else {
328345
span_bug!(
329346
ecx.tcx.span,
330347
"the static const safety checks accepted a mutable pointer they should not have accepted"
331348
);
332349
}
333350
}
334-
result
351+
Ok(())
335352
}
336353

337354
/// Intern `ret`. This function assumes that `ret` references no other allocation.

compiler/rustc_const_eval/src/interpret/memory.rs

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,29 +1314,20 @@ impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>
13141314
}
13151315

13161316
/// Mark the given sub-range (relative to this allocation reference) as uninitialized.
1317-
pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> {
1317+
pub fn write_uninit(&mut self, range: AllocRange) {
13181318
let range = self.range.subrange(range);
13191319

1320-
self.alloc
1321-
.write_uninit(&self.tcx, range)
1322-
.map_err(|e| e.to_interp_error(self.alloc_id))
1323-
.into()
1320+
self.alloc.write_uninit(&self.tcx, range);
13241321
}
13251322

13261323
/// Mark the entire referenced range as uninitialized
1327-
pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> {
1328-
self.alloc
1329-
.write_uninit(&self.tcx, self.range)
1330-
.map_err(|e| e.to_interp_error(self.alloc_id))
1331-
.into()
1324+
pub fn write_uninit_full(&mut self) {
1325+
self.alloc.write_uninit(&self.tcx, self.range);
13321326
}
13331327

13341328
/// Remove all provenance in the reference range.
1335-
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
1336-
self.alloc
1337-
.clear_provenance(&self.tcx, self.range)
1338-
.map_err(|e| e.to_interp_error(self.alloc_id))
1339-
.into()
1329+
pub fn clear_provenance(&mut self) {
1330+
self.alloc.clear_provenance(&self.tcx, self.range);
13401331
}
13411332
}
13421333

@@ -1427,11 +1418,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
14271418

14281419
// Side-step AllocRef and directly access the underlying bytes more efficiently.
14291420
// (We are staying inside the bounds here and all bytes do get overwritten so all is good.)
1430-
let alloc_id = alloc_ref.alloc_id;
1431-
let bytes = alloc_ref
1432-
.alloc
1433-
.get_bytes_unchecked_for_overwrite(&alloc_ref.tcx, alloc_ref.range)
1434-
.map_err(move |e| e.to_interp_error(alloc_id))?;
1421+
let bytes =
1422+
alloc_ref.alloc.get_bytes_unchecked_for_overwrite(&alloc_ref.tcx, alloc_ref.range);
14351423
// `zip` would stop when the first iterator ends; we want to definitely
14361424
// cover all of `bytes`.
14371425
for dest in bytes {
@@ -1513,10 +1501,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
15131501
// `get_bytes_mut` will clear the provenance, which is correct,
15141502
// since we don't want to keep any provenance at the target.
15151503
// This will also error if copying partial provenance is not supported.
1516-
let provenance = src_alloc
1517-
.provenance()
1518-
.prepare_copy(src_range, dest_offset, num_copies, self)
1519-
.map_err(|e| e.to_interp_error(src_alloc_id))?;
1504+
let provenance =
1505+
src_alloc.provenance().prepare_copy(src_range, dest_offset, num_copies, self);
15201506
// Prepare a copy of the initialization mask.
15211507
let init = src_alloc.init_mask().prepare_copy(src_range);
15221508

@@ -1534,10 +1520,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
15341520
dest_range,
15351521
)?;
15361522
// Yes we do overwrite all bytes in `dest_bytes`.
1537-
let dest_bytes = dest_alloc
1538-
.get_bytes_unchecked_for_overwrite_ptr(&tcx, dest_range)
1539-
.map_err(|e| e.to_interp_error(dest_alloc_id))?
1540-
.as_mut_ptr();
1523+
let dest_bytes =
1524+
dest_alloc.get_bytes_unchecked_for_overwrite_ptr(&tcx, dest_range).as_mut_ptr();
15411525

15421526
if init.no_bytes_init() {
15431527
// Fast path: If all bytes are `uninit` then there is nothing to copy. The target range
@@ -1546,9 +1530,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
15461530
// This also avoids writing to the target bytes so that the backing allocation is never
15471531
// touched if the bytes stay uninitialized for the whole interpreter execution. On contemporary
15481532
// operating system this can avoid physically allocating the page.
1549-
dest_alloc
1550-
.write_uninit(&tcx, dest_range)
1551-
.map_err(|e| e.to_interp_error(dest_alloc_id))?;
1533+
dest_alloc.write_uninit(&tcx, dest_range);
15521534
// `write_uninit` also resets the provenance, so we are done.
15531535
return interp_ok(());
15541536
}

compiler/rustc_const_eval/src/interpret/place.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -705,7 +705,7 @@ where
705705

706706
match value {
707707
Immediate::Scalar(scalar) => {
708-
alloc.write_scalar(alloc_range(Size::ZERO, scalar.size()), scalar)
708+
alloc.write_scalar(alloc_range(Size::ZERO, scalar.size()), scalar)?;
709709
}
710710
Immediate::ScalarPair(a_val, b_val) => {
711711
let BackendRepr::ScalarPair(a, b) = layout.backend_repr else {
@@ -725,10 +725,10 @@ where
725725
alloc.write_scalar(alloc_range(Size::ZERO, a_val.size()), a_val)?;
726726
alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val)?;
727727
// We don't have to reset padding here, `write_immediate` will anyway do a validation run.
728-
interp_ok(())
729728
}
730729
Immediate::Uninit => alloc.write_uninit_full(),
731730
}
731+
interp_ok(())
732732
}
733733

734734
pub fn write_uninit(
@@ -748,7 +748,7 @@ where
748748
// Zero-sized access
749749
return interp_ok(());
750750
};
751-
alloc.write_uninit_full()?;
751+
alloc.write_uninit_full();
752752
}
753753
}
754754
interp_ok(())
@@ -772,7 +772,7 @@ where
772772
// Zero-sized access
773773
return interp_ok(());
774774
};
775-
alloc.clear_provenance()?;
775+
alloc.clear_provenance();
776776
}
777777
}
778778
interp_ok(())

0 commit comments

Comments
 (0)