diff --git a/packages/cli/src/build/patch.rs b/packages/cli/src/build/patch.rs index 994c7147f2..de2f76810d 100644 --- a/packages/cli/src/build/patch.rs +++ b/packages/cli/src/build/patch.rs @@ -15,7 +15,7 @@ use std::{ sync::{Arc, RwLock}, }; use subsecond_types::{AddressMap, JumpTable}; -use target_lexicon::{Architecture, OperatingSystem, Triple}; +use target_lexicon::{Architecture, OperatingSystem, PointerWidth, Triple}; use thiserror::Error; use walrus::{ ConstExpr, DataKind, ElementItems, ElementKind, FunctionBuilder, FunctionId, FunctionKind, @@ -83,6 +83,7 @@ pub struct CachedSymbol { pub kind: SymbolKind, pub is_undefined: bool, pub is_weak: bool, + pub size: u64, } impl PartialEq for HotpatchModuleCache { @@ -137,6 +138,7 @@ impl HotpatchModuleCache { }, is_undefined, is_weak: false, + size: 0, }, ); } @@ -155,6 +157,7 @@ impl HotpatchModuleCache { kind: SymbolKind::Data, is_undefined, is_weak: false, + size: 0, }, ); } @@ -256,6 +259,7 @@ impl HotpatchModuleCache { is_undefined: s.is_undefined(), is_weak: s.is_weak(), kind: s.kind(), + size: s.size(), }, )) }) @@ -905,32 +909,7 @@ pub fn create_undefined_symbol_stub( // Unfortunately this isn't simply cross-platform, so we need to handle Unix and Windows // calling conventions separately. It also depends on the architecture, making it even more // complicated. - // - // Rust code typically generates Tls symbols as functions (text), so we handle them as jumps too. - // Figured this out by checking the disassembly of the symbols causing the violation. - // ``` - // __ZN17crossbeam_channel5waker17current_thread_id9THREAD_ID29_$u7b$$u7b$constant$u7d$$u7d$28_$u7b$$u7b$closure$u7d$$u7d$17h33618d877d86bb77E: - // stp x20, x19, [sp, #-0x20]! - // stp x29, x30, [sp, #0x10] - // add x29, sp, #0x10 - // adrp x19, 21603 ; 0x1054bd000 - // add x19, x19, #0x998 - // ldr x20, [x19] - // mov x0, x19 - // blr x20 - // ldr x8, [x0] - // cbz x8, 0x10005acc0 - // mov x0, x19 - // blr x20 - // ldp x29, x30, [sp, #0x10] - // ldp x20, x19, [sp], #0x20 - // ret - // mov x0, x19 - // blr x20 - // bl __ZN3std3sys12thread_local6native4lazy20Storage$LT$T$C$D$GT$10initialize17h818476638edff4e6E - // b 0x10005acac - // ``` - SymbolKind::Text | SymbolKind::Tls => { + SymbolKind::Text => { let jump_asm = match triple.operating_system { // The windows ABI and calling convention is different than the SystemV ABI. OperatingSystem::Windows => match triple.architecture { @@ -1053,6 +1032,75 @@ pub fn create_undefined_symbol_stub( }); } + // Rust code typically generates Tls accessors as functions (text), but they are referenced + // indirectly as data symbols. We end up handling this by adding the TLS symbol as a data + // symbol with the initializer as the address of the original tls initializer. That way + // if new TLS are added at runtime, they get initialized properly, but otherwise, the + // tls initialization check (cbz) properly skips re-initialization on patches. + // + // ``` + // __ZN17crossbeam_channel5waker17current_thread_id9THREAD_ID29_$u7b$$u7b$constant$u7d$$u7d$28_$u7b$$u7b$closure$u7d$$u7d$17h33618d877d86bb77E: + // stp x20, x19, [sp, #-0x20]! + // stp x29, x30, [sp, #0x10] + // add x29, sp, #0x10 + // adrp x19, 21603 ; 0x1054bd000 + // add x19, x19, #0x998 + // ldr x20, [x19] + // mov x0, x19 + // blr x20 + // ldr x8, [x0] + // cbz x8, 0x10005acc0 + // mov x0, x19 + // blr x20 + // ldp x29, x30, [sp, #0x10] + // ldp x20, x19, [sp], #0x20 + // ret + // mov x0, x19 + // blr x20 + // bl __ZN3std3sys12thread_local6native4lazy20Storage$LT$T$C$D$GT$10initialize17h818476638edff4e6E + // b 0x10005acac + // ``` + SymbolKind::Tls => { + let tls_section = obj.section_id(StandardSection::Tls); + + let pointer_width = match triple.pointer_width().unwrap() { + PointerWidth::U16 => 2, + PointerWidth::U32 => 4, + PointerWidth::U64 => 8, + }; + + let size = if sym.size == 0 { + pointer_width + } else { + sym.size + }; + + let align = size.min(pointer_width).next_power_of_two(); + let mut init = vec![0u8; size as usize]; + + // write the contents of the symbol to the init vec + init.iter_mut() + .zip(match triple.endianness() { + Ok(target_lexicon::Endianness::Little) => abs_addr.to_le_bytes(), + Ok(target_lexicon::Endianness::Big) => abs_addr.to_be_bytes(), + _ => return Err(PatchError::UnsupportedPlatform(triple.to_string())), + }) + .for_each(|(b, v)| *b = v); + + let offset = obj.append_section_data(tls_section, &init, align); + + obj.add_symbol(Symbol { + name: name.as_bytes()[name_offset..].to_vec(), + value: offset, // offset inside .tdata + size, + scope: SymbolScope::Linkage, + kind: SymbolKind::Tls, + weak: false, + section: SymbolSection::Section(tls_section), + flags: object::SymbolFlags::None, + }); + } + // We just assume all non-text symbols are data (globals, statics, etc) _ => { // darwin statics show up as "unknown" symbols even though they are data symbols. diff --git a/packages/subsecond/subsecond/src/lib.rs b/packages/subsecond/subsecond/src/lib.rs index 325ccce5fd..6a2f71b908 100644 --- a/packages/subsecond/subsecond/src/lib.rs +++ b/packages/subsecond/subsecond/src/lib.rs @@ -64,7 +64,17 @@ //! Subsecond is only enabled when debug_assertions are enabled so you can safely ship your application //! with Subsecond enabled without worrying about the performance overhead. //! -//! ## Globals and statics +//! ## Workspace support +//! +//! Subsecond currently only patches the "tip" crate - ie the crate in which your `main.rs` is located. +//! Changes to crates outside this crate will be ignored, which can be confusing. We plan to add full +//! workspace support in the future, but for now be aware of this limitation. Crate setups that have +//! a `main.rs` importing a `lib.rs` won't patch sensibly since the crate becomes a library for itself. +//! +//! This is due to limitations in rustc itself where the build-graph is non-deterministic and changes +//! to functions that forward generics can cause a cascade of codegen changes. +//! +//! ## Globals, statics, and thread-locals //! //! Subsecond *does* support hot-reloading of globals, statics, and thread locals. However, there are several limitations: //! @@ -75,6 +85,12 @@ //! Subsecond purposefully handles statics this way since many libraries like Dioxus and Tokio rely //! on persistent global runtimes. //! +//! HUGE WARNING: Currently, thread-locals in the "tip" crate (the one being patched) will seemingly +//! reset to their initial value on new patches. This is because we don't currently bind thread-locals +//! in the patches to their original addresses in the main program. If you rely on thread-locals heavily +//! in your tip crate, you should be aware of this. Sufficiently complex setups might crash or even +//! segfault. We plan to fix this in the future, but for now, you should be aware of this limitation. +//! //! ## Struct layout and alignment //! //! Subsecond currently does not support hot-reloading of structs. This is because the generated code @@ -84,14 +100,27 @@ //! To mitigate this, framework authors can integrate with Subsecond to either dispose of the old struct //! or to re-allocate the struct in a way that is compatible with the new layout. This is called "re-instancing." //! -//! Because Subsecond performs a safe panic if a stale function is called, you should never witness -//! a crash due to a struct layout change. However, changing a struct's layout will likely cause a -//! re-instantiation of the struct and potentially a loss of state. +//! In practice, frameworks that implement subsecond patching properly will throw out the old state +//! and thus you should never witness a segfault due to misalignment or size changes. Frameworks are +//! encouraged to aggressively dispose of old state that might cause size and alignment changes. //! //! We'd like to lift this limitation in the future by providing utilities to re-instantiate structs, //! but for now it's up to the framework authors to handle this. For example, Dioxus apps simply throw //! out the old state and rebuild it from scratch. //! +//! ## Pointer versioning +//! +//! Currently, Subsecond does not "version" function pointers. We have plans to provide this metadata +//! so framework authors can safely memoize changes without much runtime overhead. Frameworks like +//! Dioxus and Bevy circumvent this issue by using the TypeID of structs passed to hot functions as +//! well as the `ptr_address` method on [`HotFn`] to determine if the function pointer has changed. +//! +//! Currently, the `ptr_address` method will always return the most up-to-date version of the function +//! even if the function contents itself did not change. In essence, this is equivalent to a version +//! of the function where every function is considered "new." This means that framework authors who +//! integrate re-instancing in their apps might dispose of old state too aggressively. For now, this +//! is the safer and more practical approach. +//! //! ## Nesting Calls //! //! Subsecond calls are designed to be nested. This provides clean integration points to know exactly