mito2/engine/region_hook.rs
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Region hook extension point for observing SST writes and manifest mutations.
16//!
17//! ## Design
18//!
19//! The [`RegionHook`] trait provides two methods with clear separation of concerns:
20//!
21//! - [`on_sst_files_written`]: Fires when mito2 physically writes SST **data files**.
22//! Provides per-file [`SstInfo`] + [`FileMeta`] — the richest available metadata
23//! (row counts, index metadata, Parquet metadata, etc.).
24//!
//! - [`on_manifest_updated`]: Fires after **any logical** manifest write is successfully
//!   committed (non-logical writes such as GC or checkpointing are intentionally excluded).
//!   Receives the full [`RegionMetaActionList`] so consumers can inspect what changed
//!   (file additions/removals, schema changes, truncation, partition expression changes, etc.).
28//!
29//! Hook implementations are registered via the [`Plugins`](common_base::Plugins) system:
30//! ```ignore
31//! plugins.insert(Arc::new(MyHook) as RegionHookRef);
32//! ```
33//!
34//! ## Coverage
35//!
36//! | Scenario | `on_sst_files_written` | `on_manifest_updated` |
37//! |------------------------------|:----------------------:|:---------------------:|
38//! | Flush (memtable → SST) | ✅ Yes | ✅ Yes |
39//! | Local compaction | ✅ Yes | ✅ Yes |
40//! | Remote compaction | ✅ (compactor node) ¹ | ✅ (compactor node) ¹ |
41//! | RegionEdit / bulk ingestion | ❌ (files pre-written) | ✅ Yes |
42//! | Copy region | ❌ (object-store copy) | ✅ Yes |
43//! | Apply staging | ❌ (delegates to edit) | ✅ Yes ² |
44//! | Alter (schema change) | ❌ (no SST files) | ✅ Yes |
45//! | Truncate | ❌ (removes files) | ✅ Yes |
46//! | Enter staging | ❌ (no SST files) | ✅ Yes |
47//! | Async index build | ❌ (index files only) | ✅ Yes |
48//!
49//! ¹ Remote compaction runs on a dedicated compactor node via `open_compaction_region()`.
50//! The caller must pass plugins via `OpenCompactionRegionRequest` to enable hooks on the
51//! compactor node.
52//! ² Apply staging fires `on_manifest_updated` twice: once when the staging SST files are
53//! committed via `RegionEdit`, and once when `exit_staging_on_success` merges all staged
54//! manifest actions into the live manifest.
55//!
56//! The following paths do **not** trigger any hook:
57//! - Follower region sync / catchup (manifest read-only; followers don't author changes)
58//! - GC / checkpoint / drop / remap (internal bookkeeping, not logical state changes)
59//!
60//! ## Invocation points
61//!
62//! `on_sst_files_written` is invoked at the SST write site (flush task or compaction task),
63//! immediately after SST files are written but **before** the manifest is committed.
64//!
65//! `on_manifest_updated` is funneled through [`ManifestContext::update_locked`],
66//! the sole caller of the low-level [`RegionManifestManager::update`], which
67//! packages each successful write into a [`PendingManifestHook`]. The caller
68//! owns the write lock, drops it, and *then* fires the receipt — the hook must
69//! never run under the lock. [`ManifestContext::update_manifest`] is the common
70//! case: it acquires the lock, delegates to `update_locked`, and fires the
71//! receipt in one go. Multi-step sequences (staging-exit, role-state backfill)
72//! call `update_locked` directly under their own held guard.
73//!
74//! Non-logical writes (GC, staging bookkeeping) call the manager's own methods
75//! directly and intentionally do not fire the hook.
76//!
77//! ## Future work
78//!
79//! A future `on_files_removed` hook may be added to observe file lifecycle end
80//! (GC, drop, truncate, compaction removal). This is not yet implemented.
81//!
82//! [`on_sst_files_written`]: RegionHook::on_sst_files_written
83//! [`on_manifest_updated`]: RegionHook::on_manifest_updated
84//! [`RegionManifestManager::update`]: crate::manifest::manager::RegionManifestManager::update
85//! [`ManifestContext::update_locked`]: crate::region::ManifestContext::update_locked
86//! [`ManifestContext::update_manifest`]: crate::region::ManifestContext::update_manifest
87
88use std::fmt::Debug;
89use std::sync::Arc;
90
91use async_trait::async_trait;
92use store_api::ManifestVersion;
93use store_api::metadata::RegionMetadataRef;
94use store_api::storage::RegionId;
95
96use crate::manifest::action::RegionMetaActionList;
97use crate::sst::file::FileMeta;
98use crate::sst::parquet::SstInfo;
99
100/// A deferred [`RegionHook::on_manifest_updated`] notification produced by a
101/// logical manifest write via [`ManifestContext::update_locked`](crate::region::ManifestContext::update_locked).
102///
103/// Must be [`fire`](Self::fire)d **after** the manifest write lock is released
104/// (the hook may read the manifest). `#[must_use]` so a forgotten receipt warns.
105#[must_use = "the region hook must be fired after releasing the manifest write lock"]
106pub(crate) struct PendingManifestHook {
107 region_id: RegionId,
108 /// `None` when no hook is registered (fire becomes a no-op).
109 action_list: Option<RegionMetaActionList>,
110 version: ManifestVersion,
111 hook: Option<RegionHookRef>,
112}
113
114impl PendingManifestHook {
115 pub(crate) fn new(
116 region_id: RegionId,
117 action_list: Option<RegionMetaActionList>,
118 version: ManifestVersion,
119 hook: Option<RegionHookRef>,
120 ) -> Self {
121 Self {
122 region_id,
123 action_list,
124 version,
125 hook,
126 }
127 }
128
129 /// The manifest version produced by the write.
130 pub(crate) fn version(&self) -> ManifestVersion {
131 self.version
132 }
133
134 /// Fires the hook if one is registered. Safe to call unconditionally: it is
135 /// a no-op when no hook is registered.
136 pub(crate) async fn fire(self) {
137 if let (Some(hook), Some(action_list)) = (self.hook, self.action_list) {
138 hook.on_manifest_updated(self.region_id, &action_list, self.version)
139 .await;
140 }
141 }
142
143 /// Merges two pending notifications into one so consumers observe a single
144 /// `on_manifest_updated` call covering all actions. The combined action list
145 /// keeps `self`'s actions followed by `other`'s, and the *later* manifest
146 /// version wins. Used when a sequence of writes (e.g. staging-exit followed
147 /// by metadata backfill) should notify the hook exactly once.
148 pub(crate) fn merge(self, other: PendingManifestHook) -> PendingManifestHook {
149 debug_assert_eq!(
150 self.region_id, other.region_id,
151 "Cannot merge pending hooks of different regions: {:?} and {:?}",
152 self.region_id, other.region_id
153 );
154 PendingManifestHook {
155 region_id: self.region_id,
156 action_list: match (self.action_list, other.action_list) {
157 (Some(mut a), Some(b)) => {
158 a.actions.extend(b.actions);
159 Some(a)
160 }
161 (a, None) => a,
162 (None, b) => b,
163 },
164 version: self.version.max(other.version),
165 hook: self.hook.or(other.hook),
166 }
167 }
168}
169
170/// Information about a single SST data file written during flush or compaction.
171pub struct SstFileInfo<'a> {
172 pub sst_info_ref: &'a SstInfo,
173 pub file_meta: &'a FileMeta,
174}
175
176/// Hook for observing region mutations in mito2.
177///
178/// Implementations can be registered via the `Plugins` system:
179/// ```ignore
180/// use std::sync::Arc;
181/// use common_base::Plugins;
182/// use mito2::engine::region_hook::{RegionHook, RegionHookRef};
183///
184/// plugins.insert(Arc::new(MyHook) as RegionHookRef);
185/// ```
186#[async_trait]
187pub trait RegionHook: Send + Sync + Debug {
188 /// Called after SST **data files** are physically written, before manifest commit.
189 ///
190 /// This fires only when mito2 itself writes SST files (flush and compaction).
191 /// It does **not** fire when SST files are pre-written externally (bulk ingestion,
192 /// copy region) or when only index files are written (async index build).
193 async fn on_sst_files_written(
194 &self,
195 region_id: RegionId,
196 region_metadata: &RegionMetadataRef,
197 files: &[SstFileInfo<'_>],
198 ) {
199 let _ = (region_id, region_metadata, files);
200 }
201
202 /// Called after the region manifest is successfully committed.
203 ///
204 /// Fires for **all** manifest write paths: flush, compaction, region edit,
205 /// copy region, alter, truncate, enter staging, index build, etc.
206 ///
207 /// Does **not** fire for:
208 /// - Manifest reads / follower sync (no write)
209 /// - GC / checkpoint (internal bookkeeping)
210 /// - Failed manifest updates
211 async fn on_manifest_updated(
212 &self,
213 region_id: RegionId,
214 action_list: &RegionMetaActionList,
215 manifest_version: ManifestVersion,
216 ) {
217 let _ = (region_id, action_list, manifest_version);
218 }
219}
220
221pub type RegionHookRef = Arc<dyn RegionHook>;