Skip to main content

mito2/engine/
region_hook.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Region hook extension point for observing SST writes and manifest mutations.
16//!
17//! ## Design
18//!
19//! The [`RegionHook`] trait provides two methods with clear separation of concerns:
20//!
21//! - [`on_sst_files_written`]: Fires when mito2 physically writes SST **data files**.
22//!   Provides per-file [`SstInfo`] + [`FileMeta`] — the richest available metadata
23//!   (row counts, index metadata, Parquet metadata, etc.).
24//!
25//! - [`on_manifest_updated`]: Fires after **any** manifest write is successfully committed.
26//!   Receives the full [`RegionMetaActionList`] so consumers can inspect what changed
27//!   (file additions/removals, schema changes, truncation, partition expression changes, etc.).
28//!
29//! Hook implementations are registered via the [`Plugins`](common_base::Plugins) system:
30//! ```ignore
31//! plugins.insert(Arc::new(MyHook) as RegionHookRef);
32//! ```
33//!
34//! ## Coverage
35//!
36//! | Scenario                     | `on_sst_files_written` | `on_manifest_updated` |
37//! |------------------------------|:----------------------:|:---------------------:|
38//! | Flush (memtable → SST)       | ✅ Yes                 | ✅ Yes                |
39//! | Local compaction             | ✅ Yes                 | ✅ Yes                |
40//! | Remote compaction            | ✅ (compactor node) ¹     | ✅ (compactor node) ¹    |
41//! | RegionEdit / bulk ingestion  | ❌ (files pre-written)  | ✅ Yes                |
42//! | Copy region                  | ❌ (object-store copy)  | ✅ Yes                |
43//! | Apply staging                | ❌ (delegates to edit)  | ✅ Yes ²               |
44//! | Alter (schema change)        | ❌ (no SST files)       | ✅ Yes                |
45//! | Truncate                     | ❌ (removes files)      | ✅ Yes                |
46//! | Enter staging                | ❌ (no SST files)       | ✅ Yes                |
47//! | Async index build            | ❌ (index files only)   | ✅ Yes                |
48//!
49//! ¹ Remote compaction runs on a dedicated compactor node via `open_compaction_region()`.
50//!   The caller must pass plugins via `OpenCompactionRegionRequest` to enable hooks on the
51//!   compactor node.
52//! ² Apply staging fires `on_manifest_updated` twice: once when the staging SST files are
53//!   committed via `RegionEdit`, and once when `exit_staging_on_success` merges all staged
54//!   manifest actions into the live manifest.
55//!
56//! The following paths do **not** trigger any hook:
57//! - Follower region sync / catchup (manifest read-only; followers don't author changes)
58//! - GC / checkpoint / drop / remap (internal bookkeeping, not logical state changes)
59//!
60//! ## Invocation points
61//!
62//! `on_sst_files_written` is invoked at the SST write site (flush task or compaction task),
63//! immediately after SST files are written but **before** the manifest is committed.
64//!
65//! `on_manifest_updated` is funneled through [`ManifestContext::update_locked`],
66//! the sole caller of the low-level [`RegionManifestManager::update`], which
67//! packages each successful write into a [`PendingManifestHook`]. The caller
68//! owns the write lock, drops it, and *then* fires the receipt — the hook must
69//! never run under the lock. [`ManifestContext::update_manifest`] is the common
70//! case: it acquires the lock, delegates to `update_locked`, and fires the
71//! receipt in one go. Multi-step sequences (staging-exit, role-state backfill)
72//! call `update_locked` directly under their own held guard.
73//!
74//! Non-logical writes (GC, staging bookkeeping) call the manager's own methods
75//! directly and intentionally do not fire the hook.
76//!
77//! ## Future work
78//!
79//! A future `on_files_removed` hook may be added to observe file lifecycle end
80//! (GC, drop, truncate, compaction removal). This is not yet implemented.
81//!
82//! [`on_sst_files_written`]: RegionHook::on_sst_files_written
83//! [`on_manifest_updated`]: RegionHook::on_manifest_updated
84//! [`RegionManifestManager::update`]: crate::manifest::manager::RegionManifestManager::update
85//! [`ManifestContext::update_locked`]: crate::region::ManifestContext::update_locked
86//! [`ManifestContext::update_manifest`]: crate::region::ManifestContext::update_manifest
87
88use std::fmt::Debug;
89use std::sync::Arc;
90
91use async_trait::async_trait;
92use store_api::ManifestVersion;
93use store_api::metadata::RegionMetadataRef;
94use store_api::storage::RegionId;
95
96use crate::manifest::action::RegionMetaActionList;
97use crate::sst::file::FileMeta;
98use crate::sst::parquet::SstInfo;
99
100/// A deferred [`RegionHook::on_manifest_updated`] notification produced by a
101/// logical manifest write via [`ManifestContext::update_locked`](crate::region::ManifestContext::update_locked).
102///
103/// Must be [`fire`](Self::fire)d **after** the manifest write lock is released
104/// (the hook may read the manifest). `#[must_use]` so a forgotten receipt warns.
105#[must_use = "the region hook must be fired after releasing the manifest write lock"]
106pub(crate) struct PendingManifestHook {
107    region_id: RegionId,
108    /// `None` when no hook is registered (fire becomes a no-op).
109    action_list: Option<RegionMetaActionList>,
110    version: ManifestVersion,
111    hook: Option<RegionHookRef>,
112}
113
114impl PendingManifestHook {
115    pub(crate) fn new(
116        region_id: RegionId,
117        action_list: Option<RegionMetaActionList>,
118        version: ManifestVersion,
119        hook: Option<RegionHookRef>,
120    ) -> Self {
121        Self {
122            region_id,
123            action_list,
124            version,
125            hook,
126        }
127    }
128
129    /// The manifest version produced by the write.
130    pub(crate) fn version(&self) -> ManifestVersion {
131        self.version
132    }
133
134    /// Fires the hook if one is registered. Safe to call unconditionally: it is
135    /// a no-op when no hook is registered.
136    pub(crate) async fn fire(self) {
137        if let (Some(hook), Some(action_list)) = (self.hook, self.action_list) {
138            hook.on_manifest_updated(self.region_id, &action_list, self.version)
139                .await;
140        }
141    }
142
143    /// Merges two pending notifications into one so consumers observe a single
144    /// `on_manifest_updated` call covering all actions. The combined action list
145    /// keeps `self`'s actions followed by `other`'s, and the *later* manifest
146    /// version wins. Used when a sequence of writes (e.g. staging-exit followed
147    /// by metadata backfill) should notify the hook exactly once.
148    pub(crate) fn merge(self, other: PendingManifestHook) -> PendingManifestHook {
149        debug_assert_eq!(
150            self.region_id, other.region_id,
151            "Cannot merge pending hooks of different regions: {:?} and {:?}",
152            self.region_id, other.region_id
153        );
154        PendingManifestHook {
155            region_id: self.region_id,
156            action_list: match (self.action_list, other.action_list) {
157                (Some(mut a), Some(b)) => {
158                    a.actions.extend(b.actions);
159                    Some(a)
160                }
161                (a, None) => a,
162                (None, b) => b,
163            },
164            version: self.version.max(other.version),
165            hook: self.hook.or(other.hook),
166        }
167    }
168}
169
170/// Information about a single SST data file written during flush or compaction.
171pub struct SstFileInfo<'a> {
172    pub sst_info_ref: &'a SstInfo,
173    pub file_meta: &'a FileMeta,
174}
175
176/// Hook for observing region mutations in mito2.
177///
178/// Implementations can be registered via the `Plugins` system:
179/// ```ignore
180/// use std::sync::Arc;
181/// use common_base::Plugins;
182/// use mito2::engine::region_hook::{RegionHook, RegionHookRef};
183///
184/// plugins.insert(Arc::new(MyHook) as RegionHookRef);
185/// ```
186#[async_trait]
187pub trait RegionHook: Send + Sync + Debug {
188    /// Called after SST **data files** are physically written, before manifest commit.
189    ///
190    /// This fires only when mito2 itself writes SST files (flush and compaction).
191    /// It does **not** fire when SST files are pre-written externally (bulk ingestion,
192    /// copy region) or when only index files are written (async index build).
193    async fn on_sst_files_written(
194        &self,
195        region_id: RegionId,
196        region_metadata: &RegionMetadataRef,
197        files: &[SstFileInfo<'_>],
198    ) {
199        let _ = (region_id, region_metadata, files);
200    }
201
202    /// Called after the region manifest is successfully committed.
203    ///
204    /// Fires for **all** manifest write paths: flush, compaction, region edit,
205    /// copy region, alter, truncate, enter staging, index build, etc.
206    ///
207    /// Does **not** fire for:
208    /// - Manifest reads / follower sync (no write)
209    /// - GC / checkpoint (internal bookkeeping)
210    /// - Failed manifest updates
211    async fn on_manifest_updated(
212        &self,
213        region_id: RegionId,
214        action_list: &RegionMetaActionList,
215        manifest_version: ManifestVersion,
216    ) {
217        let _ = (region_id, action_list, manifest_version);
218    }
219}
220
221pub type RegionHookRef = Arc<dyn RegionHook>;