From c82ad45b366a4aae28afea5587701ba82e32dcff Mon Sep 17 00:00:00 2001 From: Shunsuke Hayashi Date: Sat, 22 Nov 2025 18:25:00 +0900 Subject: [PATCH] feat(core): implement Glob and Grep search tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add GlobTool for finding files matching glob patterns - Supports ** recursive patterns - Configurable max results (default 1000) - Base directory support - Add GrepTool for searching file contents with regex - Regex pattern matching - Case insensitive option - File glob filtering - Directory recursive search - Max matches limit (default 100) - Add glob and regex dependencies to workspace - Export GlobTool and GrepTool from lib.rs - Update create_file_tool_registry (5 tools) - Update create_standard_tool_registry (6 tools) - Add 7 tests for Glob and Grep tools Closes #27 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Cargo.toml | 2 + crates/miyabi-core/Cargo.toml | 2 + crates/miyabi-core/src/lib.rs | 2 +- crates/miyabi-core/src/tools.rs | 425 +++++++++++++++++++++++++++++++- 4 files changed, 424 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 65cfee2..c147a04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,3 +61,5 @@ reqwest = { version = "0.12", features = ["json", "stream"] } chrono = { version = "0.4", features = ["serde"] } uuid = { version = "1", features = ["v4", "serde"] } once_cell = "1" +glob = "0.3" +regex = "1" diff --git a/crates/miyabi-core/Cargo.toml b/crates/miyabi-core/Cargo.toml index 0167c66..b01d311 100644 --- a/crates/miyabi-core/Cargo.toml +++ b/crates/miyabi-core/Cargo.toml @@ -21,6 +21,8 @@ uuid = { workspace = true } reqwest = { workspace = true } futures = { workspace = true } async-trait = { workspace = true } +glob = { workspace = true } +regex = { workspace = true } [dev-dependencies] tempfile = "3" diff --git a/crates/miyabi-core/src/lib.rs b/crates/miyabi-core/src/lib.rs index 
5765612..fb2b583 100644 --- a/crates/miyabi-core/src/lib.rs +++ b/crates/miyabi-core/src/lib.rs @@ -23,5 +23,5 @@ pub use tool::{ pub use conversation::{ Conversation, ConversationMessage, ConversationManager, ConversationMetadata, ConversationError, }; -pub use tools::{ReadTool, WriteTool, EditTool, BashTool, create_file_tool_registry, create_standard_tool_registry}; +pub use tools::{ReadTool, WriteTool, EditTool, BashTool, GlobTool, GrepTool, create_file_tool_registry, create_standard_tool_registry}; pub use token::{TokenCounter, TokenUsage, ContextManager, ContextUsage, ModelLimits}; diff --git a/crates/miyabi-core/src/tools.rs b/crates/miyabi-core/src/tools.rs index d026ba7..bf73024 100644 --- a/crates/miyabi-core/src/tools.rs +++ b/crates/miyabi-core/src/tools.rs @@ -5,6 +5,8 @@ use crate::tool::{ParameterDef, Tool, ToolError, ToolOutput, ToolResult}; use async_trait::async_trait; +use glob::glob; +use regex::Regex; use serde_json::Value; use std::path::{Path, PathBuf}; use std::process::Stdio; @@ -563,13 +565,290 @@ impl Tool for BashTool { } } +/// Glob tool for finding files matching patterns +/// +/// Finds files in the filesystem that match glob patterns. 
+#[derive(Debug, Clone)] +pub struct GlobTool { + /// Base directory for relative patterns + base_dir: PathBuf, + /// Maximum number of results to return + max_results: usize, +} + +impl Default for GlobTool { + fn default() -> Self { + Self::new() + } +} + +impl GlobTool { + /// Create a new glob tool + pub fn new() -> Self { + Self { + base_dir: std::env::current_dir().unwrap_or_default(), + max_results: 1000, + } + } + + /// Create with a specific base directory + pub fn with_base_dir(base_dir: impl Into<PathBuf>) -> Self { + Self { + base_dir: base_dir.into(), + max_results: 1000, + } + } +} + +#[async_trait] +impl Tool for GlobTool { + fn name(&self) -> &str { + "glob" + } + + fn description(&self) -> &str { + "Find files matching a glob pattern" + } + + fn parameters(&self) -> Vec<ParameterDef> { + vec![ + ParameterDef::required_string("pattern", "Glob pattern to match (e.g., **/*.rs, src/**/*.ts)"), + ParameterDef::optional_string("path", "Base directory to search in"), + ] + } + + async fn execute(&self, input: Value) -> ToolResult { + let pattern = input + .get("pattern") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidInput("pattern is required".to_string()))?; + + let base_path = input + .get("path") + .and_then(|v| v.as_str()) + .map(PathBuf::from) + .unwrap_or_else(|| self.base_dir.clone()); + + debug!("Glob search: {} in {:?}", pattern, base_path); + + // Construct full pattern + let full_pattern = if pattern.starts_with('/') { + pattern.to_string() + } else { + format!("{}/{}", base_path.display(), pattern) + }; + + // Execute glob + let entries = glob(&full_pattern).map_err(|e| { + ToolError::InvalidInput(format!("Invalid glob pattern: {}", e)) + })?; + + let mut matches = Vec::new(); + for entry in entries { + match entry { + Ok(path) => { + if matches.len() >= self.max_results { + break; + } + matches.push(path.display().to_string()); + } + Err(e) => { + warn!("Glob error for entry: {}", e); + } + } + } + + Ok(ToolOutput::success(serde_json::json!({ + 
"pattern": pattern, + "matches": matches, + "count": matches.len(), + "truncated": matches.len() >= self.max_results + }))) + } +} + +/// Grep tool for searching file contents +/// +/// Searches for patterns in file contents using regex. +#[derive(Debug, Clone)] +pub struct GrepTool { + /// Base directory for relative paths + base_dir: PathBuf, + /// Maximum number of matches to return + max_matches: usize, + /// Maximum file size to search (in bytes) + max_file_size: u64, +} + +impl Default for GrepTool { + fn default() -> Self { + Self::new() + } +} + +impl GrepTool { + /// Create a new grep tool + pub fn new() -> Self { + Self { + base_dir: std::env::current_dir().unwrap_or_default(), + max_matches: 100, + max_file_size: 10 * 1024 * 1024, // 10MB + } + } + + /// Create with a specific base directory + pub fn with_base_dir(base_dir: impl Into<PathBuf>) -> Self { + Self { + base_dir: base_dir.into(), + max_matches: 100, + max_file_size: 10 * 1024 * 1024, + } + } +} + +#[async_trait] +impl Tool for GrepTool { + fn name(&self) -> &str { + "grep" + } + + fn description(&self) -> &str { + "Search for a pattern in files using regex" + } + + fn parameters(&self) -> Vec<ParameterDef> { + vec![ + ParameterDef::required_string("pattern", "Regex pattern to search for"), + ParameterDef::required_string("path", "File or directory to search in"), + ParameterDef::optional_string("glob", "Glob pattern to filter files (e.g., *.rs)"), + ParameterDef { + name: "case_insensitive".to_string(), + param_type: "boolean".to_string(), + description: "Case insensitive search".to_string(), + required: false, + default: Some(Value::Bool(false)), + enum_values: None, + }, + ] + } + + async fn execute(&self, input: Value) -> ToolResult { + let pattern = input + .get("pattern") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidInput("pattern is required".to_string()))?; + + let path = input + .get("path") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidInput("path is 
required".to_string()))?; + + let glob_pattern = input.get("glob").and_then(|v| v.as_str()); + + let case_insensitive = input + .get("case_insensitive") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + debug!("Grep search: {} in {} (case_insensitive: {})", pattern, path, case_insensitive); + + // Build regex + let regex_pattern = if case_insensitive { + format!("(?i){}", pattern) + } else { + pattern.to_string() + }; + + let regex = Regex::new(&regex_pattern).map_err(|e| { + ToolError::InvalidInput(format!("Invalid regex pattern: {}", e)) + })?; + + let search_path = if Path::new(path).is_absolute() { + PathBuf::from(path) + } else { + self.base_dir.join(path) + }; + + let mut results = Vec::new(); + + // Collect files to search + let files_to_search = if search_path.is_file() { + vec![search_path] + } else if search_path.is_dir() { + // Use glob to find files + let glob_str = if let Some(g) = glob_pattern { + format!("{}/**/{}", search_path.display(), g) + } else { + format!("{}/**/*", search_path.display()) + }; + + glob(&glob_str) + .map_err(|e| ToolError::ExecutionFailed(format!("Glob error: {}", e)))? 
+ .filter_map(|entry| entry.ok()) + .filter(|p| p.is_file()) + .collect() + } else { + return Err(ToolError::ExecutionFailed(format!( + "Path does not exist: {}", + path + ))); + }; + + // Search files + for file_path in files_to_search { + if results.len() >= self.max_matches { + break; + } + + // Check file size + let metadata = match std::fs::metadata(&file_path) { + Ok(m) => m, + Err(_) => continue, + }; + + if metadata.len() > self.max_file_size { + continue; + } + + // Read and search file + let content = match std::fs::read_to_string(&file_path) { + Ok(c) => c, + Err(_) => continue, // Skip binary files + }; + + for (line_num, line) in content.lines().enumerate() { + if results.len() >= self.max_matches { + break; + } + + if regex.is_match(line) { + results.push(serde_json::json!({ + "file": file_path.display().to_string(), + "line": line_num + 1, + "content": line + })); + } + } + } + + Ok(ToolOutput::success(serde_json::json!({ + "pattern": pattern, + "matches": results, + "count": results.len(), + "truncated": results.len() >= self.max_matches + }))) + } +} + /// Create a tool registry with all file tools pub fn create_file_tool_registry() -> crate::tool::ToolRegistry { let mut registry = crate::tool::ToolRegistry::new(); registry .register(ReadTool::new()) .register(WriteTool::new()) - .register(EditTool::new()); + .register(EditTool::new()) + .register(GlobTool::new()) + .register(GrepTool::new()); registry } @@ -580,7 +859,9 @@ pub fn create_standard_tool_registry() -> crate::tool::ToolRegistry { .register(ReadTool::new()) .register(WriteTool::new()) .register(EditTool::new()) - .register(BashTool::new()); + .register(BashTool::new()) + .register(GlobTool::new()) + .register(GrepTool::new()); registry } @@ -752,10 +1033,12 @@ mod tests { #[test] fn test_create_file_tool_registry() { let registry = create_file_tool_registry(); - assert_eq!(registry.len(), 3); + assert_eq!(registry.len(), 5); assert!(registry.contains("read")); 
assert!(registry.contains("write")); assert!(registry.contains("edit")); + assert!(registry.contains("glob")); + assert!(registry.contains("grep")); } #[test] @@ -860,8 +1143,8 @@ mod tests { fn test_standard_registry_has_all_tools() { let registry = create_standard_tool_registry(); - // Should have all 4 tools - assert_eq!(registry.len(), 4); + // Should have all 6 tools + assert_eq!(registry.len(), 6); // Check each tool exists let api_tools = registry.to_anthropic_tools(); @@ -871,6 +1154,8 @@ mod tests { assert!(tool_names.contains(&"write")); assert!(tool_names.contains(&"edit")); assert!(tool_names.contains(&"bash")); + assert!(tool_names.contains(&"glob")); + assert!(tool_names.contains(&"grep")); } #[test] @@ -941,10 +1226,138 @@ mod tests { #[test] fn test_create_standard_tool_registry() { let registry = create_standard_tool_registry(); - assert_eq!(registry.len(), 4); + assert_eq!(registry.len(), 6); assert!(registry.contains("read")); assert!(registry.contains("write")); assert!(registry.contains("edit")); assert!(registry.contains("bash")); + assert!(registry.contains("glob")); + assert!(registry.contains("grep")); + } + + #[tokio::test] + async fn test_glob_tool_basic() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "test1.txt", "content"); + create_temp_file(&dir, "test2.txt", "content"); + + let tool = GlobTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "*.txt" + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + assert!(output.success); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 2); // test1.txt and test2.txt + } + + #[tokio::test] + async fn test_glob_tool_recursive() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "root.rs", "fn main() {}"); + let sub_dir = dir.path().join("src"); + std::fs::create_dir(&sub_dir).unwrap(); + let sub_path = sub_dir.join("lib.rs"); + std::fs::write(&sub_path, "pub fn test() 
{}").unwrap(); + + let tool = GlobTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "**/*.rs" + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 2); // root.rs and src/lib.rs + } + + #[tokio::test] + async fn test_grep_tool_basic() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "test.txt", "Line 1\nLine 2 with pattern\nLine 3"); + + let tool = GrepTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "pattern", + "path": "test.txt" + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + assert!(output.success); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 1); + + let matches = output.content["matches"].as_array().unwrap(); + assert_eq!(matches[0]["line"], 2); + assert!(matches[0]["content"].as_str().unwrap().contains("pattern")); + } + + #[tokio::test] + async fn test_grep_tool_regex() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "test.txt", "foo123\nbar456\nfoo789"); + + let tool = GrepTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "foo\\d+", + "path": "test.txt" + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 2); // foo123 and foo789 + } + + #[tokio::test] + async fn test_grep_tool_case_insensitive() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "test.txt", "Hello\nhello\nHELLO"); + + let tool = GrepTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "hello", + "path": "test.txt", + "case_insensitive": true + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 3); + } + + 
#[tokio::test] + async fn test_grep_tool_directory_search() { + let dir = TempDir::new().unwrap(); + create_temp_file(&dir, "file1.txt", "pattern here"); + create_temp_file(&dir, "file2.txt", "no match"); + create_temp_file(&dir, "file3.txt", "pattern again"); + + let tool = GrepTool::with_base_dir(dir.path()); + let result = tool + .execute(serde_json::json!({ + "pattern": "pattern", + "path": "." + })) + .await; + + assert!(result.is_ok()); + let output = result.unwrap(); + let count = output.content["count"].as_u64().unwrap(); + assert_eq!(count, 2); } }