/** * Inbox Item Processing Service * Handles text analysis and suggestion generation for inbox items */ const nlp = require('compromise'); // Helper constants const AUXILIARY_VERBS = [ 'be', 'is', 'am', 'are', 'was', 'were', 'being', 'been', 'have', 'has', 'had', 'having', 'does', 'did', 'doing', 'will', 'would', 'shall', 'should', 'may', 'might', 'can', 'could', 'must', 'ought', ]; /** * Check if a word is an action verb using NLP * @param {string} word - Word to check * @returns {boolean} True if the word is an action verb */ const isActionVerb = (word) => { if (!word || typeof word !== 'string') return false; try { const doc = nlp(word.toLowerCase()); const verbs = doc.verbs(); if (verbs.length === 0) return false; // Check if it's an action verb (not auxiliary/linking verbs when used alone) const text = verbs.text().toLowerCase(); // Allow "do" when it's part of an action phrase like "do something" if (text === 'do') { // Check the original word context to see if it's followed by a noun/action return true; // For now, allow "do" - could refine this logic later } return !AUXILIARY_VERBS.includes(text); } catch (error) { console.error('Error checking verb:', error); return false; } }; /** * Tokenize text handling quoted strings properly * @param {string} text - Text to tokenize * @returns {string[]} Array of tokens */ const tokenizeText = (text) => { const MAX_TEXT_LENGTH = 10000; const tokens = []; let currentToken = ''; let inQuotes = false; let i = 0; const textLength = Math.min(text.length, MAX_TEXT_LENGTH); while (i < textLength) { const char = text[i]; if (char === '"' && (i === 0 || text[i - 1] === '+')) { // Start of a quoted string after + inQuotes = true; currentToken += char; } else if (char === '"' && inQuotes) { // End of quoted string inQuotes = false; currentToken += char; } else if (char === ' ' && !inQuotes) { // Space outside quotes - end current token if (currentToken) { tokens.push(currentToken); currentToken = ''; } } else { // Regular character currentToken += char; } i++; } // Add final token if (currentToken) { tokens.push(currentToken); } return tokens; }; /** * Parse hashtags from text (consecutive groups anywhere) * @param {string} text - Text to parse * @returns {string[]} Array of hashtag names */ const parseHashtags = (text) => { const trimmedText = text.trim(); const matches = []; // Split text into words const words = trimmedText.split(/\s+/); if (words.length === 0) return matches; // Find all consecutive groups of tags/projects let i = 0; while (i < words.length) { // Check if current word starts a tag/project group if (words[i].startsWith('#') || words[i].startsWith('+')) { // Found start of a group, collect all consecutive tags/projects let groupEnd = i; while ( groupEnd < words.length && (words[groupEnd].startsWith('#') || words[groupEnd].startsWith('+')) ) { groupEnd++; } // Process all hashtags in this group for (let j = i; j < groupEnd; j++) { if (words[j].startsWith('#')) { const tagName = words[j].substring(1); if ( tagName && /^[a-zA-Z0-9_-]+$/.test(tagName) && !matches.includes(tagName) ) { matches.push(tagName); } } } // Skip to end of this group i = groupEnd; } else { i++; } } return matches; }; /** * Parse project references from text (consecutive groups anywhere) * @param {string} text - Text to parse * @returns {string[]} Array of project names */ const parseProjectRefs = (text) => { const trimmedText = text.trim(); const matches = []; // Tokenize the text handling quoted strings properly const tokens = tokenizeText(trimmedText); // Find consecutive groups of tags/projects let i = 0; while (i < tokens.length) { // Check if current token starts a tag/project group if (tokens[i].startsWith('#') || tokens[i].startsWith('+')) { // Found start of a group, collect all consecutive tags/projects let groupEnd = i; while ( groupEnd < tokens.length && (tokens[groupEnd].startsWith('#') || tokens[groupEnd].startsWith('+')) ) { groupEnd++; } // Process all project references in this group for (let j = i; j < groupEnd; j++) { if (tokens[j].startsWith('+')) { let projectName = tokens[j].substring(1); // Handle quoted project names if ( projectName.startsWith('"') && projectName.endsWith('"') ) { projectName = projectName.slice(1, -1); } if (projectName && !matches.includes(projectName)) { matches.push(projectName); } } } // Skip to end of this group i = groupEnd; } else { i++; } } return matches; }; /** * Clean text by removing tags and project references (consecutive groups anywhere) * @param {string} text - Text to clean * @returns {string} Cleaned text */ const cleanTextFromTagsAndProjects = (text) => { const trimmedText = text.trim(); const tokens = tokenizeText(trimmedText); const cleanedTokens = []; let i = 0; while (i < tokens.length) { // Check if current token starts a tag/project group if (tokens[i].startsWith('#') || tokens[i].startsWith('+')) { // Skip this entire consecutive group while ( i < tokens.length && (tokens[i].startsWith('#') || tokens[i].startsWith('+')) ) { i++; } } else { // Keep regular tokens cleanedTokens.push(tokens[i]); i++; } } return cleanedTokens.join(' ').trim(); }; /** * Check if text starts with an action verb using NLP * @param {string} text - Text to analyze * @returns {boolean} True if starts with verb */ const startsWithVerb = (text) => { if (!text.trim()) return false; try { const firstWord = text.trim().split(/\s+/)[0]; if (!firstWord) return false; return isActionVerb(firstWord); } catch (error) { console.error('Error checking if text starts with verb:', error); return false; } }; /** * Check if text contains a URL * @param {string} text - Text to check * @returns {boolean} True if contains URL */ const containsUrl = (text) => { const urlRegex = /https?:\/\/[^\s]+/i; return urlRegex.test(text); }; /** * Generate suggestion for an inbox item * @param {string} content - Original content * @param {string[]} tags - Parsed tags * @param {string[]} projects - Parsed projects * @param {string} cleanedContent - Cleaned content * @returns {object} Suggestion object */ const generateSuggestion = (content, tags, projects, cleanedContent) => { const hasProject = projects.length > 0; const hasBookmarkTag = tags.some((tag) => tag.toLowerCase() === 'bookmark'); const textStartsWithVerb = startsWithVerb(cleanedContent); const hasUrl = containsUrl(content); // Detect URLs even without a project (for bookmark tag display) if (hasUrl && !hasProject) { return { type: null, reason: 'url_detected' }; } if (!hasProject) { return { type: null, reason: null }; } // Suggest note for bookmark items with project (explicit bookmark tag) if (hasBookmarkTag) { return { type: 'note', reason: 'bookmark_tag', }; } // Suggest note for URLs with project (auto-bookmark) if (hasUrl) { return { type: 'note', reason: 'url_detected', }; } // Suggest task for items with project that start with a verb if (textStartsWithVerb) { return { type: 'task', reason: 'verb_detected', }; } return { type: null, reason: null }; }; /** * Process inbox item content and generate metadata * @param {string} content - Inbox item content * @returns {object} Processing results */ const processInboxItem = (content) => { // Parse the content const tags = parseHashtags(content); const projects = parseProjectRefs(content); const cleanedContent = cleanTextFromTagsAndProjects(content); // Generate suggestion const suggestion = generateSuggestion( content, tags, projects, cleanedContent ); return { parsed_tags: tags, parsed_projects: projects, cleaned_content: cleanedContent, suggested_type: suggestion.type, suggested_reason: suggestion.reason, }; }; module.exports = { // Core processing functions processInboxItem, // Text analysis functions isActionVerb, startsWithVerb, containsUrl, // Parsing functions parseHashtags, parseProjectRefs, cleanTextFromTagsAndProjects, tokenizeText, // Suggestion generation generateSuggestion, };