// tududi/backend/services/inboxProcessingService.js

/**
 * Inbox Item Processing Service
 * Handles text analysis and suggestion generation for inbox items
 */

const nlp = require('compromise');

/**
 * Verb forms that do not count as actions when they are the only word checked:
 * forms of "be" and "have", the inflected forms of "do", and modal verbs.
 * The bare form "do" is intentionally absent so that phrases such as
 * "do laundry" are still treated as actions.
 */
const NON_ACTION_VERBS = new Set([
    'be', 'is', 'am', 'are', 'was', 'were', 'being', 'been',
    'have', 'has', 'had', 'having', 'does', 'did', 'doing',
    'will', 'would', 'shall', 'should', 'may', 'might', 'can',
    'could', 'must', 'ought',
]);

/** Characters allowed in a hashtag name (the part after `#`). */
const TAG_NAME_PATTERN = /^[a-zA-Z0-9_-]+$/;

/** Matches an http(s) URL anywhere in a string. */
const URL_PATTERN = /https?:\/\/[^\s]+/i;

/**
 * Split text into tokens while keeping quoted spans together.
 *
 * A double quote opens a quoted span when it is the first character of the
 * text or directly follows `+` (as in `+"My Project"`); the next double quote
 * that does not follow `+` closes it. Separators inside a quoted span are kept
 * as part of the token. An unterminated quote therefore swallows the rest of
 * the text into a single token.
 *
 * @param {string} text - Text to split
 * @param {function(string): boolean} isSeparator - Returns true for characters that end a token
 * @returns {string[]} Non-empty tokens in order of appearance
 */
function splitOutsideQuotes(text, isSeparator) {
    const tokens = [];
    let current = '';
    let inQuotes = false;

    for (let i = 0; i < text.length; i++) {
        const char = text[i];

        if (char === '"' && (i === 0 || text[i - 1] === '+')) {
            inQuotes = true;
            current += char;
        } else if (char === '"' && inQuotes) {
            inQuotes = false;
            current += char;
        } else if (!inQuotes && isSeparator(char)) {
            // Runs of separators produce no empty tokens.
            if (current) {
                tokens.push(current);
                current = '';
            }
        } else {
            current += char;
        }
    }

    if (current) {
        tokens.push(current);
    }

    return tokens;
}

class InboxProcessingService {
    /**
     * Check if a word is an action verb using NLP
     *
     * The word is lower-cased and handed to `nlp`; it counts as an action verb
     * when `nlp` reports at least one verb and the verb text is not one of the
     * auxiliary/linking/modal forms in NON_ACTION_VERBS.
     *
     * @param {string} word - Word to check
     * @returns {boolean} True if the word is an action verb; false for empty or
     *   non-string input and when the NLP call throws (the error is logged)
     */
    static isActionVerb(word) {
        if (!word || typeof word !== 'string') return false;

        try {
            const verbs = nlp(word.toLowerCase()).verbs();
            if (verbs.length === 0) return false;

            return !NON_ACTION_VERBS.has(verbs.text().toLowerCase());
        } catch (error) {
            console.error('Error checking verb:', error);
            return false;
        }
    }

    /**
     * Tokenize text handling quoted strings properly
     *
     * Tokens are separated by the space character only; tabs and newlines stay
     * inside tokens. Spaces inside a `+"quoted name"` do not split the token.
     *
     * @param {string} text - Text to tokenize
     * @returns {string[]} Array of tokens (empty for non-string input)
     */
    static tokenizeText(text) {
        if (typeof text !== 'string') return [];

        return splitOutsideQuotes(text, (char) => char === ' ');
    }

    /**
     * Parse hashtags from text
     *
     * Words are separated by any whitespace. A `#` that appears inside a quoted
     * project name (`+"Proj #x"`) belongs to that project name and is not
     * reported as a tag, matching how parseProjectRefs and
     * cleanTextFromTagsAndProjects treat quoted names.
     *
     * @param {string} text - Text to parse
     * @returns {string[]} Unique hashtag names (without `#`) in order of first
     *   appearance; names with characters other than letters, digits, `_` or
     *   `-` are ignored
     */
    static parseHashtags(text) {
        if (typeof text !== 'string') return [];

        const tags = [];
        const words = splitOutsideQuotes(text.trim(), (char) => /\s/.test(char));

        for (const word of words) {
            if (!word.startsWith('#')) continue;

            const tagName = word.substring(1);
            if (TAG_NAME_PATTERN.test(tagName) && !tags.includes(tagName)) {
                tags.push(tagName);
            }
        }

        return tags;
    }

    /**
     * Parse project references from text
     *
     * A project reference is a token starting with `+`. Names containing
     * spaces can be written as `+"Project Name"`; the surrounding quotes are
     * removed.
     *
     * @param {string} text - Text to parse
     * @returns {string[]} Unique project names in order of first appearance
     */
    static parseProjectRefs(text) {
        if (typeof text !== 'string') return [];

        const projects = [];

        for (const token of this.tokenizeText(text.trim())) {
            if (!token.startsWith('+')) continue;

            let projectName = token.substring(1);

            // Handle quoted project names
            if (projectName.startsWith('"') && projectName.endsWith('"')) {
                projectName = projectName.slice(1, -1);
            }

            if (projectName && !projects.includes(projectName)) {
                projects.push(projectName);
            }
        }

        return projects;
    }

    /**
     * Clean text by removing tags and project references
     *
     * Every token that starts with `#` or `+` is dropped; the remaining tokens
     * are joined with single spaces.
     *
     * @param {string} text - Text to clean
     * @returns {string} Cleaned text (empty string for non-string input)
     */
    static cleanTextFromTagsAndProjects(text) {
        if (typeof text !== 'string') return '';

        return this.tokenizeText(text.trim())
            .filter((token) => !token.startsWith('#') && !token.startsWith('+'))
            .join(' ')
            .trim();
    }

    /**
     * Check if text starts with an action verb using NLP
     * @param {string} text - Text to analyze
     * @returns {boolean} True if the first whitespace-separated word is an
     *   action verb; false for blank or non-string input
     */
    static startsWithVerb(text) {
        if (typeof text !== 'string') return false;

        try {
            const firstWord = text.trim().split(/\s+/)[0];
            if (!firstWord) return false;

            return this.isActionVerb(firstWord);
        } catch (error) {
            console.error('Error checking if text starts with verb:', error);
            return false;
        }
    }

    /**
     * Check if text contains a URL
     * @param {string} text - Text to check
     * @returns {boolean} True if the text contains an http:// or https:// URL
     */
    static containsUrl(text) {
        return URL_PATTERN.test(text);
    }

    /**
     * Generate suggestion for an inbox item
     *
     * Nothing is suggested unless at least one project was parsed. With a
     * project, rules are applied in this order: a `bookmark` tag (any case)
     * suggests a note, a URL in the original content suggests a note, and
     * cleaned content starting with an action verb suggests a task.
     *
     * @param {string} content - Original content
     * @param {string[]} tags - Parsed tags
     * @param {string[]} projects - Parsed projects
     * @param {string} cleanedContent - Cleaned content
     * @returns {object} Suggestion object `{ type, reason }`; both are null
     *   when no rule applies
     */
    static generateSuggestion(content, tags, projects, cleanedContent) {
        if (projects.length === 0) {
            return { type: null, reason: null };
        }

        // Suggest note for bookmark items with project (explicit bookmark tag)
        if (tags.some((tag) => tag.toLowerCase() === 'bookmark')) {
            return { type: 'note', reason: 'bookmark_tag' };
        }

        // Suggest note for URLs with project (auto-bookmark)
        if (this.containsUrl(content)) {
            return { type: 'note', reason: 'url_detected' };
        }

        // The NLP verb check is evaluated last so it only runs when no cheaper
        // rule has already decided the suggestion.
        if (this.startsWithVerb(cleanedContent)) {
            return { type: 'task', reason: 'verb_detected' };
        }

        return { type: null, reason: null };
    }

    /**
     * Process inbox item content and generate metadata
     * @param {string} content - Inbox item content
     * @returns {object} Processing results with `parsed_tags`,
     *   `parsed_projects`, `cleaned_content`, `suggested_type` and
     *   `suggested_reason`; non-string content yields empty results
     */
    static processInboxItem(content) {
        // Parse the content
        const tags = this.parseHashtags(content);
        const projects = this.parseProjectRefs(content);
        const cleanedContent = this.cleanTextFromTagsAndProjects(content);

        // Generate suggestion
        const suggestion = this.generateSuggestion(content, tags, projects, cleanedContent);

        return {
            parsed_tags: tags,
            parsed_projects: projects,
            cleaned_content: cleanedContent,
            suggested_type: suggestion.type,
            suggested_reason: suggestion.reason,
        };
    }
}

// Export the class itself; all of its methods are static, so it is used without instantiation.
module.exports = InboxProcessingService;