tududi/backend/modules/inbox/inboxProcessingService.js
Chris 1d5de49b48
Add URL detection to inbox processing service (#942)
* fixup! Fix date format inconsistency in Defer Until field

* fixup! fixup! Fix date format inconsistency in Defer Until field

* fixup! fixup! fixup! Fix date format inconsistency in Defer Until field
2026-03-14 08:43:41 +02:00

373 lines
9.8 KiB
JavaScript

/**
* Inbox Item Processing Service
* Handles text analysis and suggestion generation for inbox items
*/
const nlp = require('compromise');
// Helper constants

// Auxiliary, linking and modal verb forms that are not treated as action
// verbs when they appear on their own. The base form "do" is absent from
// this list; isActionVerb accepts it as an action verb.
const AUXILIARY_VERBS = [
    // Forms of "be"
    'be', 'is', 'am', 'are', 'was', 'were', 'being', 'been',
    // Forms of "have"
    'have', 'has', 'had', 'having',
    // Inflected forms of "do"
    'does', 'did', 'doing',
    // Modal verbs
    'will', 'would', 'shall', 'should',
    'may', 'might', 'can', 'could',
    'must', 'ought',
];
/**
 * Decide whether a single word is an action verb, using the NLP library.
 *
 * A word qualifies when the NLP parse reports at least one verb and the
 * recognised verb text is not one of AUXILIARY_VERBS. Any error raised
 * during analysis is logged and reported as "not an action verb".
 *
 * @param {string} word - Word to check
 * @returns {boolean} True if the word is an action verb
 */
const isActionVerb = (word) => {
    // Reject anything that is not a non-empty string.
    if (typeof word !== 'string' || word === '') {
        return false;
    }

    try {
        const detectedVerbs = nlp(word.toLowerCase()).verbs();
        if (detectedVerbs.length === 0) {
            return false;
        }

        const verbText = detectedVerbs.text().toLowerCase();
        // "do" is always accepted (as in "do something"); every other verb
        // is accepted unless it is an auxiliary/linking verb.
        return verbText === 'do' || !AUXILIARY_VERBS.includes(verbText);
    } catch (error) {
        console.error('Error checking verb:', error);
        return false;
    }
};
/**
 * Split text into tokens, keeping quoted strings together.
 *
 * Tokens are separated by the space character only; other whitespace
 * (tabs, newlines) stays inside the surrounding token. A double quote opens
 * a quoted section when it is the very first character of the text or
 * directly follows a `+` (e.g. `+"Home Office"`); spaces inside a quoted
 * section do not split the token. The quote characters are kept in the
 * token. An unterminated quote runs to the end of the text.
 *
 * @param {string} text - Text to tokenize
 * @returns {string[]} Array of non-empty tokens
 */
const tokenizeText = (text) => {
    const tokens = [];
    let currentToken = '';
    let inQuotes = false;

    for (let i = 0; i < text.length; i++) {
        const char = text[i];

        if (char === '"' && inQuotes) {
            // A quote inside a quoted section always closes it. This must be
            // tested before the "opening" rule: otherwise a closing quote
            // that follows a `+` (as in `+"C++"`) would re-open the section
            // and swallow the rest of the text into one token.
            inQuotes = false;
            currentToken += char;
        } else if (char === '"' && (i === 0 || text[i - 1] === '+')) {
            // Start of a quoted string (at text start or right after `+`).
            inQuotes = true;
            currentToken += char;
        } else if (char === ' ' && !inQuotes) {
            // Space outside quotes ends the current token; runs of spaces
            // never produce empty tokens.
            if (currentToken) {
                tokens.push(currentToken);
                currentToken = '';
            }
        } else {
            // Regular character (or a space inside quotes).
            currentToken += char;
        }
    }

    // Flush the trailing token, if any.
    if (currentToken) {
        tokens.push(currentToken);
    }
    return tokens;
};
/**
 * Extract hashtag names from text.
 *
 * The text is split on whitespace and every word that starts with `#` is
 * considered, wherever it appears. A tag name is kept only when it is
 * non-empty and consists solely of letters, digits, `_` or `-`. Duplicates
 * are dropped; the order of first appearance is preserved.
 *
 * @param {string} text - Text to parse
 * @returns {string[]} Array of hashtag names (without the leading `#`)
 */
const parseHashtags = (text) => {
    const uniqueTags = [];

    for (const word of text.trim().split(/\s+/)) {
        if (!word.startsWith('#')) {
            continue;
        }

        const candidate = word.slice(1);
        const isValidName =
            candidate !== '' && /^[a-zA-Z0-9_-]+$/.test(candidate);

        if (isValidName && !uniqueTags.includes(candidate)) {
            uniqueTags.push(candidate);
        }
    }

    return uniqueTags;
};
/**
 * Extract project references from text.
 *
 * The trimmed text is tokenized with tokenizeText (so quoted names such as
 * `+"Home Office"` stay in one token) and every token that starts with `+`
 * is considered, wherever it appears. Surrounding double quotes are removed
 * from the name. Empty names and duplicates are dropped; the order of first
 * appearance is preserved.
 *
 * @param {string} text - Text to parse
 * @returns {string[]} Array of project names (without the leading `+`)
 */
const parseProjectRefs = (text) => {
    const projectNames = [];

    for (const token of tokenizeText(text.trim())) {
        if (!token.startsWith('+')) {
            continue;
        }

        const rawName = token.slice(1);
        // Strip the quotes from names written as +"Some Name".
        const isQuoted = rawName.startsWith('"') && rawName.endsWith('"');
        const name = isQuoted ? rawName.slice(1, -1) : rawName;

        if (name && !projectNames.includes(name)) {
            projectNames.push(name);
        }
    }

    return projectNames;
};
/**
 * Remove tags and project references from text.
 *
 * The trimmed text is tokenized with tokenizeText and every token that
 * starts with `#` or `+` is discarded, wherever it appears. The remaining
 * tokens are joined with single spaces.
 *
 * @param {string} text - Text to clean
 * @returns {string} Text without tag/project tokens
 */
const cleanTextFromTagsAndProjects = (text) => {
    const isTagOrProject = (token) =>
        token.startsWith('#') || token.startsWith('+');

    return tokenizeText(text.trim())
        .filter((token) => !isTagOrProject(token))
        .join(' ')
        .trim();
};
/**
 * Check whether the first word of the text is an action verb.
 *
 * Blank text yields false. The first whitespace-separated word is passed to
 * isActionVerb; any error raised while doing so is logged and reported as
 * false.
 *
 * @param {string} text - Text to analyze
 * @returns {boolean} True if the text starts with an action verb
 */
const startsWithVerb = (text) => {
    const trimmed = text.trim();
    if (trimmed === '') {
        return false;
    }

    try {
        const [leadingWord] = trimmed.split(/\s+/);
        return leadingWord ? isActionVerb(leadingWord) : false;
    } catch (error) {
        console.error('Error checking if text starts with verb:', error);
        return false;
    }
};
/**
 * Check whether the text contains an http(s) URL.
 *
 * A URL is recognised as `http://` or `https://` (case-insensitive)
 * followed by at least one non-whitespace character.
 *
 * @param {string} text - Text to check
 * @returns {boolean} True if the text contains a URL
 */
const containsUrl = (text) => /https?:\/\/[^\s]+/i.test(text);
/**
 * Generate a type suggestion for an inbox item.
 *
 * Rules, in priority order:
 *  1. No project: no type is suggested; the reason is 'url_detected' when
 *     the original content contains a URL, otherwise null.
 *  2. Project + a "bookmark" tag (case-insensitive): suggest 'note' with
 *     reason 'bookmark_tag'.
 *  3. Project + URL in the original content: suggest 'note' with reason
 *     'url_detected'.
 *  4. Project + cleaned content starting with an action verb: suggest
 *     'task' with reason 'verb_detected'.
 *  5. Otherwise: no suggestion.
 *
 * The bookmark-tag scan and the NLP-backed verb check are evaluated only
 * when an earlier rule has not already decided the result, so items without
 * a project (and bookmark/URL items) never pay for an NLP parse.
 *
 * @param {string} content - Original content
 * @param {string[]} tags - Parsed tags
 * @param {string[]} projects - Parsed projects
 * @param {string} cleanedContent - Cleaned content
 * @returns {{type: (string|null), reason: (string|null)}} Suggestion object
 */
const generateSuggestion = (content, tags, projects, cleanedContent) => {
    const hasProject = projects.length > 0;
    const hasUrl = containsUrl(content);

    // Without a project nothing is suggested, but a detected URL is still
    // reported through the reason (used for bookmark tag display).
    if (!hasProject) {
        return { type: null, reason: hasUrl ? 'url_detected' : null };
    }

    // Suggest note for bookmark items with project (explicit bookmark tag)
    if (tags.some((tag) => tag.toLowerCase() === 'bookmark')) {
        return {
            type: 'note',
            reason: 'bookmark_tag',
        };
    }

    // Suggest note for URLs with project (auto-bookmark)
    if (hasUrl) {
        return {
            type: 'note',
            reason: 'url_detected',
        };
    }

    // Suggest task for items with project that start with a verb. This is
    // the only path that needs the verb check, so it is evaluated here.
    if (startsWithVerb(cleanedContent)) {
        return {
            type: 'task',
            reason: 'verb_detected',
        };
    }

    return { type: null, reason: null };
};
/**
 * Analyse the content of an inbox item and build its metadata.
 *
 * Runs, in order, hashtag parsing, project-reference parsing and tag/project
 * removal on the content, then asks generateSuggestion for a suggested type
 * based on those results.
 *
 * @param {string} content - Inbox item content
 * @returns {object} Processing results with the keys `parsed_tags`,
 *   `parsed_projects`, `cleaned_content`, `suggested_type` and
 *   `suggested_reason`
 */
const processInboxItem = (content) => {
    // Extract the structured parts of the raw content.
    const parsedTags = parseHashtags(content);
    const parsedProjects = parseProjectRefs(content);
    const cleaned = cleanTextFromTagsAndProjects(content);

    // Derive the suggested item type from the parsed parts.
    const { type, reason } = generateSuggestion(
        content,
        parsedTags,
        parsedProjects,
        cleaned
    );

    return {
        parsed_tags: parsedTags,
        parsed_projects: parsedProjects,
        cleaned_content: cleaned,
        suggested_type: type,
        suggested_reason: reason,
    };
};
// Public API of the inbox processing service. Every helper defined in this
// file is exported alongside the main entry point, processInboxItem.
module.exports = {
// Core processing functions: full pipeline (parse, clean, suggest)
processInboxItem,
// Text analysis functions: verb and URL detection
isActionVerb,
startsWithVerb,
containsUrl,
// Parsing functions: tags, project references, cleaning, tokenizing
parseHashtags,
parseProjectRefs,
cleanTextFromTagsAndProjects,
tokenizeText,
// Suggestion generation: maps parsed results to a suggested type/reason
generateSuggestion,
};