-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Implement VLM Run Invoice Parser node with file upload and parsing logic #1442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import { VlmRunIcon } from '@/components/icons' | ||
import type { BlockConfig } from '@/blocks/types' | ||
import type { VlmRunResponse } from '@/tools/vlmrun/types' | ||
|
||
/** Id of the single tool this block exposes and always dispatches to. */
const VLMRUN_PARSE_INVOICE_TOOL_ID = 'vlmrun_parse_invoice'

/**
 * Block configuration for the VLM Run invoice parser.
 *
 * Collects an API key and an invoice file path from the user and routes
 * execution to the `vlmrun_parse_invoice` tool. The parsed invoice is exposed
 * on the `data` output.
 */
export const VlmRunInvoiceParserBlock: BlockConfig<VlmRunResponse> = {
  type: 'vlmrun_invoice_parser',
  name: 'VLM Run Invoice Parser',
  description: 'Upload and parse an invoice to extract structured JSON',
  longDescription:
    'Uploads an invoice file (PDF/image) and uses VLM Run to extract data like invoice number, total, due date, etc.',
  category: 'tools',
  bgColor: '#4A90E2',
  icon: VlmRunIcon,

  // Form fields rendered in the block UI; both are mandatory.
  subBlocks: [
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input',
      layout: 'full',
      placeholder: 'Your Vlm Run API Key',
      // Flagged as a password field.
      password: true,
      required: true,
    },
    {
      id: 'filePath',
      title: 'Invoice File Path',
      type: 'short-input',
      layout: 'full',
      placeholder: '/path/to/invoice.pdf',
      required: true,
    },
  ],

  // Only one tool is reachable from this block, so the selector is constant.
  tools: {
    access: [VLMRUN_PARSE_INVOICE_TOOL_ID],
    config: {
      tool: () => VLMRUN_PARSE_INVOICE_TOOL_ID,
    },
  },

  // Input ids mirror the `subBlocks` ids above.
  inputs: {
    apiKey: { type: 'string', description: 'Vlm Run API Key' },
    filePath: { type: 'string', description: 'Path to the invoice file' },
  },

  outputs: {
    data: {
      type: 'any',
      description: 'Structured invoice JSON (e.g., { invoice_number, total, due_date })',
    },
  },
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,84 +3,86 @@ | |
* | ||
*/ | ||
|
||
import { AgentBlock } from '@/blocks/blocks/agent' | ||
import { AirtableBlock } from '@/blocks/blocks/airtable' | ||
import { ApiBlock } from '@/blocks/blocks/api' | ||
import { ArxivBlock } from '@/blocks/blocks/arxiv' | ||
import { BrowserUseBlock } from '@/blocks/blocks/browser_use' | ||
import { ClayBlock } from '@/blocks/blocks/clay' | ||
import { ConditionBlock } from '@/blocks/blocks/condition' | ||
import { ConfluenceBlock } from '@/blocks/blocks/confluence' | ||
import { DiscordBlock } from '@/blocks/blocks/discord' | ||
import { ElevenLabsBlock } from '@/blocks/blocks/elevenlabs' | ||
import { EvaluatorBlock } from '@/blocks/blocks/evaluator' | ||
import { ExaBlock } from '@/blocks/blocks/exa' | ||
import { FileBlock } from '@/blocks/blocks/file' | ||
import { FirecrawlBlock } from '@/blocks/blocks/firecrawl' | ||
import { FunctionBlock } from '@/blocks/blocks/function' | ||
import { GenericWebhookBlock } from '@/blocks/blocks/generic_webhook' | ||
import { GitHubBlock } from '@/blocks/blocks/github' | ||
import { GmailBlock } from '@/blocks/blocks/gmail' | ||
import { GoogleSearchBlock } from '@/blocks/blocks/google' | ||
import { GoogleCalendarBlock } from '@/blocks/blocks/google_calendar' | ||
import { GoogleDocsBlock } from '@/blocks/blocks/google_docs' | ||
import { GoogleDriveBlock } from '@/blocks/blocks/google_drive' | ||
import { GoogleFormsBlock } from '@/blocks/blocks/google_form' | ||
import { GoogleSheetsBlock } from '@/blocks/blocks/google_sheets' | ||
import { HuggingFaceBlock } from '@/blocks/blocks/huggingface' | ||
import { HunterBlock } from '@/blocks/blocks/hunter' | ||
import { ImageGeneratorBlock } from '@/blocks/blocks/image_generator' | ||
import { JinaBlock } from '@/blocks/blocks/jina' | ||
import { JiraBlock } from '@/blocks/blocks/jira' | ||
import { KnowledgeBlock } from '@/blocks/blocks/knowledge' | ||
import { LinearBlock } from '@/blocks/blocks/linear' | ||
import { LinkupBlock } from '@/blocks/blocks/linkup' | ||
import { MailBlock } from '@/blocks/blocks/mail' | ||
import { McpBlock } from '@/blocks/blocks/mcp' | ||
import { Mem0Block } from '@/blocks/blocks/mem0' | ||
import { MemoryBlock } from '@/blocks/blocks/memory' | ||
import { MicrosoftExcelBlock } from '@/blocks/blocks/microsoft_excel' | ||
import { MicrosoftPlannerBlock } from '@/blocks/blocks/microsoft_planner' | ||
import { MicrosoftTeamsBlock } from '@/blocks/blocks/microsoft_teams' | ||
import { MistralParseBlock } from '@/blocks/blocks/mistral_parse' | ||
import { MongoDBBlock } from '@/blocks/blocks/mongodb' | ||
import { MySQLBlock } from '@/blocks/blocks/mysql' | ||
import { NotionBlock } from '@/blocks/blocks/notion' | ||
import { OneDriveBlock } from '@/blocks/blocks/onedrive' | ||
import { OpenAIBlock } from '@/blocks/blocks/openai' | ||
import { OutlookBlock } from '@/blocks/blocks/outlook' | ||
import { ParallelBlock } from '@/blocks/blocks/parallel' | ||
import { PerplexityBlock } from '@/blocks/blocks/perplexity' | ||
import { PineconeBlock } from '@/blocks/blocks/pinecone' | ||
import { PostgreSQLBlock } from '@/blocks/blocks/postgresql' | ||
import { QdrantBlock } from '@/blocks/blocks/qdrant' | ||
import { RedditBlock } from '@/blocks/blocks/reddit' | ||
import { ResponseBlock } from '@/blocks/blocks/response' | ||
import { RouterBlock } from '@/blocks/blocks/router' | ||
import { S3Block } from '@/blocks/blocks/s3' | ||
import { ScheduleBlock } from '@/blocks/blocks/schedule' | ||
import { SerperBlock } from '@/blocks/blocks/serper' | ||
import { SharepointBlock } from '@/blocks/blocks/sharepoint' | ||
import { SlackBlock } from '@/blocks/blocks/slack' | ||
import { StagehandBlock } from '@/blocks/blocks/stagehand' | ||
import { StagehandAgentBlock } from '@/blocks/blocks/stagehand_agent' | ||
import { StarterBlock } from '@/blocks/blocks/starter' | ||
import { SupabaseBlock } from '@/blocks/blocks/supabase' | ||
import { TavilyBlock } from '@/blocks/blocks/tavily' | ||
import { TelegramBlock } from '@/blocks/blocks/telegram' | ||
import { ThinkingBlock } from '@/blocks/blocks/thinking' | ||
import { TranslateBlock } from '@/blocks/blocks/translate' | ||
import { TwilioSMSBlock } from '@/blocks/blocks/twilio' | ||
import { TypeformBlock } from '@/blocks/blocks/typeform' | ||
import { VisionBlock } from '@/blocks/blocks/vision' | ||
import { WealthboxBlock } from '@/blocks/blocks/wealthbox' | ||
import { WebhookBlock } from '@/blocks/blocks/webhook' | ||
import { WhatsAppBlock } from '@/blocks/blocks/whatsapp' | ||
import { WikipediaBlock } from '@/blocks/blocks/wikipedia' | ||
import { WorkflowBlock } from '@/blocks/blocks/workflow' | ||
import { XBlock } from '@/blocks/blocks/x' | ||
import { YouTubeBlock } from '@/blocks/blocks/youtube' | ||
import type { BlockConfig } from '@/blocks/types' | ||
import { AgentBlock } from "@/blocks/blocks/agent"; | ||
import { AirtableBlock } from "@/blocks/blocks/airtable"; | ||
import { ApiBlock } from "@/blocks/blocks/api"; | ||
import { ArxivBlock } from "@/blocks/blocks/arxiv"; | ||
import { BrowserUseBlock } from "@/blocks/blocks/browser_use"; | ||
import { ClayBlock } from "@/blocks/blocks/clay"; | ||
import { ConditionBlock } from "@/blocks/blocks/condition"; | ||
import { ConfluenceBlock } from "@/blocks/blocks/confluence"; | ||
import { DiscordBlock } from "@/blocks/blocks/discord"; | ||
import { ElevenLabsBlock } from "@/blocks/blocks/elevenlabs"; | ||
import { EvaluatorBlock } from "@/blocks/blocks/evaluator"; | ||
import { ExaBlock } from "@/blocks/blocks/exa"; | ||
import { FileBlock } from "@/blocks/blocks/file"; | ||
import { FirecrawlBlock } from "@/blocks/blocks/firecrawl"; | ||
import { FunctionBlock } from "@/blocks/blocks/function"; | ||
import { GenericWebhookBlock } from "@/blocks/blocks/generic_webhook"; | ||
import { GitHubBlock } from "@/blocks/blocks/github"; | ||
import { GmailBlock } from "@/blocks/blocks/gmail"; | ||
import { GoogleSearchBlock } from "@/blocks/blocks/google"; | ||
import { GoogleCalendarBlock } from "@/blocks/blocks/google_calendar"; | ||
import { GoogleDocsBlock } from "@/blocks/blocks/google_docs"; | ||
import { GoogleDriveBlock } from "@/blocks/blocks/google_drive"; | ||
import { GoogleFormsBlock } from "@/blocks/blocks/google_form"; | ||
import { GoogleSheetsBlock } from "@/blocks/blocks/google_sheets"; | ||
import { HuggingFaceBlock } from "@/blocks/blocks/huggingface"; | ||
import { HunterBlock } from "@/blocks/blocks/hunter"; | ||
import { ImageGeneratorBlock } from "@/blocks/blocks/image_generator"; | ||
import { JinaBlock } from "@/blocks/blocks/jina"; | ||
import { JiraBlock } from "@/blocks/blocks/jira"; | ||
import { KnowledgeBlock } from "@/blocks/blocks/knowledge"; | ||
import { LinearBlock } from "@/blocks/blocks/linear"; | ||
import { LinkupBlock } from "@/blocks/blocks/linkup"; | ||
import { MailBlock } from "@/blocks/blocks/mail"; | ||
import { McpBlock } from "@/blocks/blocks/mcp"; | ||
import { Mem0Block } from "@/blocks/blocks/mem0"; | ||
import { MemoryBlock } from "@/blocks/blocks/memory"; | ||
import { MicrosoftExcelBlock } from "@/blocks/blocks/microsoft_excel"; | ||
import { MicrosoftPlannerBlock } from "@/blocks/blocks/microsoft_planner"; | ||
import { MicrosoftTeamsBlock } from "@/blocks/blocks/microsoft_teams"; | ||
import { MistralParseBlock } from "@/blocks/blocks/mistral_parse"; | ||
import { MongoDBBlock } from "@/blocks/blocks/mongodb"; | ||
import { MySQLBlock } from "@/blocks/blocks/mysql"; | ||
import { NotionBlock } from "@/blocks/blocks/notion"; | ||
import { OneDriveBlock } from "@/blocks/blocks/onedrive"; | ||
import { OpenAIBlock } from "@/blocks/blocks/openai"; | ||
import { OutlookBlock } from "@/blocks/blocks/outlook"; | ||
import { ParallelBlock } from "@/blocks/blocks/parallel"; | ||
import { PerplexityBlock } from "@/blocks/blocks/perplexity"; | ||
import { PineconeBlock } from "@/blocks/blocks/pinecone"; | ||
import { PostgreSQLBlock } from "@/blocks/blocks/postgresql"; | ||
import { QdrantBlock } from "@/blocks/blocks/qdrant"; | ||
import { RedditBlock } from "@/blocks/blocks/reddit"; | ||
import { ResponseBlock } from "@/blocks/blocks/response"; | ||
import { RouterBlock } from "@/blocks/blocks/router"; | ||
import { S3Block } from "@/blocks/blocks/s3"; | ||
import { ScheduleBlock } from "@/blocks/blocks/schedule"; | ||
import { SerperBlock } from "@/blocks/blocks/serper"; | ||
import { SharepointBlock } from "@/blocks/blocks/sharepoint"; | ||
import { SlackBlock } from "@/blocks/blocks/slack"; | ||
import { StagehandBlock } from "@/blocks/blocks/stagehand"; | ||
import { StagehandAgentBlock } from "@/blocks/blocks/stagehand_agent"; | ||
import { StarterBlock } from "@/blocks/blocks/starter"; | ||
import { SupabaseBlock } from "@/blocks/blocks/supabase"; | ||
import { TavilyBlock } from "@/blocks/blocks/tavily"; | ||
import { TelegramBlock } from "@/blocks/blocks/telegram"; | ||
import { ThinkingBlock } from "@/blocks/blocks/thinking"; | ||
import { TranslateBlock } from "@/blocks/blocks/translate"; | ||
import { TwilioSMSBlock } from "@/blocks/blocks/twilio"; | ||
import { TypeformBlock } from "@/blocks/blocks/typeform"; | ||
import { VisionBlock } from "@/blocks/blocks/vision"; | ||
import { WealthboxBlock } from "@/blocks/blocks/wealthbox"; | ||
import { WebhookBlock } from "@/blocks/blocks/webhook"; | ||
import { WhatsAppBlock } from "@/blocks/blocks/whatsapp"; | ||
import { WikipediaBlock } from "@/blocks/blocks/wikipedia"; | ||
import { WorkflowBlock } from "@/blocks/blocks/workflow"; | ||
import { XBlock } from "@/blocks/blocks/x"; | ||
import { YouTubeBlock } from "@/blocks/blocks/youtube"; | ||
import type { BlockConfig } from "@/blocks/types"; | ||
|
||
import { VlmRunInvoiceParserBlock } from "./blocks/vlmrun"; | ||
|
||
// Registry of all available blocks, alphabetically sorted | ||
export const registry: Record<string, BlockConfig> = { | ||
|
@@ -162,15 +164,20 @@ export const registry: Record<string, BlockConfig> = { | |
workflow: WorkflowBlock, | ||
x: XBlock, | ||
youtube: YouTubeBlock, | ||
} | ||
vlmrun_invoice_parser: VlmRunInvoiceParserBlock, | ||
}; | ||
|
||
export const getBlock = (type: string): BlockConfig | undefined => registry[type] | ||
export const getBlock = (type: string): BlockConfig | undefined => | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Formatting changes. Remove? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, no formatting changes needed. |
||
registry[type]; | ||
|
||
export const getBlocksByCategory = (category: 'blocks' | 'tools' | 'triggers'): BlockConfig[] => | ||
Object.values(registry).filter((block) => block.category === category) | ||
export const getBlocksByCategory = ( | ||
category: "blocks" | "tools" | "triggers" | ||
): BlockConfig[] => | ||
Object.values(registry).filter((block) => block.category === category); | ||
|
||
export const getAllBlockTypes = (): string[] => Object.keys(registry) | ||
export const getAllBlockTypes = (): string[] => Object.keys(registry); | ||
|
||
export const isValidBlockType = (type: string): type is string => type in registry | ||
export const isValidBlockType = (type: string): type is string => | ||
type in registry; | ||
|
||
export const getAllBlocks = (): BlockConfig[] => Object.values(registry) | ||
export const getAllBlocks = (): BlockConfig[] => Object.values(registry); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,11 @@ export function UsersIcon(props: SVGProps<SVGSVGElement>) { | |
</svg> | ||
) | ||
} | ||
|
||
/**
 * Icon used for VLM Run blocks.
 *
 * Accepts standard SVG props like the other icons in this file, so callers
 * can size or restyle it. Props are spread after the defaults, which means a
 * caller-supplied `className`, `fill`, etc. replaces the built-in value;
 * rendering with no props keeps the previous appearance.
 *
 * @param props - SVG attributes forwarded to the root `<svg>` element.
 */
export function VlmRunIcon(props: SVGProps<SVGSVGElement>) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 24 24"
      fill="currentColor"
      className="w-6 h-6"
      {...props}
    >
      <path d="M12 2L2 12h3v8h14v-8h3L12 2z" />
    </svg>
  )
}
Comment on lines
+24
to
+28
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Style: the VlmRunIcon uses a generic house icon. Consider creating a more specific icon that represents invoice parsing or the VLM Run brand. |
||
export function SettingsIcon(props: SVGProps<SVGSVGElement>) { | ||
return ( | ||
<svg | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
"node": ">=20.0.0" | ||
}, | ||
"scripts": { | ||
"dev": "next dev --turbo --port 3000", | ||
"dev": "next dev --port 3000", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please don't disable turbo for dev builds. |
||
"dev:classic": "next dev", | ||
"dev:sockets": "bun run socket-server/index.ts", | ||
"dev:full": "concurrently -n \"NextJS,Realtime\" -c \"cyan,magenta\" \"bun run dev\" \"bun run dev:sockets\"", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export { parseInvoiceTool } from './parse_invoice' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import { VlmRun } from 'vlmrun' | ||
import type { ToolConfig } from '@/tools/types' | ||
import type { HttpMethod, VlmRunParams, VlmRunResponse } from './types' | ||
|
||
/** Model and domain passed to every VLM Run document generation call. */
const INVOICE_MODEL = 'vlm-1'
const INVOICE_DOMAIN = 'document.invoice'

/** True when `location` is an http(s) URL rather than a local file path. */
const isRemoteUrl = (location: string): boolean =>
  location.startsWith('http://') || location.startsWith('https://')

/** Turns any thrown value into a readable message without assuming it is an `Error`. */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error)

/**
 * Tool that parses an invoice with VLM Run and returns the extracted data.
 *
 * `filePath` may be an http(s) URL, which is sent straight to document
 * generation, or any other string, which is uploaded as a file first and then
 * parsed by file id. All of the work is done inside `transformResponse` via
 * the VLM Run SDK client.
 *
 * Failures (missing params, SDK errors, a non-`completed` status) are never
 * thrown to the caller; they are reported as `{ success: false, error }`.
 */
export const parseInvoiceTool: ToolConfig<VlmRunParams, VlmRunResponse> = {
  id: 'vlmrun_parse_invoice',
  name: 'Parse Invoice with VLM Run',
  description: 'Uploads an invoice file and extracts structured JSON using VLM Run',
  version: '1.0.0',

  params: {
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'VLM Run API Key',
    },
    filePath: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Path to the invoice file (PDF or image) or URL',
    },
  },

  // Placeholder request: the `_response` it produces is ignored below.
  // NOTE(review): the URL is empty — confirm how the tool executor treats an
  // empty `url` before relying on this in production.
  request: {
    url: '',
    method: 'POST' as HttpMethod,
    headers: () => ({ 'Content-Type': 'application/json' }),
    body: () => ({}),
  },

  /**
   * Runs the upload/parse flow through the VLM Run SDK.
   *
   * @param _response - Result of the placeholder request; unused.
   * @param params - Tool parameters; when absent a failure result is returned.
   * @returns The parsed invoice in both `data` and `output` on success, or a
   *   failure result carrying an `error` message.
   */
  transformResponse: async (
    _response: Response,
    params?: VlmRunParams
  ): Promise<VlmRunResponse> => {
    if (!params) {
      return {
        data: null,
        success: false,
        output: {},
        error: 'Missing parameters',
      }
    }

    const client = new VlmRun({ apiKey: params.apiKey })

    try {
      // URLs are parsed directly; anything else is uploaded first and then
      // parsed by the id of the uploaded file.
      const parseResponse = isRemoteUrl(params.filePath)
        ? await client.document.generate({
            url: params.filePath,
            model: INVOICE_MODEL,
            domain: INVOICE_DOMAIN,
          })
        : await client.document.generate({
            fileId: (await client.files.upload({ filePath: params.filePath })).id,
            model: INVOICE_MODEL,
            domain: INVOICE_DOMAIN,
          })

      // Any status other than 'completed' is treated as a failure and routed
      // through the catch below so it is reported like other errors.
      if (parseResponse.status !== 'completed') {
        throw new Error(`Parsing failed: status ${parseResponse.status}`)
      }

      return {
        data: parseResponse.response,
        success: true,
        output: parseResponse.response,
      }
    } catch (error: unknown) {
      return {
        data: null,
        success: false,
        output: {},
        error: `Error processing invoice: ${describeError(error)}`,
      }
    }
  },
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We may need to remove these formatting-related changes.