Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions multimodal/omni-tars/core/src/AgentComposer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ export class AgentComposer {
}
}

/**
 * Run the `onAfterToolCall` hook of every registered plugin.
 *
 * Plugins are visited in the iteration order of `this.plugins`, and each hook
 * is awaited before the next one starts. Plugins that do not define the hook
 * are skipped. A rejection from any hook propagates to the caller, so later
 * plugins are not invoked in that case.
 *
 * @param id - Identifier forwarded unchanged to every plugin hook.
 * @param toolCall - The tool call's id and tool name, forwarded unchanged.
 * @param result - Value forwarded unchanged to every plugin hook
 *   (presumably the tool's output — confirm against the caller).
 */
async executeOnAfterToolCall(
  id: string,
  toolCall: { toolCallId: string; name: string },
  result: unknown,
): Promise<void> {
  for (const plugin of this.plugins) {
    // Nothing to run for plugins that do not provide this hook.
    if (!plugin.onAfterToolCall) {
      continue;
    }

    // Awaited one by one so hooks never overlap.
    await plugin.onAfterToolCall(id, toolCall, result);
  }
}

/**
* Get list of available environments from plugins
*/
Expand Down
9 changes: 9 additions & 0 deletions multimodal/omni-tars/core/src/AgentPlugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,13 @@ export class AgentPlugin {
async onAgentLoopEnd(): Promise<void> {
//logic here
}

/**
 * Hook called after each tool call.
 *
 * This base implementation does nothing; subclasses override it to react to
 * a finished tool call.
 *
 * @param id - Identifier supplied by the caller of the hook.
 * @param toolCall - The tool call's id (`toolCallId`) and tool name (`name`).
 * @param result - Value supplied by the caller
 *   (presumably the tool's output — confirm against the caller).
 */
async onAfterToolCall(
  id: string,
  toolCall: { toolCallId: string; name: string },
  result: unknown,
): Promise<void> {
  // Intentionally empty: default no-op hook.
}
}
9 changes: 9 additions & 0 deletions multimodal/omni-tars/core/src/ComposableAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,13 @@ export class ComposableAgent extends Agent {
// Call parent implementation to ensure proper agent loop termination
await super.onAgentLoopEnd(id);
}

/**
 * After-tool-call hook: hands the call over to the composer, which runs the
 * `onAfterToolCall` hook of the composed plugins.
 *
 * NOTE(review): this override resolves to `undefined` and does not call the
 * parent implementation. If the base `Agent` uses the hook's return value
 * (e.g. as the tool result), this should return `result` — confirm against
 * the base class.
 *
 * @param id - Identifier forwarded unchanged to the composer.
 * @param toolCall - The tool call's id and tool name, forwarded unchanged.
 * @param result - Value forwarded unchanged to the composer.
 */
async onAfterToolCall(
  id: string,
  toolCall: { toolCallId: string; name: string },
  result: unknown,
): Promise<void> {
  const pluginHooks = this.composer;

  // Wait for the plugin hooks to finish before resolving.
  await pluginHooks.executeOnAfterToolCall(id, toolCall, result);
}
}
36 changes: 26 additions & 10 deletions multimodal/omni-tars/gui-agent/src/GuiAgentPlugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { Base64ImageParser } from '@agent-infra/media-utils';
import { getScreenInfo, setScreenInfo } from './shared';
import { OperatorManager } from './OperatorManager';
import { BrowserOperator } from '@gui-agent/operator-browser';
import { log } from 'console';

interface GuiAgentPluginOption {
operatorManager: OperatorManager;
Expand Down Expand Up @@ -77,19 +78,19 @@ export class GuiAgentPlugin extends AgentPlugin {
// async onEachAgentLoopStart(): Promise<void> {
// }

async onEachAgentLoopEnd(): Promise<void> {
const events = this.agent.getEventStream().getEvents();
const lastToolCallIsComputerUse = this.findLastMatch<AgentEventStream.Event>(
events,
(item) => item.type === 'tool_call' && item.name === 'browser_vision_control',
);
if (!lastToolCallIsComputerUse) {
this.agent.logger.info('Last tool not GUI action, skipping screenshot');
// async onEachAgentLoopEnd(): Promise<void> {
// }

async onAfterToolCall(
id: string,
toolCall: { toolCallId: string; name: string },
result: unknown,
): Promise<void> {
if (toolCall.name !== 'browser_vision_control') {
this.agent.logger.info('[GuiAgentPlugin] onAfterToolCall not gui tool, skipping screenshot');
return;
}

this.agent.logger.info('onEachAgentLoopEnd lastToolCall', lastToolCallIsComputerUse);

const operator = await this.operatorManager.getInstance();
const output = await operator?.screenshot();
if (!output) {
Expand All @@ -103,7 +104,18 @@ export class GuiAgentPlugin extends AgentPlugin {
return;
}

this.agent.logger.info(
'[GuiAgentPlugin] onAfterToolCall screenshot base64Uri',
base64Uri.length > 20 ? base64Uri.substring(0, 20) + '...' : base64Uri,
);

const meta = operator instanceof BrowserOperator ? await operator.getMeta() : null;

this.agent.logger.info(
'[GuiAgentPlugin] onAfterToolCall screenshot meta',
JSON.stringify(meta),
);

const content: ChatCompletionContentPart[] = [
{
type: 'image_url',
Expand All @@ -121,6 +133,10 @@ export class GuiAgentPlugin extends AgentPlugin {
}

const eventStream = this.agent.getEventStream();

const events = eventStream.getEvents();
this.agent.logger.info('[GuiAgentPlugin] onAfterToolCall events len:', events.length);

const event = eventStream.createEvent('environment_input', {
description: 'Browser Screenshot',
content,
Expand Down
2 changes: 1 addition & 1 deletion multimodal/tarko/agent/src/agent/event-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ export class AgentEventStreamProcessor implements AgentEventStream.Processor {
*/
sendEvent(event: AgentEventStream.Event): void {
this.events.push(event);
// this.logger.debug(`Event added: ${event.type} (${event.id})`);
this.logger.debug(`Event added: ${event.type} (${event.id})`);

// Notify subscribers
this.subscribers.forEach((callback) => {
Expand Down
Loading