fix(look-at): resolve multimodal models from vision-capable providers

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
YeonGyu-Kim
2026-03-08 02:20:48 +09:00
parent 8b0ca63bbb
commit 5d31bf46fa
2 changed files with 206 additions and 11 deletions

View File

@@ -0,0 +1,115 @@
/// <reference types="bun-types" />
import { afterEach, beforeEach, describe, expect, mock, spyOn, test } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import { resolveMultimodalLookerAgentMetadata } from "./multimodal-agent-metadata"
import { setVisionCapableModelsCache, clearVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
import * as connectedProvidersCache from "../../shared/connected-providers-cache"
import * as modelAvailability from "../../shared/model-availability"
/**
 * Builds a minimal `PluginInput` stub for these tests.
 *
 * Only `client.app.agents` is functional: it is a bun mock that resolves to
 * `{ data: agentData }`. Every other field is an empty placeholder or a
 * fixed dummy value.
 *
 * @param agentData - Raw agent records the stubbed `client.app.agents()` resolves with.
 * @returns A plugin input whose client reports exactly `agentData`.
 */
function createPluginInput(agentData: Array<Record<string, unknown>>): PluginInput {
  const agents = mock(async () => ({ data: agentData }))

  // Start from an empty object typed as the client, then graft the stub onto it.
  const client = {} as PluginInput["client"]
  Object.assign(client, { app: { agents } })

  const pluginInput: PluginInput = {
    client,
    project: {} as PluginInput["project"],
    directory: "/project",
    worktree: "/project",
    serverUrl: new URL("http://localhost"),
    $: {} as PluginInput["$"],
  }
  return pluginInput
}
describe("resolveMultimodalLookerAgentMetadata", () => {
  // Returns a fresh copy each call so fixtures, inputs and expectations never share a reference.
  const rundaoVisionModel = () => ({ providerID: "rundao", modelID: "public/qwen3.5-397b" })

  // Seeds the vision-capable cache with the single rundao model used by these tests.
  const seedRundaoVisionCache = (): void => {
    setVisionCapableModelsCache(new Map([["rundao/public/qwen3.5-397b", rundaoVisionModel()]]))
  }

  // Replaces model discovery and the connected-providers cache with fixed answers.
  const stubModelEnvironment = (availableModels: string[], connectedProviders: string[]): void => {
    spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(new Set(availableModels))
    spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(connectedProviders)
  }

  // Calls `mockRestore` when the given function exposes one; otherwise does nothing.
  const restoreIfSpied = (candidate: unknown): void => {
    const restorable = candidate as { mockRestore?: () => void }
    restorable.mockRestore?.()
  }

  beforeEach(() => {
    clearVisionCapableModelsCache()
  })

  afterEach(() => {
    clearVisionCapableModelsCache()
    // Read the module properties at teardown time so the currently installed spies are the ones restored.
    restoreIfSpied(modelAvailability.fetchAvailableModels)
    restoreIfSpied(connectedProvidersCache.readConnectedProvidersCache)
  })

  test("returns configured multimodal-looker model when it already matches a vision-capable override", async () => {
    // given
    seedRundaoVisionCache()
    stubModelEnvironment(["rundao/public/qwen3.5-397b"], ["rundao"])
    const ctx = createPluginInput([
      { name: "multimodal-looker", model: rundaoVisionModel() },
    ])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: rundaoVisionModel(),
      agentVariant: undefined,
    })
  })

  test("prefers connected vision-capable provider models before the hardcoded fallback chain", async () => {
    // given
    seedRundaoVisionCache()
    stubModelEnvironment(["openai/gpt-5.4", "rundao/public/qwen3.5-397b"], ["openai", "rundao"])
    const ctx = createPluginInput([
      {
        name: "multimodal-looker",
        model: { providerID: "openai", modelID: "gpt-5.4" },
        variant: "medium",
      },
    ])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: rundaoVisionModel(),
      agentVariant: undefined,
    })
  })

  test("falls back to the hardcoded multimodal chain when no dynamic vision model exists", async () => {
    // given (the vision cache stays empty: it is cleared in beforeEach and never seeded here)
    stubModelEnvironment(["google/gemini-3-flash"], ["google"])
    const ctx = createPluginInput([])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: { providerID: "google", modelID: "gemini-3-flash" },
      agentVariant: undefined,
    })
  })
})

View File

@@ -1,6 +1,11 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { MULTIMODAL_LOOKER_AGENT } from "./constants"
import { log } from "../../shared"
import { fetchAvailableModels } from "../../shared/model-availability"
import { log } from "../../shared/logger"
import { readConnectedProvidersCache } from "../../shared/connected-providers-cache"
import { resolveModelPipeline } from "../../shared/model-resolution-pipeline"
import { readVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
import { buildMultimodalLookerFallbackChain } from "./multimodal-fallback-chain"
type AgentModel = { providerID: string; modelID: string }
@@ -19,6 +24,20 @@ function isObject(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null
}
/**
 * Joins a model's provider and model identifiers into a single
 * `providerID/modelID` string.
 *
 * @param model - The provider/model pair to format.
 * @returns The two identifiers separated by one `/`.
 */
function getFullModelKey(model: AgentModel): string {
  const { providerID, modelID } = model
  return `${providerID}/${modelID}`
}
/**
 * Splits a `providerID/modelID` string at its first `/`.
 *
 * Everything after the first slash, including any further slashes, becomes
 * the model ID, so `"rundao/public/qwen3.5-397b"` yields provider `"rundao"`
 * and model `"public/qwen3.5-397b"`.
 *
 * @param model - The combined model string to split.
 * @returns The parsed pair, or `undefined` when there is no slash, the
 *   provider part is empty, or the model part is empty.
 */
function parseAgentModel(model: string): AgentModel | undefined {
  const slashAt = model.indexOf("/")
  // -1 means no separator at all; 0 means the provider part is empty.
  if (slashAt <= 0) {
    return undefined
  }

  const modelID = model.slice(slashAt + 1)
  if (modelID.length === 0) {
    return undefined
  }

  return { providerID: model.slice(0, slashAt), modelID }
}
function toAgentInfo(value: unknown): AgentInfo | null {
if (!isObject(value)) return null
const name = typeof value["name"] === "string" ? value["name"] : undefined
@@ -33,22 +52,83 @@ function toAgentInfo(value: unknown): AgentInfo | null {
return { name, model, variant }
}
/**
 * Looks up the agent named `MULTIMODAL_LOOKER_AGENT` among the agents
 * reported by `ctx.client.app.agents()` and returns its model and variant.
 *
 * The name comparison is case-insensitive. Entries that `toAgentInfo`
 * rejects (returns a falsy value for) are ignored.
 *
 * @param ctx - Plugin input whose client is queried for the agent list.
 * @returns The first matching agent's `model` and `variant`; both fields are
 *   `undefined` when the client exposes no agent list or no agent matches.
 */
async function resolveRegisteredAgentMetadata(
  ctx: PluginInput,
): Promise<ResolvedAgentMetadata> {
  const response = await ctx.client.app?.agents?.()
  const rawAgents = isObject(response) ? response["data"] : undefined
  // Convert every entry up front; rejected entries stay in the list and simply never match below.
  const parsedAgents = Array.isArray(rawAgents) ? rawAgents.map(toAgentInfo) : []

  for (const agent of parsedAgents) {
    if (agent?.name?.toLowerCase() === MULTIMODAL_LOOKER_AGENT.toLowerCase()) {
      return {
        agentModel: agent?.model,
        agentVariant: agent?.variant,
      }
    }
  }

  return {
    agentModel: undefined,
    agentVariant: undefined,
  }
}
/**
 * Resolves a model by running `resolveModelPipeline` over a fallback chain
 * built from the given vision-capable models, constrained by the connected
 * providers and the models `fetchAvailableModels` reports for them.
 *
 * @param ctx - Plugin input; its client is passed to `fetchAvailableModels`.
 * @param visionCapableModels - Models used to build the fallback chain.
 *   Defaults to the current contents of the vision-capable models cache.
 * @returns The resolved model (parsed from the pipeline's `model` string) and
 *   its variant; both fields are `undefined` when the pipeline yields nothing.
 *   `agentModel` is also `undefined` when the resolved string cannot be parsed.
 */
async function resolveDynamicAgentMetadata(
  ctx: PluginInput,
  visionCapableModels = readVisionCapableModelsCache(),
): Promise<ResolvedAgentMetadata> {
  const fallbackChain = buildMultimodalLookerFallbackChain(visionCapableModels)
  const connectedProviders = readConnectedProvidersCache()
  const availableModels = await fetchAvailableModels(ctx.client, { connectedProviders })

  const picked = resolveModelPipeline({
    constraints: { availableModels, connectedProviders },
    policy: { fallbackChain },
  })

  if (!picked) {
    return {
      agentModel: undefined,
      agentVariant: undefined,
    }
  }

  return {
    agentModel: parseAgentModel(picked.model),
    agentVariant: picked.variant,
  }
}
/**
 * Reports whether the configured model and the dynamically resolved model are
 * the same model, judged by comparing their `getFullModelKey` strings.
 *
 * @param configuredModel - Model taken from the registered agent, if any.
 * @param dynamicModel - Model produced by dynamic resolution, if any.
 * @returns `true` only when both models are present and their keys are equal.
 */
function isConfiguredVisionModel(
  configuredModel: AgentModel | undefined,
  dynamicModel: AgentModel | undefined,
): boolean {
  return configuredModel && dynamicModel
    ? getFullModelKey(configuredModel) === getFullModelKey(dynamicModel)
    : false
}
export async function resolveMultimodalLookerAgentMetadata(
ctx: PluginInput
): Promise<ResolvedAgentMetadata> {
try {
const agentsResult = await ctx.client.app?.agents?.()
const agentsRaw = isObject(agentsResult) ? agentsResult["data"] : undefined
const agents = Array.isArray(agentsRaw) ? agentsRaw.map(toAgentInfo).filter(Boolean) : []
const registeredMetadata = await resolveRegisteredAgentMetadata(ctx)
const visionCapableModels = readVisionCapableModelsCache()
const matched = agents.find(
(agent) => agent?.name?.toLowerCase() === MULTIMODAL_LOOKER_AGENT.toLowerCase()
)
return {
agentModel: matched?.model,
agentVariant: matched?.variant,
if (registeredMetadata.agentModel && visionCapableModels.length === 0) {
return registeredMetadata
}
const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels)
if (isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel)) {
return registeredMetadata
}
if (dynamicMetadata.agentModel) {
return dynamicMetadata
}
return registeredMetadata
} catch (error) {
log("[look_at] Failed to resolve multimodal-looker model info", error)
return {}