fix(look-at): resolve multimodal models from vision-capable providers
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
115
src/tools/look-at/multimodal-agent-metadata.test.ts
Normal file
115
src/tools/look-at/multimodal-agent-metadata.test.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
/// <reference types="bun-types" />
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, mock, spyOn, test } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { resolveMultimodalLookerAgentMetadata } from "./multimodal-agent-metadata"
|
||||
import { setVisionCapableModelsCache, clearVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
|
||||
import * as connectedProvidersCache from "../../shared/connected-providers-cache"
|
||||
import * as modelAvailability from "../../shared/model-availability"
|
||||
|
||||
/**
 * Builds a minimal `PluginInput` stub for these tests.
 *
 * Only `client.app.agents` is functional: it is a bun mock that resolves to
 * `{ data: agentData }`. Every other field is either a fixed placeholder value
 * or an empty object cast to the expected type.
 *
 * @param agentData - Raw agent records the mocked `client.app.agents()` resolves with.
 * @returns A `PluginInput` whose client exposes only the mocked `app.agents` endpoint.
 */
function createPluginInput(agentData: Array<Record<string, unknown>>): PluginInput {
  const client = {} as PluginInput["client"]
  // Attach the mock after the cast so only the endpoint under test has to exist.
  Object.assign(client, {
    app: {
      agents: mock(async () => ({ data: agentData })),
    },
  })

  return {
    client,
    project: {} as PluginInput["project"],
    directory: "/project",
    worktree: "/project",
    serverUrl: new URL("http://localhost"),
    $: {} as PluginInput["$"],
  }
}
|
||||
|
||||
// Suite for resolveMultimodalLookerAgentMetadata. Each test seeds the
// vision-capable models cache (or leaves it cleared), stubs
// fetchAvailableModels / readConnectedProvidersCache, and asserts on the
// resolved { agentModel, agentVariant } pair.
describe("resolveMultimodalLookerAgentMetadata", () => {
  beforeEach(() => {
    // Start every test from an empty vision-capable models cache.
    clearVisionCapableModelsCache()
  })

  afterEach(() => {
    clearVisionCapableModelsCache()
    // Undo the spies installed by the test. The optional call keeps this a
    // no-op when a test never spied on the function.
    ;(modelAvailability.fetchAvailableModels as unknown as { mockRestore?: () => void }).mockRestore?.()
    ;(connectedProvidersCache.readConnectedProvidersCache as unknown as { mockRestore?: () => void }).mockRestore?.()
  })

  test("returns configured multimodal-looker model when it already matches a vision-capable override", async () => {
    // given
    setVisionCapableModelsCache(new Map([
      [
        "rundao/public/qwen3.5-397b",
        { providerID: "rundao", modelID: "public/qwen3.5-397b" },
      ],
    ]))
    spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(
      new Set(["rundao/public/qwen3.5-397b"]),
    )
    spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["rundao"])
    const ctx = createPluginInput([
      {
        name: "multimodal-looker",
        model: { providerID: "rundao", modelID: "public/qwen3.5-397b" },
      },
    ])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: { providerID: "rundao", modelID: "public/qwen3.5-397b" },
      agentVariant: undefined,
    })
  })

  test("prefers connected vision-capable provider models before the hardcoded fallback chain", async () => {
    // given
    setVisionCapableModelsCache(new Map([
      [
        "rundao/public/qwen3.5-397b",
        { providerID: "rundao", modelID: "public/qwen3.5-397b" },
      ],
    ]))
    spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(
      new Set(["openai/gpt-5.4", "rundao/public/qwen3.5-397b"]),
    )
    spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai", "rundao"])
    // The registered agent points at a different model (and carries a variant)
    // than the cached vision-capable one.
    const ctx = createPluginInput([
      {
        name: "multimodal-looker",
        model: { providerID: "openai", modelID: "gpt-5.4" },
        variant: "medium",
      },
    ])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: { providerID: "rundao", modelID: "public/qwen3.5-397b" },
      agentVariant: undefined,
    })
  })

  test("falls back to the hardcoded multimodal chain when no dynamic vision model exists", async () => {
    // given: the vision-capable cache stays cleared (see beforeEach) and no
    // agent is registered.
    spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(
      new Set(["google/gemini-3-flash"]),
    )
    spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"])
    const ctx = createPluginInput([])

    // when
    const result = await resolveMultimodalLookerAgentMetadata(ctx)

    // then
    expect(result).toEqual({
      agentModel: { providerID: "google", modelID: "gemini-3-flash" },
      agentVariant: undefined,
    })
  })
})
|
||||
@@ -1,6 +1,11 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { MULTIMODAL_LOOKER_AGENT } from "./constants"
|
||||
import { log } from "../../shared"
|
||||
import { fetchAvailableModels } from "../../shared/model-availability"
|
||||
import { log } from "../../shared/logger"
|
||||
import { readConnectedProvidersCache } from "../../shared/connected-providers-cache"
|
||||
import { resolveModelPipeline } from "../../shared/model-resolution-pipeline"
|
||||
import { readVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
|
||||
import { buildMultimodalLookerFallbackChain } from "./multimodal-fallback-chain"
|
||||
|
||||
type AgentModel = { providerID: string; modelID: string }
|
||||
|
||||
@@ -19,6 +24,20 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
|
||||
/**
 * Formats a model reference as a single `providerID/modelID` string.
 *
 * @param model - Provider and model identifiers to join.
 * @returns The two identifiers joined by one "/"; any "/" already inside
 *   `modelID` is kept as-is.
 */
function getFullModelKey(model: AgentModel): string {
  return `${model.providerID}/${model.modelID}`
}
|
||||
|
||||
/**
 * Splits a `providerID/modelID` string into its two parts.
 *
 * Only the first "/" is treated as the separator, so model IDs that contain
 * slashes themselves (e.g. "public/qwen3.5-397b") survive intact.
 *
 * @param model - Model string expected to look like `provider/model`.
 * @returns The parsed pair, or `undefined` when there is no "/", the provider
 *   part is empty, or the model part is empty.
 */
function parseAgentModel(model: string): AgentModel | undefined {
  const separatorIndex = model.indexOf("/")
  // -1: no separator at all; 0: empty provider part.
  if (separatorIndex <= 0) {
    return undefined
  }

  const modelID = model.slice(separatorIndex + 1)
  if (modelID.length === 0) {
    return undefined
  }

  return { providerID: model.slice(0, separatorIndex), modelID }
}
|
||||
|
||||
function toAgentInfo(value: unknown): AgentInfo | null {
|
||||
if (!isObject(value)) return null
|
||||
const name = typeof value["name"] === "string" ? value["name"] : undefined
|
||||
@@ -33,22 +52,83 @@ function toAgentInfo(value: unknown): AgentInfo | null {
|
||||
return { name, model, variant }
|
||||
}
|
||||
|
||||
/**
 * Looks up the multimodal-looker agent among the agents reported by the client.
 *
 * Calls `ctx.client.app.agents()` (optional-chained, so a client without that
 * endpoint yields no agents), normalizes each entry of the response's `data`
 * array with `toAgentInfo`, and matches on agent name case-insensitively.
 *
 * @param ctx - Plugin input whose client is queried for registered agents.
 * @returns The matched agent's `model` and `variant`. Both are `undefined`
 *   when no agent matches or the response has no `data` array.
 */
async function resolveRegisteredAgentMetadata(
  ctx: PluginInput,
): Promise<ResolvedAgentMetadata> {
  const agentsResult = await ctx.client.app?.agents?.()
  const agentsRaw = isObject(agentsResult) ? agentsResult["data"] : undefined
  // Entries that toAgentInfo rejects come back as null and are dropped here.
  const agents = Array.isArray(agentsRaw) ? agentsRaw.map(toAgentInfo).filter(Boolean) : []

  // Lower-case the target once instead of on every comparison.
  const targetName = MULTIMODAL_LOOKER_AGENT.toLowerCase()
  const matched = agents.find((agent) => agent?.name?.toLowerCase() === targetName)

  return {
    agentModel: matched?.model,
    agentVariant: matched?.variant,
  }
}
|
||||
|
||||
/**
 * Resolves a model for the multimodal-looker agent from the fallback chain
 * built out of the vision-capable models.
 *
 * Steps: build the chain with `buildMultimodalLookerFallbackChain`, read the
 * connected providers, fetch the available models for them, then hand all of
 * that to `resolveModelPipeline` and parse the `model` string it returns.
 *
 * @param ctx - Plugin input; its client is passed to `fetchAvailableModels`.
 * @param visionCapableModels - Models used to build the fallback chain.
 *   Defaults to the current contents of the vision-capable models cache.
 * @returns The parsed model and the pipeline's `variant`. `agentModel` is
 *   `undefined` when the pipeline returns nothing or its `model` string is not
 *   in `provider/model` form.
 */
async function resolveDynamicAgentMetadata(
  ctx: PluginInput,
  visionCapableModels = readVisionCapableModelsCache(),
): Promise<ResolvedAgentMetadata> {
  const fallbackChain = buildMultimodalLookerFallbackChain(visionCapableModels)
  const connectedProviders = readConnectedProvidersCache()
  const availableModels = await fetchAvailableModels(ctx.client, {
    connectedProviders,
  })

  const resolution = resolveModelPipeline({
    constraints: {
      availableModels,
      connectedProviders,
    },
    policy: {
      fallbackChain,
    },
  })

  // The pipeline reports the model as one string; callers need the split form.
  const agentModel = resolution ? parseAgentModel(resolution.model) : undefined
  return {
    agentModel,
    agentVariant: resolution?.variant,
  }
}
|
||||
|
||||
/**
 * Tells whether the configured model and the dynamically resolved model are
 * the same model.
 *
 * @param configuredModel - Model taken from the registered agent, if any.
 * @param dynamicModel - Model produced by dynamic resolution, if any.
 * @returns `true` only when both are present and their `providerID/modelID`
 *   keys are identical; `false` when either one is missing.
 */
function isConfiguredVisionModel(
  configuredModel: AgentModel | undefined,
  dynamicModel: AgentModel | undefined,
): boolean {
  if (!configuredModel || !dynamicModel) {
    return false
  }

  return getFullModelKey(configuredModel) === getFullModelKey(dynamicModel)
}
|
||||
|
||||
export async function resolveMultimodalLookerAgentMetadata(
|
||||
ctx: PluginInput
|
||||
): Promise<ResolvedAgentMetadata> {
|
||||
try {
|
||||
const agentsResult = await ctx.client.app?.agents?.()
|
||||
const agentsRaw = isObject(agentsResult) ? agentsResult["data"] : undefined
|
||||
const agents = Array.isArray(agentsRaw) ? agentsRaw.map(toAgentInfo).filter(Boolean) : []
|
||||
const registeredMetadata = await resolveRegisteredAgentMetadata(ctx)
|
||||
const visionCapableModels = readVisionCapableModelsCache()
|
||||
|
||||
const matched = agents.find(
|
||||
(agent) => agent?.name?.toLowerCase() === MULTIMODAL_LOOKER_AGENT.toLowerCase()
|
||||
)
|
||||
|
||||
return {
|
||||
agentModel: matched?.model,
|
||||
agentVariant: matched?.variant,
|
||||
if (registeredMetadata.agentModel && visionCapableModels.length === 0) {
|
||||
return registeredMetadata
|
||||
}
|
||||
|
||||
const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels)
|
||||
|
||||
if (isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel)) {
|
||||
return registeredMetadata
|
||||
}
|
||||
|
||||
if (dynamicMetadata.agentModel) {
|
||||
return dynamicMetadata
|
||||
}
|
||||
|
||||
return registeredMetadata
|
||||
} catch (error) {
|
||||
log("[look_at] Failed to resolve multimodal-looker model info", error)
|
||||
return {}
|
||||
|
||||
Reference in New Issue
Block a user