feat(gui): add webview element screenshots
Some checks are pending
Security Scan / security (push) Waiting to run
Test / test (push) Successful in 1m36s

This commit is contained in:
Virgil 2026-04-02 13:55:56 +00:00
parent a4c696ec01
commit 57fb567a68
6 changed files with 256 additions and 14 deletions

View file

@ -345,6 +345,16 @@ func (s *Service) handleWSMessage(msg WSMessage) (any, bool, error) {
return nil, false, e
}
result, handled, err = s.Core().PERFORM(webview.TaskScreenshot{Window: w})
case "webview:screenshot-element":
w, e := wsRequire(msg.Data, "window")
if e != nil {
return nil, false, e
}
sel, e := wsRequire(msg.Data, "selector")
if e != nil {
return nil, false, e
}
result, handled, err = s.Core().PERFORM(webview.TaskScreenshotElement{Window: w, Selector: sel})
case "webview:scroll":
w, e := wsRequire(msg.Data, "window")
if e != nil {
@ -521,6 +531,22 @@ func (s *Service) handleWSMessage(msg WSMessage) (any, bool, error) {
}
sel, _ := msg.Data["selector"].(string) // selector optional for dom-tree (defaults to root)
result, handled, err = s.Core().QUERY(webview.QueryDOMTree{Window: w, Selector: sel})
case "webview:source":
w, e := wsRequire(msg.Data, "window")
if e != nil {
return nil, false, e
}
result, handled, err = s.Core().QUERY(webview.QueryDOMTree{Window: w})
case "webview:element-info":
w, e := wsRequire(msg.Data, "window")
if e != nil {
return nil, false, e
}
sel, e := wsRequire(msg.Data, "selector")
if e != nil {
return nil, false, e
}
result, handled, err = s.Core().QUERY(webview.QuerySelector{Window: w, Selector: sel})
case "webview:url":
w, e := wsRequire(msg.Data, "window")
if e != nil {

View file

@ -110,6 +110,30 @@ func (s *Subsystem) webviewScreenshot(_ context.Context, _ *mcp.CallToolRequest,
return nil, WebviewScreenshotOutput{Base64: sr.Base64, MimeType: sr.MimeType}, nil
}
// --- webview_screenshot_element ---

type (
	// WebviewScreenshotElementInput is the argument payload of the
	// webview_screenshot_element tool.
	WebviewScreenshotElementInput struct {
		// Window names the window whose webview is captured.
		Window string `json:"window"`
		// Selector is the CSS selector of the element to capture.
		Selector string `json:"selector"`
	}

	// WebviewScreenshotElementOutput is the result payload of the
	// webview_screenshot_element tool.
	WebviewScreenshotElementOutput struct {
		// Base64 is the base64-encoded image data.
		Base64 string `json:"base64"`
		// MimeType is the media type of the encoded image.
		MimeType string `json:"mimeType"`
	}
)
// webviewScreenshotElement handles the webview_screenshot_element tool. It
// dispatches a webview.TaskScreenshotElement for the requested window and
// selector and copies the resulting image data into the tool output.
//
// An error is returned when the task fails or when its result is not a
// webview.ScreenshotResult. The *mcp.CallToolResult is always nil.
func (s *Subsystem) webviewScreenshotElement(_ context.Context, _ *mcp.CallToolRequest, input WebviewScreenshotElementInput) (*mcp.CallToolResult, WebviewScreenshotElementOutput, error) {
	var out WebviewScreenshotElementOutput

	task := webview.TaskScreenshotElement{
		Window:   input.Window,
		Selector: input.Selector,
	}
	raw, _, err := s.core.PERFORM(task)
	if err != nil {
		return nil, out, err
	}

	shot, isShot := raw.(webview.ScreenshotResult)
	if !isShot {
		return nil, out, fmt.Errorf("unexpected result type from webview element screenshot")
	}

	out.Base64 = shot.Base64
	out.MimeType = shot.MimeType
	return nil, out, nil
}
// --- webview_scroll ---
type WebviewScrollInput struct {
@ -328,6 +352,12 @@ func (s *Subsystem) webviewQuery(_ context.Context, _ *mcp.CallToolRequest, inpu
return nil, WebviewQueryOutput{Element: el}, nil
}
// --- webview_element_info ---

// webviewElementInfo is an alias for webviewQuery: it takes the same input
// and returns the same output, and exists so the lookup can be exposed under
// a second name.
func (s *Subsystem) webviewElementInfo(ctx context.Context, req *mcp.CallToolRequest, input WebviewQueryInput) (*mcp.CallToolResult, WebviewQueryOutput, error) {
	// Forward the caller's context and request instead of nil: a nil
	// context.Context must never be passed on, and forwarding keeps
	// cancellation intact should webviewQuery start using either argument.
	return s.webviewQuery(ctx, req, input)
}
// --- webview_query_all ---
type WebviewQueryAllInput struct {
@ -374,6 +404,12 @@ func (s *Subsystem) webviewDOMTree(_ context.Context, _ *mcp.CallToolRequest, in
return nil, WebviewDOMTreeOutput{HTML: html}, nil
}
// --- webview_source ---

// webviewSource is an alias for webviewDOMTree: it takes the same input and
// returns the same output, and exists so the HTML lookup can be exposed under
// a second name.
func (s *Subsystem) webviewSource(ctx context.Context, req *mcp.CallToolRequest, input WebviewDOMTreeInput) (*mcp.CallToolResult, WebviewDOMTreeOutput, error) {
	// Forward the caller's context and request instead of nil: a nil
	// context.Context must never be passed on, and forwarding keeps
	// cancellation intact should webviewDOMTree start using either argument.
	return s.webviewDOMTree(ctx, req, input)
}
// --- webview_computed_style ---
type WebviewComputedStyleInput struct {
@ -613,6 +649,7 @@ func (s *Subsystem) registerWebviewTools(server *mcp.Server) {
mcp.AddTool(server, &mcp.Tool{Name: "webview_type", Description: "Type text into an element in a webview"}, s.webviewType)
mcp.AddTool(server, &mcp.Tool{Name: "webview_navigate", Description: "Navigate a webview to a URL"}, s.webviewNavigate)
mcp.AddTool(server, &mcp.Tool{Name: "webview_screenshot", Description: "Capture a webview screenshot as base64 PNG"}, s.webviewScreenshot)
mcp.AddTool(server, &mcp.Tool{Name: "webview_screenshot_element", Description: "Capture a specific element as base64 PNG"}, s.webviewScreenshotElement)
mcp.AddTool(server, &mcp.Tool{Name: "webview_scroll", Description: "Scroll a webview to an absolute position"}, s.webviewScroll)
mcp.AddTool(server, &mcp.Tool{Name: "webview_hover", Description: "Hover over an element in a webview"}, s.webviewHover)
mcp.AddTool(server, &mcp.Tool{Name: "webview_select", Description: "Select an option in a select element"}, s.webviewSelect)
@ -622,8 +659,10 @@ func (s *Subsystem) registerWebviewTools(server *mcp.Server) {
mcp.AddTool(server, &mcp.Tool{Name: "webview_console", Description: "Get captured console messages from a webview"}, s.webviewConsole)
mcp.AddTool(server, &mcp.Tool{Name: "webview_console_clear", Description: "Clear captured console messages"}, s.webviewConsoleClear)
mcp.AddTool(server, &mcp.Tool{Name: "webview_query", Description: "Find a single DOM element by CSS selector"}, s.webviewQuery)
mcp.AddTool(server, &mcp.Tool{Name: "webview_element_info", Description: "Get detailed information about a DOM element"}, s.webviewElementInfo)
mcp.AddTool(server, &mcp.Tool{Name: "webview_query_all", Description: "Find all DOM elements matching a CSS selector"}, s.webviewQueryAll)
mcp.AddTool(server, &mcp.Tool{Name: "webview_dom_tree", Description: "Get HTML content of a webview"}, s.webviewDOMTree)
mcp.AddTool(server, &mcp.Tool{Name: "webview_source", Description: "Get page HTML source"}, s.webviewSource)
mcp.AddTool(server, &mcp.Tool{Name: "webview_computed_style", Description: "Get computed styles for an element"}, s.webviewComputedStyle)
mcp.AddTool(server, &mcp.Tool{Name: "webview_performance", Description: "Get page performance metrics"}, s.webviewPerformance)
mcp.AddTool(server, &mcp.Tool{Name: "webview_resources", Description: "List loaded page resources"}, s.webviewResources)

View file

@ -94,6 +94,12 @@ type TaskScreenshot struct {
Window string `json:"window"`
}
// TaskScreenshotElement captures a specific element as PNG. Result: ScreenshotResult
//
// The handler resolves the connection for Window, measures the first element
// matching Selector, and crops a screenshot to that element. The result carries
// the base64-encoded image with MIME type "image/png". The task fails when no
// element matches Selector or the matched element has no measurable bounds.
type TaskScreenshotElement struct {
// Window identifies the webview connection to capture from.
Window string `json:"window"`
// Selector is the CSS selector passed to document.querySelector.
Selector string `json:"selector"`
}
// TaskScroll scrolls to an absolute position (window.scrollTo). Result: nil
type TaskScroll struct {
Window string `json:"window"`

View file

@ -2,10 +2,15 @@
package webview
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"image"
"image/draw"
"image/png"
"math"
"reflect"
"strconv"
"strings"
@ -381,6 +386,19 @@ func (s *Service) handleTask(_ *core.Core, t core.Task) (any, bool, error) {
Base64: base64.StdEncoding.EncodeToString(png),
MimeType: "image/png",
}, true, nil
case TaskScreenshotElement:
conn, err := s.getConn(t.Window)
if err != nil {
return nil, true, err
}
png, err := captureElementScreenshot(conn, t.Selector)
if err != nil {
return nil, true, err
}
return ScreenshotResult{
Base64: base64.StdEncoding.EncodeToString(png),
MimeType: "image/png",
}, true, nil
case TaskScroll:
conn, err := s.getConn(t.Window)
if err != nil {
@ -433,8 +451,26 @@ func (s *Service) handleTask(_ *core.Core, t core.Task) (any, bool, error) {
_, err = conn.Evaluate(highlightScript(t.Selector, t.Colour))
return nil, true, err
case TaskOpenDevTools:
ws, err := core.ServiceFor[*window.Service](s.Core(), "window")
if err != nil {
return nil, true, err
}
pw, ok := ws.Manager().Get(t.Window)
if !ok {
return nil, true, fmt.Errorf("window not found: %s", t.Window)
}
pw.OpenDevTools()
return nil, true, nil
case TaskCloseDevTools:
ws, err := core.ServiceFor[*window.Service](s.Core(), "window")
if err != nil {
return nil, true, err
}
pw, ok := ws.Manager().Get(t.Window)
if !ok {
return nil, true, fmt.Errorf("window not found: %s", t.Window)
}
pw.CloseDevTools()
return nil, true, nil
case TaskInjectNetworkLogging:
conn, err := s.getConn(t.Window)
@ -502,6 +538,91 @@ func coerceToNetworkEntries(v any) ([]NetworkEntry, error) {
return coerceJSON[[]NetworkEntry](v)
}
// elementScreenshotBounds is the measurement object produced by the script
// from elementScreenshotScript: the element's getBoundingClientRect values
// plus the page's device pixel ratio. captureElementScreenshot multiplies the
// rectangle by DevicePixelRatio to obtain pixel coordinates in the screenshot.
type elementScreenshotBounds struct {
// Left is rect.left of the element's bounding client rect.
Left float64 `json:"left"`
// Top is rect.top of the element's bounding client rect.
Top float64 `json:"top"`
// Width is rect.width of the element's bounding client rect.
Width float64 `json:"width"`
// Height is rect.height of the element's bounding client rect.
Height float64 `json:"height"`
// DevicePixelRatio is window.devicePixelRatio, or 1 when the page reports
// a falsy value.
DevicePixelRatio float64 `json:"devicePixelRatio"`
}
// elementScreenshotScript returns a JavaScript expression that measures the
// first element matching selector so it can be cropped out of a screenshot.
//
// The expression evaluates to null when no element matches. Otherwise it
// scrolls the element towards the centre of the viewport (best effort; any
// exception is swallowed) and evaluates to an object with left, top, width,
// height and devicePixelRatio, matching elementScreenshotBounds.
//
// The scroll is requested with behavior "instant". With the default behavior
// a page that sets CSS `scroll-behavior: smooth` would animate the scroll, and
// the rectangle read on the next line would describe a position the element
// no longer occupies by the time the screenshot is taken.
//
// selector is passed through jsQuote before being embedded (presumably
// producing a safely quoted JS string literal; verify against jsQuote).
func elementScreenshotScript(selector string) string {
	sel := jsQuote(selector)
	return fmt.Sprintf(`(function(){
const el = document.querySelector(%s);
if (!el) return null;
try { el.scrollIntoView({behavior: "instant", block: "center", inline: "center"}); } catch (e) {}
const rect = el.getBoundingClientRect();
return {
left: rect.left,
top: rect.top,
width: rect.width,
height: rect.height,
devicePixelRatio: window.devicePixelRatio || 1
};
})()`, sel)
}
// captureElementScreenshot returns PNG bytes showing only the first element
// matched by selector.
//
// It evaluates the measurement script on conn, takes a screenshot through
// conn, scales the measured rectangle by the reported device pixel ratio,
// clips it to the screenshot and re-encodes the clipped region as PNG.
//
// Errors from conn, from decoding the measurement or the screenshot, and from
// PNG encoding are returned unchanged. Dedicated errors are returned when the
// element is not found, has a non-positive width or height, or lies entirely
// outside the captured image.
func captureElementScreenshot(conn connector, selector string) ([]byte, error) {
	measured, err := conn.Evaluate(elementScreenshotScript(selector))
	switch {
	case err != nil:
		return nil, err
	case measured == nil:
		// The measurement script yields null when nothing matches.
		return nil, fmt.Errorf("webview: element not found: %s", selector)
	}

	rect, err := coerceJSON[elementScreenshotBounds](measured)
	if err != nil {
		return nil, err
	}
	if rect.Width <= 0 || rect.Height <= 0 {
		return nil, fmt.Errorf("webview: element has no measurable bounds: %s", selector)
	}

	page, err := conn.Screenshot()
	if err != nil {
		return nil, err
	}
	full, _, err := image.Decode(bytes.NewReader(page))
	if err != nil {
		return nil, err
	}

	// A missing or nonsensical ratio is treated as 1.
	ratio := rect.DevicePixelRatio
	if ratio <= 0 {
		ratio = 1
	}

	// Round outwards so partially covered edge pixels are kept, then clip to
	// the screenshot. The Rectangle literal is used on purpose: unlike
	// image.Rect it does not reorder the corners.
	region := image.Rectangle{
		Min: image.Point{
			X: int(math.Floor(rect.Left * ratio)),
			Y: int(math.Floor(rect.Top * ratio)),
		},
		Max: image.Point{
			X: int(math.Ceil((rect.Left + rect.Width) * ratio)),
			Y: int(math.Ceil((rect.Top + rect.Height) * ratio)),
		},
	}.Intersect(full.Bounds())
	if region.Empty() {
		return nil, fmt.Errorf("webview: element is outside the captured screenshot: %s", selector)
	}

	cropped := image.NewRGBA(image.Rect(0, 0, region.Dx(), region.Dy()))
	draw.Draw(cropped, cropped.Bounds(), full, region.Min, draw.Src)

	var out bytes.Buffer
	if err := png.Encode(&out, cropped); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// realConnector wraps *gowebview.Webview, converting types at the boundary.
type realConnector struct {
wv *gowebview.Webview

View file

@ -2,7 +2,12 @@
package webview
import (
"bytes"
"context"
"encoding/base64"
"image"
"image/color"
"image/png"
"strings"
"testing"
@ -107,7 +112,11 @@ func (m *mockConnector) GetConsole() []ConsoleMessage { return m.console }
func newTestService(t *testing.T, mock *mockConnector) (*Service, *core.Core) {
t.Helper()
factory := Register()
c, err := core.New(core.WithService(factory), core.WithServiceLock())
c, err := core.New(
core.WithService(window.Register(window.NewMockPlatform())),
core.WithService(factory),
core.WithServiceLock(),
)
require.NoError(t, err)
require.NoError(t, c.ServiceStartup(context.Background(), nil))
svc := core.MustServiceFor[*Service](c, "webview")
@ -203,6 +212,43 @@ func TestTaskScreenshot_Good(t *testing.T) {
assert.NotEmpty(t, sr.Base64)
}
// TestTaskScreenshotElement_Good checks that TaskScreenshotElement crops the
// screenshot to the element's reported bounds. Besides the size of the
// result, it compares every cropped pixel with the source image so that a crop
// of the right size taken from the wrong origin is caught as well.
func TestTaskScreenshotElement_Good(t *testing.T) {
	// Every pixel of the 4x4 source gets a unique colour derived from its
	// coordinates, which makes the origin of any crop identifiable.
	img := image.NewRGBA(image.Rect(0, 0, 4, 4))
	for y := 0; y < 4; y++ {
		for x := 0; x < 4; x++ {
			img.SetRGBA(x, y, color.RGBA{R: uint8(x * 40), G: uint8(y * 40), B: 200, A: 255})
		}
	}
	var buf bytes.Buffer
	require.NoError(t, png.Encode(&buf, img))

	// The element is reported as the 2x2 square starting at (1,1).
	mock := &mockConnector{
		screenshot: buf.Bytes(),
		evalFn: func(script string) (any, error) {
			return map[string]any{
				"left":             1.0,
				"top":              1.0,
				"width":            2.0,
				"height":           2.0,
				"devicePixelRatio": 1.0,
			}, nil
		},
	}
	_, c := newTestService(t, mock)

	result, handled, err := c.PERFORM(TaskScreenshotElement{Window: "main", Selector: "#card"})
	require.NoError(t, err)
	assert.True(t, handled)

	sr, ok := result.(ScreenshotResult)
	require.True(t, ok)
	assert.Equal(t, "image/png", sr.MimeType)

	raw, err := base64.StdEncoding.DecodeString(sr.Base64)
	require.NoError(t, err)
	decoded, err := png.Decode(bytes.NewReader(raw))
	require.NoError(t, err)
	require.Equal(t, image.Rect(0, 0, 2, 2), decoded.Bounds())

	// Pixel (x, y) of the crop must be pixel (x+1, y+1) of the source.
	for y := 0; y < 2; y++ {
		for x := 0; x < 2; x++ {
			want := img.RGBAAt(x+1, y+1)
			got := color.RGBAModel.Convert(decoded.At(x, y))
			assert.Equal(t, want, got, "cropped pixel (%d,%d)", x, y)
		}
	}
}
func TestTaskClearConsole_Good(t *testing.T) {
mock := &mockConnector{}
_, c := newTestService(t, mock)
@ -214,6 +260,8 @@ func TestTaskClearConsole_Good(t *testing.T) {
func TestTaskDevTools_Good(t *testing.T) {
_, c := newTestService(t, &mockConnector{})
_, _, err := c.PERFORM(window.TaskOpenWindow{Opts: []window.WindowOption{window.WithName("main")}})
require.NoError(t, err)
_, handled, err := c.PERFORM(TaskOpenDevTools{Window: "main"})
require.NoError(t, err)
assert.True(t, handled)

View file

@ -9,19 +9,19 @@ type Platform interface {
// PlatformWindowOptions are the backend-specific options passed to CreateWindow.
type PlatformWindowOptions struct {
Name string
Title string
URL string
Width, Height int
X, Y int
MinWidth, MinHeight int
MaxWidth, MaxHeight int
Frameless bool
Hidden bool
AlwaysOnTop bool
BackgroundColour [4]uint8 // RGBA
DisableResize bool
EnableFileDrop bool
Name string
Title string
URL string
Width, Height int
X, Y int
MinWidth, MinHeight int
MaxWidth, MaxHeight int
Frameless bool
Hidden bool
AlwaysOnTop bool
BackgroundColour [4]uint8 // RGBA
DisableResize bool
EnableFileDrop bool
}
// PlatformWindow is a live window handle from the backend.
@ -54,6 +54,8 @@ type PlatformWindow interface {
Hide()
Fullscreen()
UnFullscreen()
OpenDevTools()
CloseDevTools()
// Events
OnWindowEvent(handler func(event WindowEvent))