feat(gui): add webview element screenshots
This commit is contained in:
parent
a4c696ec01
commit
57fb567a68
6 changed files with 256 additions and 14 deletions
|
|
@ -345,6 +345,16 @@ func (s *Service) handleWSMessage(msg WSMessage) (any, bool, error) {
|
|||
return nil, false, e
|
||||
}
|
||||
result, handled, err = s.Core().PERFORM(webview.TaskScreenshot{Window: w})
|
||||
case "webview:screenshot-element":
|
||||
w, e := wsRequire(msg.Data, "window")
|
||||
if e != nil {
|
||||
return nil, false, e
|
||||
}
|
||||
sel, e := wsRequire(msg.Data, "selector")
|
||||
if e != nil {
|
||||
return nil, false, e
|
||||
}
|
||||
result, handled, err = s.Core().PERFORM(webview.TaskScreenshotElement{Window: w, Selector: sel})
|
||||
case "webview:scroll":
|
||||
w, e := wsRequire(msg.Data, "window")
|
||||
if e != nil {
|
||||
|
|
@ -521,6 +531,22 @@ func (s *Service) handleWSMessage(msg WSMessage) (any, bool, error) {
|
|||
}
|
||||
sel, _ := msg.Data["selector"].(string) // selector optional for dom-tree (defaults to root)
|
||||
result, handled, err = s.Core().QUERY(webview.QueryDOMTree{Window: w, Selector: sel})
|
||||
case "webview:source":
|
||||
w, e := wsRequire(msg.Data, "window")
|
||||
if e != nil {
|
||||
return nil, false, e
|
||||
}
|
||||
result, handled, err = s.Core().QUERY(webview.QueryDOMTree{Window: w})
|
||||
case "webview:element-info":
|
||||
w, e := wsRequire(msg.Data, "window")
|
||||
if e != nil {
|
||||
return nil, false, e
|
||||
}
|
||||
sel, e := wsRequire(msg.Data, "selector")
|
||||
if e != nil {
|
||||
return nil, false, e
|
||||
}
|
||||
result, handled, err = s.Core().QUERY(webview.QuerySelector{Window: w, Selector: sel})
|
||||
case "webview:url":
|
||||
w, e := wsRequire(msg.Data, "window")
|
||||
if e != nil {
|
||||
|
|
|
|||
|
|
@ -110,6 +110,30 @@ func (s *Subsystem) webviewScreenshot(_ context.Context, _ *mcp.CallToolRequest,
|
|||
return nil, WebviewScreenshotOutput{Base64: sr.Base64, MimeType: sr.MimeType}, nil
|
||||
}
|
||||
|
||||
// --- webview_screenshot_element ---
|
||||
|
||||
// WebviewScreenshotElementInput is the input of the webview_screenshot_element
// tool: the window to capture from and the selector of the element to capture.
type WebviewScreenshotElementInput struct {
	Window   string `json:"window"`
	Selector string `json:"selector"`
}
|
||||
|
||||
// WebviewScreenshotElementOutput is the output of the
// webview_screenshot_element tool: the captured image as a base64 string
// together with its MIME type.
type WebviewScreenshotElementOutput struct {
	Base64   string `json:"base64"`
	MimeType string `json:"mimeType"`
}
|
||||
|
||||
func (s *Subsystem) webviewScreenshotElement(_ context.Context, _ *mcp.CallToolRequest, input WebviewScreenshotElementInput) (*mcp.CallToolResult, WebviewScreenshotElementOutput, error) {
|
||||
result, _, err := s.core.PERFORM(webview.TaskScreenshotElement{Window: input.Window, Selector: input.Selector})
|
||||
if err != nil {
|
||||
return nil, WebviewScreenshotElementOutput{}, err
|
||||
}
|
||||
sr, ok := result.(webview.ScreenshotResult)
|
||||
if !ok {
|
||||
return nil, WebviewScreenshotElementOutput{}, fmt.Errorf("unexpected result type from webview element screenshot")
|
||||
}
|
||||
return nil, WebviewScreenshotElementOutput{Base64: sr.Base64, MimeType: sr.MimeType}, nil
|
||||
}
|
||||
|
||||
// --- webview_scroll ---
|
||||
|
||||
type WebviewScrollInput struct {
|
||||
|
|
@ -328,6 +352,12 @@ func (s *Subsystem) webviewQuery(_ context.Context, _ *mcp.CallToolRequest, inpu
|
|||
return nil, WebviewQueryOutput{Element: el}, nil
|
||||
}
|
||||
|
||||
// --- webview_element_info ---
|
||||
|
||||
func (s *Subsystem) webviewElementInfo(_ context.Context, _ *mcp.CallToolRequest, input WebviewQueryInput) (*mcp.CallToolResult, WebviewQueryOutput, error) {
|
||||
return s.webviewQuery(nil, nil, input)
|
||||
}
|
||||
|
||||
// --- webview_query_all ---
|
||||
|
||||
type WebviewQueryAllInput struct {
|
||||
|
|
@ -374,6 +404,12 @@ func (s *Subsystem) webviewDOMTree(_ context.Context, _ *mcp.CallToolRequest, in
|
|||
return nil, WebviewDOMTreeOutput{HTML: html}, nil
|
||||
}
|
||||
|
||||
// --- webview_source ---
|
||||
|
||||
func (s *Subsystem) webviewSource(_ context.Context, _ *mcp.CallToolRequest, input WebviewDOMTreeInput) (*mcp.CallToolResult, WebviewDOMTreeOutput, error) {
|
||||
return s.webviewDOMTree(nil, nil, input)
|
||||
}
|
||||
|
||||
// --- webview_computed_style ---
|
||||
|
||||
type WebviewComputedStyleInput struct {
|
||||
|
|
@ -613,6 +649,7 @@ func (s *Subsystem) registerWebviewTools(server *mcp.Server) {
|
|||
mcp.AddTool(server, &mcp.Tool{Name: "webview_type", Description: "Type text into an element in a webview"}, s.webviewType)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_navigate", Description: "Navigate a webview to a URL"}, s.webviewNavigate)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_screenshot", Description: "Capture a webview screenshot as base64 PNG"}, s.webviewScreenshot)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_screenshot_element", Description: "Capture a specific element as base64 PNG"}, s.webviewScreenshotElement)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_scroll", Description: "Scroll a webview to an absolute position"}, s.webviewScroll)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_hover", Description: "Hover over an element in a webview"}, s.webviewHover)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_select", Description: "Select an option in a select element"}, s.webviewSelect)
|
||||
|
|
@ -622,8 +659,10 @@ func (s *Subsystem) registerWebviewTools(server *mcp.Server) {
|
|||
mcp.AddTool(server, &mcp.Tool{Name: "webview_console", Description: "Get captured console messages from a webview"}, s.webviewConsole)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_console_clear", Description: "Clear captured console messages"}, s.webviewConsoleClear)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_query", Description: "Find a single DOM element by CSS selector"}, s.webviewQuery)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_element_info", Description: "Get detailed information about a DOM element"}, s.webviewElementInfo)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_query_all", Description: "Find all DOM elements matching a CSS selector"}, s.webviewQueryAll)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_dom_tree", Description: "Get HTML content of a webview"}, s.webviewDOMTree)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_source", Description: "Get page HTML source"}, s.webviewSource)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_computed_style", Description: "Get computed styles for an element"}, s.webviewComputedStyle)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_performance", Description: "Get page performance metrics"}, s.webviewPerformance)
|
||||
mcp.AddTool(server, &mcp.Tool{Name: "webview_resources", Description: "List loaded page resources"}, s.webviewResources)
|
||||
|
|
|
|||
|
|
@ -94,6 +94,12 @@ type TaskScreenshot struct {
|
|||
Window string `json:"window"`
|
||||
}
|
||||
|
||||
// TaskScreenshotElement requests a PNG capture of a single element. Window
// names the target window and Selector identifies the element inside it.
// Result: ScreenshotResult
type TaskScreenshotElement struct {
	Window   string `json:"window"`
	Selector string `json:"selector"`
}
|
||||
|
||||
// TaskScroll scrolls to an absolute position (window.scrollTo). Result: nil
|
||||
type TaskScroll struct {
|
||||
Window string `json:"window"`
|
||||
|
|
|
|||
|
|
@ -2,10 +2,15 @@
|
|||
package webview
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/draw"
|
||||
"image/png"
|
||||
"math"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
|
@ -381,6 +386,19 @@ func (s *Service) handleTask(_ *core.Core, t core.Task) (any, bool, error) {
|
|||
Base64: base64.StdEncoding.EncodeToString(png),
|
||||
MimeType: "image/png",
|
||||
}, true, nil
|
||||
case TaskScreenshotElement:
|
||||
conn, err := s.getConn(t.Window)
|
||||
if err != nil {
|
||||
return nil, true, err
|
||||
}
|
||||
png, err := captureElementScreenshot(conn, t.Selector)
|
||||
if err != nil {
|
||||
return nil, true, err
|
||||
}
|
||||
return ScreenshotResult{
|
||||
Base64: base64.StdEncoding.EncodeToString(png),
|
||||
MimeType: "image/png",
|
||||
}, true, nil
|
||||
case TaskScroll:
|
||||
conn, err := s.getConn(t.Window)
|
||||
if err != nil {
|
||||
|
|
@ -433,8 +451,26 @@ func (s *Service) handleTask(_ *core.Core, t core.Task) (any, bool, error) {
|
|||
_, err = conn.Evaluate(highlightScript(t.Selector, t.Colour))
|
||||
return nil, true, err
|
||||
case TaskOpenDevTools:
|
||||
ws, err := core.ServiceFor[*window.Service](s.Core(), "window")
|
||||
if err != nil {
|
||||
return nil, true, err
|
||||
}
|
||||
pw, ok := ws.Manager().Get(t.Window)
|
||||
if !ok {
|
||||
return nil, true, fmt.Errorf("window not found: %s", t.Window)
|
||||
}
|
||||
pw.OpenDevTools()
|
||||
return nil, true, nil
|
||||
case TaskCloseDevTools:
|
||||
ws, err := core.ServiceFor[*window.Service](s.Core(), "window")
|
||||
if err != nil {
|
||||
return nil, true, err
|
||||
}
|
||||
pw, ok := ws.Manager().Get(t.Window)
|
||||
if !ok {
|
||||
return nil, true, fmt.Errorf("window not found: %s", t.Window)
|
||||
}
|
||||
pw.CloseDevTools()
|
||||
return nil, true, nil
|
||||
case TaskInjectNetworkLogging:
|
||||
conn, err := s.getConn(t.Window)
|
||||
|
|
@ -502,6 +538,91 @@ func coerceToNetworkEntries(v any) ([]NetworkEntry, error) {
|
|||
return coerceJSON[[]NetworkEntry](v)
|
||||
}
|
||||
|
||||
// elementScreenshotBounds is the element geometry decoded from the object
// returned by elementScreenshotScript: the left, top, width and height of the
// element's getBoundingClientRect() and the page's devicePixelRatio.
type elementScreenshotBounds struct {
	Left             float64 `json:"left"`
	Top              float64 `json:"top"`
	Width            float64 `json:"width"`
	Height           float64 `json:"height"`
	DevicePixelRatio float64 `json:"devicePixelRatio"`
}
|
||||
|
||||
func elementScreenshotScript(selector string) string {
|
||||
sel := jsQuote(selector)
|
||||
return fmt.Sprintf(`(function(){
|
||||
const el = document.querySelector(%s);
|
||||
if (!el) return null;
|
||||
try { el.scrollIntoView({block: "center", inline: "center"}); } catch (e) {}
|
||||
const rect = el.getBoundingClientRect();
|
||||
return {
|
||||
left: rect.left,
|
||||
top: rect.top,
|
||||
width: rect.width,
|
||||
height: rect.height,
|
||||
devicePixelRatio: window.devicePixelRatio || 1
|
||||
};
|
||||
})()`, sel)
|
||||
}
|
||||
|
||||
func captureElementScreenshot(conn connector, selector string) ([]byte, error) {
|
||||
result, err := conn.Evaluate(elementScreenshotScript(selector))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
return nil, fmt.Errorf("webview: element not found: %s", selector)
|
||||
}
|
||||
bounds, err := coerceJSON[elementScreenshotBounds](result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if bounds.Width <= 0 || bounds.Height <= 0 {
|
||||
return nil, fmt.Errorf("webview: element has no measurable bounds: %s", selector)
|
||||
}
|
||||
raw, err := conn.Screenshot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
img, _, err := image.Decode(bytes.NewReader(raw))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
scale := bounds.DevicePixelRatio
|
||||
if scale <= 0 {
|
||||
scale = 1
|
||||
}
|
||||
left := int(math.Floor(bounds.Left * scale))
|
||||
top := int(math.Floor(bounds.Top * scale))
|
||||
right := int(math.Ceil((bounds.Left + bounds.Width) * scale))
|
||||
bottom := int(math.Ceil((bounds.Top + bounds.Height) * scale))
|
||||
|
||||
srcBounds := img.Bounds()
|
||||
if left < srcBounds.Min.X {
|
||||
left = srcBounds.Min.X
|
||||
}
|
||||
if top < srcBounds.Min.Y {
|
||||
top = srcBounds.Min.Y
|
||||
}
|
||||
if right > srcBounds.Max.X {
|
||||
right = srcBounds.Max.X
|
||||
}
|
||||
if bottom > srcBounds.Max.Y {
|
||||
bottom = srcBounds.Max.Y
|
||||
}
|
||||
if right <= left || bottom <= top {
|
||||
return nil, fmt.Errorf("webview: element is outside the captured screenshot: %s", selector)
|
||||
}
|
||||
|
||||
crop := image.NewRGBA(image.Rect(0, 0, right-left, bottom-top))
|
||||
draw.Draw(crop, crop.Bounds(), img, image.Point{X: left, Y: top}, draw.Src)
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := png.Encode(&buf, crop); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// realConnector wraps *gowebview.Webview, converting types at the boundary.
|
||||
type realConnector struct {
|
||||
wv *gowebview.Webview
|
||||
|
|
|
|||
|
|
@ -2,7 +2,12 @@
|
|||
package webview
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/png"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
|
|
@ -107,7 +112,11 @@ func (m *mockConnector) GetConsole() []ConsoleMessage { return m.console }
|
|||
func newTestService(t *testing.T, mock *mockConnector) (*Service, *core.Core) {
|
||||
t.Helper()
|
||||
factory := Register()
|
||||
c, err := core.New(core.WithService(factory), core.WithServiceLock())
|
||||
c, err := core.New(
|
||||
core.WithService(window.Register(window.NewMockPlatform())),
|
||||
core.WithService(factory),
|
||||
core.WithServiceLock(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, c.ServiceStartup(context.Background(), nil))
|
||||
svc := core.MustServiceFor[*Service](c, "webview")
|
||||
|
|
@ -203,6 +212,43 @@ func TestTaskScreenshot_Good(t *testing.T) {
|
|||
assert.NotEmpty(t, sr.Base64)
|
||||
}
|
||||
|
||||
func TestTaskScreenshotElement_Good(t *testing.T) {
|
||||
img := image.NewRGBA(image.Rect(0, 0, 4, 4))
|
||||
for y := 0; y < 4; y++ {
|
||||
for x := 0; x < 4; x++ {
|
||||
img.SetRGBA(x, y, color.RGBA{R: uint8(x * 40), G: uint8(y * 40), B: 200, A: 255})
|
||||
}
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, png.Encode(&buf, img))
|
||||
|
||||
mock := &mockConnector{
|
||||
screenshot: buf.Bytes(),
|
||||
evalFn: func(script string) (any, error) {
|
||||
return map[string]any{
|
||||
"left": 1.0,
|
||||
"top": 1.0,
|
||||
"width": 2.0,
|
||||
"height": 2.0,
|
||||
"devicePixelRatio": 1.0,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
_, c := newTestService(t, mock)
|
||||
|
||||
result, handled, err := c.PERFORM(TaskScreenshotElement{Window: "main", Selector: "#card"})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, handled)
|
||||
sr, ok := result.(ScreenshotResult)
|
||||
require.True(t, ok)
|
||||
|
||||
raw, err := base64.StdEncoding.DecodeString(sr.Base64)
|
||||
require.NoError(t, err)
|
||||
decoded, err := png.Decode(bytes.NewReader(raw))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, image.Rect(0, 0, 2, 2), decoded.Bounds())
|
||||
}
|
||||
|
||||
func TestTaskClearConsole_Good(t *testing.T) {
|
||||
mock := &mockConnector{}
|
||||
_, c := newTestService(t, mock)
|
||||
|
|
@ -214,6 +260,8 @@ func TestTaskClearConsole_Good(t *testing.T) {
|
|||
|
||||
func TestTaskDevTools_Good(t *testing.T) {
|
||||
_, c := newTestService(t, &mockConnector{})
|
||||
_, _, err := c.PERFORM(window.TaskOpenWindow{Opts: []window.WindowOption{window.WithName("main")}})
|
||||
require.NoError(t, err)
|
||||
_, handled, err := c.PERFORM(TaskOpenDevTools{Window: "main"})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, handled)
|
||||
|
|
|
|||
|
|
@ -9,19 +9,19 @@ type Platform interface {
|
|||
|
||||
// PlatformWindowOptions are the backend-specific options passed to CreateWindow.
//
// Each field is declared exactly once; repeating a field name inside a struct
// is a compile error.
type PlatformWindowOptions struct {
	Name                string
	Title               string
	URL                 string
	Width, Height       int
	X, Y                int
	MinWidth, MinHeight int
	MaxWidth, MaxHeight int
	Frameless           bool
	Hidden              bool
	AlwaysOnTop         bool
	BackgroundColour    [4]uint8 // RGBA
	DisableResize       bool
	EnableFileDrop      bool
}
|
||||
|
||||
// PlatformWindow is a live window handle from the backend.
|
||||
|
|
@ -54,6 +54,8 @@ type PlatformWindow interface {
|
|||
Hide()
|
||||
Fullscreen()
|
||||
UnFullscreen()
|
||||
OpenDevTools()
|
||||
CloseDevTools()
|
||||
|
||||
// Events
|
||||
OnWindowEvent(handler func(event WindowEvent))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue