# Playwright + lxml: load the product page, then extract and parse its product block
from playwright.sync_api import sync_playwright
from lxml import html
import pydantic as p
class Product(p.BaseModel):
    """Structured record for a single scraped product.

    Declared as a pydantic model so the scraped values are validated against
    the annotated types when an instance is constructed.
    """

    title: str  # product title (the script fills this from the <h1> text)
    price: float  # price (the script fills this from the itemprop="price" meta tag)
    sku: str  # stock-keeping unit (the script fills this from a data-sku attribute)
def _first_match(node, expr):
    """Return the first result of evaluating XPath ``expr`` against ``node``.

    Args:
        node: An lxml element (or tree) exposing ``.xpath()``.
        expr: The XPath expression to evaluate.

    Returns:
        The first item of the XPath result list.

    Raises:
        IndexError: If the expression matches nothing. The message names the
            expression, so a markup change on the page is easy to diagnose
            (a bare ``[0]`` would only report "list index out of range").
    """
    matches = node.xpath(expr)
    if not matches:
        raise IndexError(f"no match for XPath {expr!r}")
    return matches[0]


# Load the page in headless Chromium and capture its serialized HTML.
with sync_playwright() as pw:
    browser = pw.chromium.launch(headless=True)
    try:
        pctx = browser.new_context()
        page = pctx.new_page()
        # NOTE(review): "domcontentloaded" does not wait for content that is
        # rendered later by client-side scripts -- confirm the product section
        # is present in the initial HTML of the target page.
        page.goto(
            "https://example.com/product/123",
            wait_until="domcontentloaded",
        )
        markup = page.content()
    finally:
        # Always release the browser, even when navigation fails.
        browser.close()

# Parsing only needs the HTML string, so it runs after the browser is released.
tree = html.fromstring(markup)
node = _first_match(tree, "//section[@data-testid='product']")

item = Product(
    title=_first_match(node, ".//h1/text()").strip(),
    price=float(_first_match(node, ".//meta[@itemprop='price']/@content")),
    sku=_first_match(node, ".//*[@data-sku]/@data-sku"),
)
print(item.model_dump_json())