An HTML Validator
Outline
- Represent HTML as a
Nodeinductive:Text,Element(tag + attrs + children) - Visitor pattern:
Visitor αis a record of callbacks invoked aswalkenters and leaves each node;walkhandles the recursion - Validator 1: check that
<img>elements have analtattribute - Validator 2: check that
<a>elements have anhrefattribute - Both validators are built with the same
walkinfrastructure; adding a new check is just a new function, not a change towalk
Code
i
-- HTML node tree
abbrev Attrs := List (String × String)
inductive Node where
| Text : String → Node
| Element : String → Attrs → List Node → Node
deriving Repr
-- Visitor record: callbacks invoked on enter and leave
structure Visitor (α : Type) where
onEnter : String → Attrs → α → α
onLeave : String → α → α
onText : String → α → α
-- Walk the tree, threading accumulator through callbacks
def walk {α} (v : Visitor α) (acc : α) : Node → α
| Node.Text t => v.onText t acc
| Node.Element tag attrs children =>
let acc' := v.onEnter tag attrs acc
let acc'' := children.foldl (walk v) acc'
v.onLeave tag acc''
-- Helper: find attribute value
def getAttr (attrs : Attrs) (name : String) : Option String :=
(attrs.find? (·.1 == name)).map (·.2)
-- Validator: collect error messages
abbrev Errors := List String
def noopVisitor : Visitor Errors :=
{ onEnter := fun _ _ e => e, onLeave := fun _ e => e, onText := fun _ e => e }
-- Check <img> elements have alt attribute
def imgAltVisitor : Visitor Errors :=
{ noopVisitor with
onEnter := fun tag attrs errs =>
if tag == "img" && getAttr attrs "alt" == none
then errs ++ ["img missing alt"]
else errs }
-- Check <a> elements have href attribute
def linkHrefVisitor : Visitor Errors :=
{ noopVisitor with
onEnter := fun tag attrs errs =>
if tag == "a" && getAttr attrs "href" == none
then errs ++ ["a missing href"]
else errs }
-- Compose validators by running both over the tree
def validate (doc : Node) : Errors :=
walk imgAltVisitor [] doc ++ walk linkHrefVisitor [] doc
-- Tests ----------------------------------------------------------------
def goodDoc : Node :=
Node.Element "html" [] [
Node.Element "body" [] [
Node.Element "img" [("src", "pic.jpg"), ("alt", "a picture")] [],
Node.Element "a" [("href", "https://example.com")] [Node.Text "click"]
]
]
def badDoc : Node :=
Node.Element "html" [] [
Node.Element "body" [] [
Node.Element "img" [("src", "pic.jpg")] [], -- missing alt
Node.Element "a" [] [Node.Text "click"] -- missing href
]
]
#guard validate goodDoc == []
#guard validate badDoc == ["img missing alt", "a missing href"]