ohm-grammar-solace/stwl.ohm
2025-06-10 18:20:08 +02:00

424 lines
18 KiB
Text

STWL {
// Start of the grammar: Program is the first rule declared, which Ohm uses as the
// default start rule. `&(Directive*)` is a positive lookahead, so the directive
// prologue is checked without being consumed and is then parsed again as SourceElements.
Program = &(Directive*) SourceElement*
/*
STWL - Strongly Typed Web Language - is just a working title.
This grammar is based on the ES5 and ES6 grammars. It extends or trims them where
necessary, removing many features that are (in the author's opinion) unnecessary
while adding a clearer syntax.
*/
// Any single character of the input.
sourceCharacter = any
// Override Ohm's built-in definition of space.
// Ohm skips `space` implicitly inside syntactic (capitalised) rules, so this makes
// line terminators and comments skippable there as well.
space := whitespace | lineTerminator | comment
// Horizontal whitespace only; line terminators are kept separate so that rules such
// as spacesNoNL and sc can tell them apart.
whitespace = "\t"
| "\x0B" -- verticalTab
| "\x0C" -- formFeed
| " "
| "\u00A0" -- noBreakSpace
| "\uFEFF" -- byteOrderMark
| unicodeSpaceSeparator
// A single line-terminator character.
lineTerminator = "\n" | "\r" | "\u2028" | "\u2029"
// A complete line break: a lone "\r" (one not followed by "\n") or the pair "\r\n"
// each count as one sequence.
lineTerminatorSequence = "\n" | "\r" ~"\n" | "\u2028" | "\u2029" | "\r\n"
comment = multiLineComment | singleLineComment
// Everything between "/*" and the first following "*/"; may span several lines.
multiLineComment = "/*" (~"*/" sourceCharacter)* "*/"
// Runs up to, but does not consume, the next line terminator.
singleLineComment = "//" (~lineTerminator sourceCharacter)*
// An identifier is an identifierName that is not a reserved word. The negative
// lookahead `~reservedWord` rejects the match without consuming input.
// The parenthesised text after a rule name is the rule description.
identifier (an identifier) = ~reservedWord identifierName
identifierName = identifierStart identifierPart*
identifierStart = letter | "$" | "_"
identifierPart = identifierStart | unicodeDigit
// Extend Ohm's built-in `letter` with the characters listed in unicodeCategoryNl.
letter += unicodeCategoryNl
unicodeCategoryNl
= "\u2160".."\u2182" | "\u3007" | "\u3021".."\u3029"
// Decimal digit ranges from several scripts, starting with ASCII "0".."9".
unicodeDigit (a digit)
= "\u0030".."\u0039" | "\u0660".."\u0669" | "\u06F0".."\u06F9" | "\u0966".."\u096F" | "\u09E6".."\u09EF" | "\u0A66".."\u0A6F" | "\u0AE6".."\u0AEF" | "\u0B66".."\u0B6F" | "\u0BE7".."\u0BEF" | "\u0C66".."\u0C6F" | "\u0CE6".."\u0CEF" | "\u0D66".."\u0D6F" | "\u0E50".."\u0E59" | "\u0ED0".."\u0ED9" | "\u0F20".."\u0F29" | "\uFF10".."\uFF19"
// Additional space characters accepted by `whitespace`.
unicodeSpaceSeparator = "\u2000".."\u200B" | "\u3000"
// Words that `identifier` refuses to match (see `identifier = ~reservedWord identifierName`).
reservedWord = keyword | futureReservedWord | nullLiteral | booleanLiteral
// Note: keywords that are the complete prefix of another keyword should
// be prioritized (e.g. 'in' should come before 'interface')
// `in` and `track` are listed here because they have keyword-token rules and are used
// as keywords by the for-in loops and by TrackStatement; leaving them out would let
// them be parsed as ordinary identifiers, unlike their siblings `has`, `live` and `computed`.
keyword = for | while | do | break | continue | match
| if | else | const | var | live | track | computed
| catch | fn | return | void | this
| in | has | enum | interface | extends | implements | struct
| export | import | defer
// Reserved for future use; not accepted as an identifier.
futureReservedWord = fallthrough
// Literal values usable as a PrimaryExpression.
literal = nullLiteral | booleanLiteral | numericLiteral
| stringLiteral | regularExpressionLiteral
// Optionals, which can be either Some<T> or None, are translated to T | null
// `~identifierPart` keeps a longer name such as "Nonetheless" from matching as `None`.
nullLiteral = "None" ~identifierPart
booleanLiteral = ("true" | "false") ~identifierPart
// For semantics on how decimal literals are constructed, see section 7.8.3
// Note that the ordering of hexIntegerLiteral and decimalLiteral is reversed w.r.t. the spec
// This is intentional: the order decimalLiteral | hexIntegerLiteral will parse
// "0x..." as a decimal literal "0" followed by "x..."
// octalIntegerLiteral is tried first for the same reason: otherwise the leading "0"
// of an octal literal would match as the decimal literal "0".
numericLiteral = octalIntegerLiteral | hexIntegerLiteral | decimalLiteral
// Three shapes: "1.5" / "1." (bothParts), ".5" (decimalsOnly), "15" (integerOnly);
// each may carry an exponent, since exponentPart can match the empty string.
decimalLiteral = decimalIntegerLiteral "." decimalDigit* exponentPart -- bothParts
| "." decimalDigit+ exponentPart -- decimalsOnly
| decimalIntegerLiteral exponentPart -- integerOnly
// Either a single "0" or a digit sequence that does not start with "0".
decimalIntegerLiteral = nonZeroDigit decimalDigit* -- nonZero
| "0" -- zero
decimalDigit = "0".."9"
nonZeroDigit = "1".."9"
// Optional exponent of a decimal literal. The `absent` case matches the empty string,
// so exponentPart itself never fails.
exponentPart = exponentIndicator signedInteger -- present
| -- absent
exponentIndicator = "e" | "E"
// The digits of an exponent, with an optional sign. At least one digit is required in
// every case (ES5 section 7.8.3), so an input such as "1e+" is not accepted as a number
// with an empty exponent.
signedInteger = "+" decimalDigit+ -- positive
| "-" decimalDigit+ -- negative
| decimalDigit+ -- noSign
// "0x" or "0X" followed by at least one hex digit.
hexIntegerLiteral = "0x" hexDigit+
| "0X" hexDigit+
// hexDigit defined in Ohm's built-in rules (otherwise: hexDigit = "0".."9" | "a".."f" | "A".."F")
// A leading "0" followed by at least one octal digit.
octalIntegerLiteral = "0" octalDigit+
octalDigit = "0".."7"
// Digit sub-ranges used by octalEscapeSequence.
zeroToThree = "0".."3"
fourToSeven = "4".."7"
// For semantics on how string literals are constructed, see section 7.8.4
// Double- or single-quoted; the closing quote must be the same as the opening one.
stringLiteral = "\"" doubleStringCharacter* "\""
| "'" singleStringCharacter* "'"
// One unit of string content: a plain character (not the closing quote, a backslash
// or a line terminator), a backslash escape, or a backslash-newline continuation.
doubleStringCharacter = ~("\"" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
| "\\" escapeSequence -- escaped
| lineContinuation -- lineContinuation
singleStringCharacter = ~("'" | "\\" | lineTerminator) sourceCharacter -- nonEscaped
| "\\" escapeSequence -- escaped
| lineContinuation -- lineContinuation
// A backslash directly followed by a line break.
lineContinuation = "\\" lineTerminatorSequence
// The part of an escape after the backslash. The alternatives are ordered.
escapeSequence = unicodeEscapeSequence
| hexEscapeSequence
| octalEscapeSequence
| characterEscapeSequence // Must come last.
characterEscapeSequence = singleEscapeCharacter
| nonEscapeCharacter
singleEscapeCharacter = "'" | "\"" | "\\" | "b" | "f" | "n" | "r" | "t" | "v"
// Any character that is neither an escapeCharacter nor a line terminator.
nonEscapeCharacter = ~(escapeCharacter | lineTerminator) sourceCharacter
escapeCharacter = singleEscapeCharacter | decimalDigit | "x" | "u"
// One to three octal digits. The `~decimalDigit` lookaheads stop the shorter forms
// from matching when another decimal digit follows.
octalEscapeSequence = zeroToThree octalDigit octalDigit -- whole
| fourToSeven octalDigit -- eightTimesfourToSeven
| zeroToThree octalDigit ~decimalDigit -- eightTimesZeroToThree
| octalDigit ~decimalDigit -- octal
// "x" plus exactly two hex digits.
hexEscapeSequence = "x" hexDigit hexDigit
// "u" plus exactly four hex digits.
unicodeEscapeSequence = "u" hexDigit hexDigit hexDigit hexDigit
// §7.8.5 Regular Expression Literals -- https://es5.github.io/#x7.8.5
// "/" body "/" flags. The body needs at least one character.
regularExpressionLiteral = "/" regularExpressionBody "/" regularExpressionFlags
regularExpressionBody = regularExpressionFirstChar regularExpressionChar*
// Same as regularExpressionChar except that "*" is also excluded, so a body cannot
// start with "*" (which would make the literal begin with "/*").
regularExpressionFirstChar = ~("*" | "\\" | "/" | "[") regularExpressionNonTerminator
| regularExpressionBackslashSequence
| regularExpressionClass
regularExpressionChar = ~("\\" | "/" | "[") regularExpressionNonTerminator
| regularExpressionBackslashSequence
| regularExpressionClass
// A backslash followed by any character that is not a line terminator.
regularExpressionBackslashSequence = "\\" regularExpressionNonTerminator
regularExpressionNonTerminator = ~(lineTerminator) sourceCharacter
// A bracketed character class; "/" needs no escaping inside it.
regularExpressionClass = "[" regularExpressionClassChar* "]"
regularExpressionClassChar = ~("]" | "\\") regularExpressionNonTerminator
| regularExpressionBackslashSequence
// Zero or more identifier characters directly after the closing "/".
regularExpressionFlags = identifierPart*
// === Implementation-level rules (not part of the spec) ===
// A /* ... */ comment that contains no line terminator.
multiLineCommentNoNL = "/*" (~("*/" | lineTerminator) sourceCharacter)* "*/"
// does not accept lineTerminators, not even implicit ones in a multiLineComment (cf. section 7.4)
spacesNoNL = (whitespace | singleLineComment | multiLineCommentNoNL)*
// A semicolon is "automatically inserted" if a newline or the end of the input stream is
// reached, or the offending token is "}".
// See https://es5.github.io/#x7.9 for more information.
// NOTE: Applications of this rule *must* appear in a lexical context -- either in the body of a
// lexical rule, or inside `#()`.
// First alternative: an explicit ";" or the end of input, after any amount of space.
// Second alternative: after same-line spacing only, a line terminator, a multi-line
// comment that spans a line break (`~multiLineCommentNoNL multiLineComment`), or an
// upcoming "}" that is checked by lookahead and not consumed.
sc = space* (";" | end)
| spacesNoNL (lineTerminator | ~multiLineCommentNoNL multiLineComment | &"}")
// Convenience rules for parsing keyword tokens.
// Each rule matches the keyword text only when it is not directly followed by another
// identifier character, so e.g. "format" is never read as the keyword `for`.
for = "for" ~identifierPart
while = "while" ~identifierPart
do = "do" ~identifierPart // will we support do-while loops?
break = "break" ~identifierPart
continue = "continue" ~identifierPart
if = "if" ~identifierPart
else = "else" ~identifierPart
const = "const" ~identifierPart
var = "var" ~identifierPart
live = "live" ~identifierPart
track = "track" ~identifierPart
computed = "computed" ~identifierPart
catch = "catch" ~identifierPart
fn = "fn" ~identifierPart
return = "return" ~identifierPart
void = "void" ~identifierPart
// The `this` token is spelled "_" in STWL source text.
this = "_" ~identifierPart
// The usage hints below are ordinary comments. Written as `-- ...` they would be parsed
// as case names, which must be a single name.
in = "in" ~identifierPart // value in ArrayLike
has = "has" ~identifierPart // struct has Property
enum = "enum" ~identifierPart
interface = "interface" ~identifierPart
extends = "extends" ~identifierPart
implements = "implements" ~identifierPart
export = "export" ~identifierPart
import = "import" ~identifierPart
struct = "struct" ~identifierPart
defer = "defer" ~identifierPart
match = "match" ~identifierPart
fallthrough = "fallthrough" ~identifierPart
// Tokens applied by MemberExpression / NewExpression (`new`), PropertyAssignment
// (`get`, `set`) and Defer (`when`). Ohm rejects a grammar that applies an undeclared
// rule, so they are declared here alongside the other keyword tokens.
new = "new" ~identifierPart
get = "get" ~identifierPart
set = "set" ~identifierPart
when = "when" ~identifierPart
// end of lexical rules
// Guard rules. They consume no input and are passed as the `guardIn` argument of the
// parameterised expression rules (e.g. Expression<withIn>, Expression<noIn>).
// A `no...` guard fails when the named keyword comes next; a `with...` guard has an
// empty body and always succeeds.
noIn = ~in
withIn =
// NOTE(review): noHas, withHas, noInHas and withInHas are not applied by any rule
// visible in this grammar.
noHas = ~has
withHas =
noInHas = ~(in | has)
withInHas =
// §A.3 Expressions -- https://es5.github.io/#A.3
// The atoms of the expression grammar. `this` (the "_" token) is tried before
// `identifier`.
PrimaryExpression = this
| identifier
| literal
// ( litToken.type === "regexp"
// ? this.ast(_fromIdx, "RegExpExpr",{body: litToken.value.body
// flags: litToken.value.flags}, [])
// : this.ast(_fromIdx, "LiteralExpr",{type: litToken.type
// value: litToken.value}, []) )
| ArrayLiteral
| ObjectLiteral
| "(" Expression<withIn> ")" -- parenExpr
// Comma-separated elements. An element may be empty (the `elision` case), which is
// what lets inputs like "[1,,2]" match.
ArrayLiteral = "[" ListOf<AssignmentExpressionOrElision, ","> "]"
AssignmentExpressionOrElision = AssignmentExpression<withIn>
| -- elision
// "{...}" with zero or more properties, or a non-empty list followed by one trailing comma.
ObjectLiteral = "{" ListOf<PropertyAssignment, ","> "}" -- noTrailingComma
| "{" NonemptyListOf<PropertyAssignment, ","> "," "}" -- trailingComma
// Getter and setter forms are tried before the plain `name: value` form.
// `get` and `set` are applied as rules here, so keyword-token rules with those names
// must be declared in this grammar.
PropertyAssignment = get PropertyName "(" ")" "{" FunctionBody "}" -- getter
| set PropertyName "(" FormalParameter ")" "{" FunctionBody "}" -- setter
| PropertyName ":" AssignmentExpression<withIn> -- simple
// Uses identifierName rather than identifier, so reserved words are accepted as keys.
PropertyName = identifierName
| stringLiteral
| numericLiteral
// Left-recursive: indexing and property access chain onto a previous MemberExpression.
// `new` is applied as a rule here and in NewExpression, so a keyword-token rule named
// `new` must be declared in this grammar.
MemberExpression = MemberExpression "[" Expression<withIn> "]" -- arrayRefExp
| MemberExpression "." identifierName -- propRefExp
| new MemberExpression Arguments -- newExp
| FunctionExpression
| PrimaryExpression
// `new` without an argument list; may be nested.
NewExpression = MemberExpression
| new NewExpression -- newExp
// A call, optionally followed by further indexing, property access or calls.
CallExpression = CallExpression "[" Expression<withIn> "]" -- arrayRefExp
| CallExpression "." identifierName -- propRefExp
| CallExpression Arguments -- callExpExp
| MemberExpression Arguments -- memberExpExp
Arguments = "(" ListOf<AssignmentExpression<withIn>, ","> ")"
// CallExpression is tried first; NewExpression covers everything without a call.
LeftHandSideExpression = CallExpression
| NewExpression
// `#(spacesNoNL "++")` is a lexical application: only same-line spacing may separate
// the operand from a postfix operator, so no line terminator is allowed between them.
PostfixExpression = LeftHandSideExpression #(spacesNoNL "++") -- postIncrement
| LeftHandSideExpression #(spacesNoNL "--") -- postDecrement
| LeftHandSideExpression
// Prefix operators. They nest to the right, because each one applies to another
// UnaryExpression.
UnaryExpression = void UnaryExpression -- voidExp
| "++" UnaryExpression -- preIncrement
| "--" UnaryExpression -- preDecrement
| "+" UnaryExpression -- unaryPlus
| "-" UnaryExpression -- unaryMinus
| "~" UnaryExpression -- bnot
| "!" UnaryExpression -- lnot
| PostfixExpression
// The binary operator rules below are left-recursive, which makes the operators
// left-associative. Each level takes the next tighter level as its right-hand operand.
MultiplicativeExpression = MultiplicativeExpression "*" UnaryExpression -- mul
| MultiplicativeExpression "/" UnaryExpression -- div
| MultiplicativeExpression "%" UnaryExpression -- mod
| UnaryExpression
AdditiveExpression = AdditiveExpression "+" MultiplicativeExpression -- add
| AdditiveExpression "-" MultiplicativeExpression -- sub
| MultiplicativeExpression
// ">>>" is listed before ">>" so the longer operator is tried first.
ShiftExpression = ShiftExpression "<<" AdditiveExpression -- lsl
| ShiftExpression ">>>" AdditiveExpression -- lsr
| ShiftExpression ">>" AdditiveExpression -- asr
| AdditiveExpression
// From here up to Expression, every rule takes a `guardIn` parameter and passes it down.
// It is only applied in the `inExp` case below: with `noIn` (`~in`) the guard fails when
// the `in` keyword is next, and with `withIn` (empty body) it always succeeds.
RelationalExpression<guardIn>
= RelationalExpression<guardIn> "<" ShiftExpression -- lt
| RelationalExpression<guardIn> ">" ShiftExpression -- gt
| RelationalExpression<guardIn> "<=" ShiftExpression -- le
| RelationalExpression<guardIn> ">=" ShiftExpression -- ge
| RelationalExpression<guardIn> guardIn "in" ShiftExpression -- inExp
| ShiftExpression
EqualityExpression<guardIn>
= EqualityExpression<guardIn> "==" RelationalExpression<guardIn> -- equal
| EqualityExpression<guardIn> "!=" RelationalExpression<guardIn> -- notEqual
| EqualityExpression<guardIn> "===" RelationalExpression<guardIn> -- eq
| EqualityExpression<guardIn> "!==" RelationalExpression<guardIn> -- notEq
| RelationalExpression<guardIn>
BitwiseANDExpression<guardIn>
= BitwiseANDExpression<guardIn> "&" EqualityExpression<guardIn> -- band
| EqualityExpression<guardIn>
BitwiseXORExpression<guardIn>
= BitwiseXORExpression<guardIn> "^" BitwiseANDExpression<guardIn> -- bxor
| BitwiseANDExpression<guardIn>
BitwiseORExpression<guardIn>
= BitwiseORExpression<guardIn> "|" BitwiseXORExpression<guardIn> -- bor
| BitwiseXORExpression<guardIn>
LogicalANDExpression<guardIn>
= LogicalANDExpression<guardIn> "&&" BitwiseORExpression<guardIn> -- land
| BitwiseORExpression<guardIn>
LogicalORExpression<guardIn>
= LogicalORExpression<guardIn> "||" LogicalANDExpression<guardIn> -- lor
| LogicalANDExpression<guardIn>
// The middle operand always uses `withIn`; only the condition and the last operand
// inherit the caller's guard.
ConditionalExpression<guardIn>
= LogicalORExpression<guardIn> "?" AssignmentExpression<withIn> ":" AssignmentExpression<guardIn> -- conditional
| LogicalORExpression<guardIn>
// Right-recursive, so assignments group to the right.
AssignmentExpression<guardIn>
= LeftHandSideExpression assignmentOperator AssignmentExpression<guardIn> -- assignment
| ConditionalExpression<guardIn>
// One or more assignment expressions separated by the comma operator.
Expression<guardIn> (an expression)
= Expression<guardIn> "," AssignmentExpression<guardIn> -- commaExp
| AssignmentExpression<guardIn>
// Plain "=" and the compound assignment operators.
assignmentOperator = "=" | ">>>=" | "<<=" | ">>="
| "*=" | "/=" | "%=" | "+=" | "-=" | "&=" | "^=" | "|="
// Statements -- (extends https://es5.github.io/#A.4)
// The statement forms accepted wherever a Statement is expected. The alternatives are
// tried in the order listed.
Statement
= Block
| VariableStatement
| EmptyStatement
| ExpressionStatement
| IfStatement
| IterationStatement
| ContinueStatement
| BreakStatement
| ReturnStatement
// A brace-delimited list of statements.
Block = "{" StatementList "}"
// A comma-separated identifier list between "|" bars; used by TrackStatement.
LambdaParameters = "|" ListOf<identifier, ","> "|"
StatementList = Statement*
// NOTE(review): ComputedStatement and TrackStatement are not listed in Statement and
// are not applied by any other rule visible in this grammar.
ComputedStatement = computed identifier Block
TrackStatement = track ListOf<identifier, ","> LambdaParameters? Block
// `var`, `const` or `live`, then one or more declarations, then a (possibly
// automatically inserted) semicolon.
VariableStatement = VariableAssignment VariableDeclarationList<withIn> #sc
VariableAssignment = var | const | live
VariableDeclarationList<guardIn> = NonemptyListOf<VariableDeclaration<guardIn>, ",">
// A name with an optional "= value" initialiser.
VariableDeclaration<guardIn> = identifier Initialiser<guardIn>?
Initialiser<guardIn> = "=" AssignmentExpression<guardIn>
EmptyStatement = ";" // note: this semicolon eats newlines
// An expression terminated by a (possibly automatically inserted) semicolon.
// The negative lookahead rejects input that starts with "{" or with the function
// keyword token `fn` (the only function keyword this grammar declares).
ExpressionStatement = ~("{" | fn) Expression<withIn> #sc
// `if (cond) stmt` with an optional `else stmt`.
IfStatement = if "(" Expression<withIn> ")" Statement (else Statement)?
// Loop forms. The initialiser of the three-part `for` and the declaration of
// `for (var ... in ...)` are parsed with `noIn`.
IterationStatement = do Statement while "(" Expression<withIn> ")" #sc -- doWhile
| while "(" Expression<withIn> ")" Statement -- whileDo
| for "(" Expression<noIn>? ";"
Expression<withIn>? ";"
Expression<withIn>? ")" Statement -- for3
| for "(" var VariableDeclarationList<noIn> ";"
Expression<withIn>? ";"
Expression<withIn>? ")" Statement -- for3var
| for "(" LeftHandSideExpression in
Expression<withIn> ")" Statement -- forIn
| for "(" var VariableDeclaration<noIn> in
Expression<withIn> ")" Statement -- forInVar
// The optional identifier must be on the same line as the keyword (spacesNoNL).
ContinueStatement = continue #((spacesNoNL identifier)? sc)
BreakStatement = break #((spacesNoNL identifier)? sc)
// The optional expression is only attempted when, after same-line spacing, the next
// character is not a `space` (which includes line terminators and comments).
ReturnStatement = return (#(spacesNoNL ~space) Expression<withIn>)? #sc
// NOTE(review): Catch and Defer are not applied by any rule visible in this grammar.
Catch = catch "(" FormalParameter ")" Block
// `when` is applied as a rule here, so a keyword-token rule named `when` must be
// declared in this grammar.
Defer = defer (when Expression<withIn>)? Block
// Pattern Matching
// `match <expr> [if <pattern>] { arm+ } [else <block>]`
// NOTE(review): MatchExpr is not applied by any rule visible in this grammar.
MatchExpr = match Expression<withIn> (if Pattern)? "{" MatchArm+ "}" (else Block)?
// One arm: a pattern, "=>", then an expression or a block.
MatchArm = Pattern "=>" (Expression<withIn> | Block)
// NOTE(review): Ohm's choice is ordered and commits to the first alternative that
// succeeds. For an arm such as `x > 5 => ...`, `identifier` appears to match `x`, after
// which "=>" fails and GuardPattern is never tried. Confirm with a test before relying
// on guard patterns that start with an identifier or literal.
Pattern = identifier
| literal
| GuardPattern
GuardPattern = Expression<withIn> // Just an expression! No special syntax needed
// §A.5 Functions and Programs -- https://es5.github.io/#A.5
// Functions are introduced by the `fn` keyword token, the only function keyword this
// grammar declares.
FunctionDeclaration
= fn identifier "(" FormalParameterList ")" "{" FunctionBody "}"
// The same shape as a declaration, usable as an expression; the name is optional.
FunctionExpression
= fn identifier "(" FormalParameterList ")" "{" FunctionBody "}" -- named
| fn "(" FormalParameterList ")" "{" FunctionBody "}" -- anonymous
FormalParameterList = ListOf<FormalParameter, ",">
FormalParameter = identifier
/*
Note: The Directive Prologue is the longest sequence of ExpressionStatement
productions occurring as the initial SourceElement (see https://es5.github.io/#x14.1)
*/
// `&(Directive*)` is a lookahead: the directives are checked without being consumed
// and are then parsed again as SourceElements.
FunctionBody = &(Directive*) SourceElement*
SourceElement = Declaration | Statement
// Broken out so es6 can override to include ConstDecl and LetDecl
Declaration = FunctionDeclaration
// A string literal used as a statement, e.g. "use strict";
Directive = stringLiteral #sc
}
// Derived grammar that overrides `futureReservedWord` with `futureReservedWordLax`.
// NOTE(review): neither a grammar named `ES5` nor a rule named `futureReservedWordLax`
// is declared in the visible source. Both presumably have to come from the namespace
// this file is loaded into. Confirm, or retarget this grammar at STWL.
ES5Lax <: ES5 {
futureReservedWord := futureReservedWordLax
}