| top | | | ^ | | | | | | section top | | 12 ::
Grammar properties are
- foreign: production-name
- Foreign text alternatives:
a union of four element strings: output variables, input variables,
language, and foreign code.
- nullable: production-name
- Whether the production is nullable.
- weight: production-name
- Production weight (minimum
nonempty number of terminals that can be generated).
- action: production-name [ alternative ]
-
Reduce action.
- action: production-name [ %shift ]
- Shift action.
- symbol-name.character
- Glyph translations.
- cs: production-name alternative
-
Context sensitive constraints (attributes).
- properties: production-name
-
Description of production properties.
- anyright:
- If any productions are right (or embedding) recursive.
- attributes:
- Metanotions declared to be attributes.
- compiled:
- If a parser was constructed.
- everywhere: symbol-name
- Everywhere reserved symbols.
- finite:
- Set of productions with only a finite set of generated terminals.
- glyph: glyph-name
- Character with this name.
- k:
- Maximum lookahead.
- maxexcept:
- Maximum string length of all except...character.
- name:
- Grammar name.
- order:
- Produced from ordering.
- predicates:
- Predicate productions.
- reserved: symbol
- Reserved symbols in the lexicon.
- signatures:
- Declared hypernotion signatures.
- start:
- Start production.
- type:
- Grammar type in the Chomsky hierarchy, 2 or 3.
- ep: production-name
- Original production names for created productions.
This will be used to create automata ep: properties.
-
- class: symbol|character symbol-set := class-numbers
-
List each class with its symbols, indexed by its members.
- class-number: symbol|character := last-class
-
Last class number for symbol and character classes.
- members: symbol|character class-numbers := symbol-set
-
List each class with its symbols, indexed by its class numbers.
- reserved: reserved-table-index :=
{spelling reserved-symbol...}
-
Each unique reserved symbol list.
| ^ | | | | section top | |
# truncate string --
#   Shorten a piece of text so that it fits in a one-line diagnostic.
#   Leading newlines are dropped, the text is cut at the first remaining
#   newline, and the result is limited to 40 characters.  When anything
#   was cut off, exactly one "..." is appended to mark the omission.
#
# Arguments:
#   string   the text to abbreviate
# Results:
#   The abbreviated text.
proc truncate {string} {
    # Leading newlines carry no information for the message.
    set string [string trimleft $string \n]
    set cut 0
    # Keep only the first line.
    set nl [string first \n $string]
    if {$nl >= 0} {
        set string [string range $string 0 [expr {$nl-1}]]
        set cut 1
    }
    # Apply the length limit before adding the ellipsis, so that an ellipsis
    # added for the newline cut is never itself truncated and doubled.
    if {[string length $string] > 40} {
        set string [string range $string 0 39]
        set cut 1
    }
    if {$cut} {append string ...}
    return $string
}
# parser vw2grammar --
#   Parse a grammar document, given as a list of markup chunks, and record
#   the result in the grammar objects hr (name: hyperrules) and mr
#   (name: metarules) created in the current namespace.
#
# Arguments:
#   vw2grammar   list of chunks; each chunk is piped through the external
#                tidy program and then tokenised with wyrm::expat
# Notes:
#   Lines of the form <...> are literate-programming chunk references that
#   are expanded before this procedure is evaluated.
proc parser vw2grammar {
# Create the grammar objects and give them their initial properties.
sym grammar [namespace current]::hr
hr k: := 2
hr type: := 3
hr name: := hyperrules
hr start: := %start
hr conflicts: := {}
hr foreigns: := {}
hr symtab: := {}
hr implementor: := ::wyrm::model
sym grammar [namespace current]::mr
mr k: := 2
mr type: := 3
mr name: := metarules
sym grammar [namespace current]::rew
oav bundle [namespace current]::outp
log Begin parse input.
<Initialise glyph names before parsing the grammar>
# grammarhtml collects the markup pieces that are not grammar text.
set grammarhtml {}
# header(1)..header(6) hold heading text per level; header(n) is the level.
array set header {1 "" 2 "" 3 "" 4 "" 5 "" 6 "" n 0}
set chunknumber 0
# ps, state and input are the dpda state carried from one call to the next.
set ps {}
set state rule
set curr %start
set first 1
set input ""
foreach chunk $vw2grammar {
incr chunknumber
set token [wyrm::expat::create]
# Normalise the chunk with tidy; its diagnostics are discarded and the
# trailing exit 0 keeps exec from failing on a nonzero tidy status.
wyrm::expat::resume $token [
exec $::env(SHELL) -c {tidy --indent 0 --output-xml 1 --quiet 1; exit 0} << $chunk 2> /dev/null
]
# ts is the stack of open element names; 0 marks the level outside body.
set ts 0
foreach piece [wyrm::expat::end $token] {
set what [assoc get $piece %what]
vassoc put piece %chunk $chunknumber
switch $what {
start {
set name [assoc get $piece %name]
assoc get piece -exact est class c
switch -glob [lindex $ts end]/$name {
0/body {
lappend ts body
}
0/* {
;
}
*/cow* {
lappend ts $name
}
{*/h[1-6]} {
# A heading opens: reset its text and remember its level.
set i [string index $name end]
set header($i) ""
set header(n) $i
lappend grammarhtml $piece
lappend ts $name
}
default {
lappend grammarhtml $piece
lappend ts $name
}
}
}
end {
switch -glob [lindex $ts end] {
0 {
;
}
cow* {
set ts [lrange $ts 0 end-1]
}
{*/h[1-6]} {
# NOTE(review): the value switched on is only the element name on top
# of ts and contains no slash, so this branch looks unreachable, and
# name still holds the most recently opened element.  Confirm intent.
set i [string index $name end]
set header($i) ""
set header(n) [expr {$i-1}]
lappend grammarhtml $piece
set ts [lrange $ts 0 end-1]
}
default {
lappend grammarhtml $piece
set ts [lrange $ts 0 end-1]
}
}
}
content {
set data [assoc get piece %data]
switch -glob [lindex $ts end] {
0 {
;
}
{*/h[1-6]} {
# NOTE(review): same pattern as in the end branch; it cannot match a
# bare element name, so heading text is never accumulated.  Confirm.
set i [string index $name end]
append header($i) $data
lappend grammarhtml $piece
}
code - cowhn - cowsep {
# Grammar text: run the dpda over the unconsumed input followed by the
# new data, using the rule table below, and keep the resulting state.
setp {ps state input} [dpda $ps $state $input[assoc get piece %data] {
foreign-chunk
{((?~code=.*)),((?~N=.*)),<foreigncode>(?~ch=[^][]+)}
{($code$ch),($N),<foreigncode>}
skip-newline
{<(?~state=.*)>[\n\r]}
{<[incr line; set state]>}
skip-blanks
{<(?~state=.*)>[ \t]+}
{<$state>}
skip-comments
{<(?~state=.*)>{[^{}]*}}
{<$state>}
end-of-input
{<rule>$}
{<quit>}
start-a-rule
{<rule>}
{(rulekind),<notion>}
<Tree rewriting rules>
<Metarules>
<Foreign rules>
<Hyperrules>
<Property rules>
unknown-text
{(.*),(#NS),<rulekind>(?~rest=[^.]*)[.]}
{<[report E \[$line\] could not identify the text: [truncate $rest]; concat rule]>}
unknown-rule
{(.*),((?~N=.*)),<rulekind>(?~rest=[^.]*)[.]}
{<[report E \[$line\] could not identify the rule: "[truncate [concat $N ## $rest]]"; concat rule]>}
find-full-stop-at-end-of-rule
{(.*),<endrule>[.]}
{<rule>}
error-at-end-rule
{(metarule),<endrule>(?~rest=[^.]+)[.]}
{<[report E \[$curr\] expected '.' to end metarule: [truncate $rest]; concat rule]>}
error-at-end-rule
{((?~what=.*)),<endrule>(?~rest=[^.]+)[.]}
{<[report E \[$curr\] expected '.' to end $what: [truncate $rest]; concat rule]>}
error-at-end-rule
{(metarule),<endrule>(?~rest=.*)}
{<[report E \[$curr\] expected '.' to end metarule: [truncate $rest]; concat quit]>}
error-at-end-rule
{((?~what=.*)),<endrule>(?~rest=.*)}
{<[report E \[$curr\] expected '.' to end $what: [truncate $rest]; concat quit]>}
<Parse the next hypernotion, metanotion, or symbol>
skip-comments
{<(?~state=.*)>\{[^{}]*\}}
{<$state>}
}]
}
default {
lappend grammarhtml $piece
}
}
}
}
}
}
<Ensure foreign and hyperrules are disjoint>
<Symbol table postparse processing>
<Make aleph definitions of undefined attributes>
log Complete parse input.
# Record the glyph names in effect after parsing as glyph: properties.
foreach {character name} [array get glyphName] {
hr glyph: $name := $character
}
}
| ^ | | | | | | section top | | | ^ | | | | | | section top | |
start-property
{(start),<propertyrule>}
{(startproperty),<notion>}
start-property
{((proto|meta|hyper)),((?~N=.*)),<startproperty>}
{(start property),<[set first 0; hr %start := $N,%end; concat endrule]>}
| ^ | | | | | | section top | |
k-property
{(k),<propertyrule>}
{(),<kproperty>}
k-property
{((?~K=.*)),<kproperty>(?~k=\d+)}
{($K$k),<kproperty>}
k-property
{((?~K=.*)),<kproperty>}
{(k property),<[hr k: := $K; concat endrule]>}
| ^ | | | | | | section top | |
name-property
{(name),<propertyrule>}
{(nameproperty),<notion>}
name-property
{((proto|meta|hyper)),((?~N=.*)),<nameproperty>}
{(name property),<[hr name: := $N; mr name: := ${N}_metarules; concat endrule]>}
| ^ | | | | | | section top | |
grammar-type-property
{(type),<propertyrule>}
{(),<typeproperty>}
grammar-type-property
{((?~K=.*)),<typeproperty>(?~k=\d+)}
{($K$k),<typeproperty>}
grammar-type-property
{((?~K=2|3)),<typeproperty>}
{(type property),<[hr type: := $K; concat endrule]>}
grammar-type-property
{((?~K=.*)),<typeproperty>}
{(type property),<[report E \[$line\] grammar type not 2 or 3: $K; concat endrule]>}
| ^ | | | | | | section top | |
include-property
{(include),<propertyrule>'(?~uri=([^']|'')*)'}
{($uri),(),<endincludeuri>}
end-include-property
{((?~garbage=.*)),<endincludeuri>(?~text=[^.])}
{($garbage$text),<endincludeuri>}
end-include-property
{((?~uri=.*)),((?~garbage=.*)),<endincludeuri>[.]}
{<rule>[
if {[string length $garbage]} {report E unexpected text after include uri: [truncate $garbage]}
uri get [string map {'' '} $uri]
]}
include-property
{(include),<propertyrule>\[}
{(%include),(),(1),<foreigncode>}
included-foreign-text
{(%include),((?~code=.*)),(1),<foreigncode>\]}
{(include property),<[
gen foreign_include $code
concat endrule
]>}
include-property
{(include),<propertyrule>}
{(include property),<[
report E \[$line\] url or foreign code expected after include
concat endrule
]>}
| ^ | | | | | | section top | | | ^ | | | | | | section top | |
implementor-property
{(implementor),<propertyrule>'(?~N=([^']|'')+)'}
{(implementor property),<[hr implementor: := [string map {'' '} $N]; concat endrule]>}
| ^ | | | | | | section top | |
identify-a-metarule
{(meta),((?~N=.*)),<rulekind>:\s*:}
{($N),<[set curr $N; concat metarule]>}
identify-a-bad-metarule
{(hyper|proto),(#NS),<rulekind>:\s*:[^.]*}
{(metarule),<[
report E \[$line\] missing metanotion before '::'
concat endrule
]>}
metarule
{<metarule>}
{(),(mrmember)<notion>}
metarule-member
{((?~rule=.*)),((proto|meta|hyper)),((?~HN=.*)),<mrmember>}
{([sym union $rule $HN]),<mrmemberseparator>}
metarule-alternative
{<mrmemberseparator>[;|]}
{(mrmember),<notion>}
metarule-completed
{((?~MN=.*)),((?~rule=.*)),<mrmemberseparator>}
{(metarule),<[mr $MN +:= $rule; concat endrule]>}
| ^ | | | | | | section top | |
identify-a-bad-hyperrule
{(hyper|proto),(#NS),<rulekind>:\s*[^.]*}
{(hyperrule),<[
report E \[$line\] missing hypernotion before ':'
concat endrule
]>}
identify-a-hyperrule
{((proto|meta|hyper)),((?~N=.*)),<rulekind>:}
{($N),<[set curr $N; concat hyperrule]>}
hyperrule
{<hyperrule>}
{(),(),(hrmember)<notion>}
hyperrule-equal-member
{((proto|meta|hyper)),((?~HN=.*)),<hrmember>=\s*=}
{($HN%eq%),(hrposteqmember),<notion>}
hyperrule-equal-member
{((proto|meta|hyper)),((?~HN=.*)),<hrmember>=}
{($HN%eq%),(hrposteqmember),<notion>}
hyperrule-not-equal-member
{((proto|meta|hyper)),((?~HN=.*)),<hrmember>/\s*=}
{($HN%ne%),(hrposteqmember),<notion>}
hyperrule-not-equal-member
{((proto|meta|hyper)),((?~HN=.*)),<hrmember>}
{($HN%ne%),(hrposteqmember),<notion>}
hyperrule-post-equal-member
{((?~HN1=.*)),((proto|meta|hyper)),((?~HN2=.*)),<hrposteqmember>}
{(hyper),($HN1$HN2),<hrmember>}
hyperrule-member
{((?~alternative=.*)),((proto|meta|hyper)),((?~HN=.*)),<hrmember>}
{([
if {[sym null $alternative]} {set HN} else {sym concat $alternative $HN}
]),<hrmemberseparator>}
hyperrule-member
{<hrmemberseparator>,}
{(hrmember),<notion>}
hyperrule-alternative
{((?~rule=.*)),((?~alternative=.*)),<hrmemberseparator>[;|]}
{([sym union $rule $alternative]),(),(hrmember),<notion>}
hyperrule-completed
{((?~HN=[^%].*)),((?~rule=.*)),((?~alternative=.*)),<hrmemberseparator>}
{([if {[string match *symbol $HN]} {concat symbol rule} else {concat hyperrule}]),<[
hr $HN +:= [sym union $rule $alternative]
if {$first && ![string match *symbol $HN]} {hr %start := $HN,%end; set first 0}
concat endrule
]>}
| ^ | | | | | | section top | | 24 ::
A notion is here used to refer to a sequence of small and/or large marks,
and things that become small marks like literals. The calling sequence for
this dpda state is
- rule-comment
- rule-match
- (next-state),<notion>
- ...
- rule-comment
- (proto|meta|hyper),(large-and-small-marks)<next-state>
- rule-substitution
'proto' indicates only small marks were parsed (or no marks were parsed), 'meta'
only large marks, and 'hyper' a mixture of the two. The
symbol is the concatenation of the marks, or #NS if no marks.
| ^ | | | | section top | |
start-a-protonotion
{<notion>(?~N=[a-z()]+)}
{(proto),($N),<notioni>}
start-a-protonotion
{<notion>(?~N=[#'])}
{(proto),(),<notioni>$N}
start-a-hypernotion
{<notion>(?~N=\[)}
{(hyper),(),<notioni>$N}
start-a-metanotion
{<notion>(?~N=[A-Z0-9]+)}
{(meta),($N),<notioni>}
empty-notion
{((?~state=.*)),<notion>}
{(proto),(#NS),<$state>}
<Literal glyphs in a hypernotion>
<Decimal numbers in a hypernotion>
continue-a-protonotion
{(proto),((?~PN=.*)),<notioni>(?~N=[a-z()]+)}
{(proto),($PN$N),<notioni>}
continue-a-metanotion
{(meta),((?~MN=.*)),<notioni>(?~N=[A-Z0-9]+)}
{(meta),($MN$N),<notioni>}
continue-a-hypernotion
{(.*),((?~HN=.*)),<notioni>(?~N=[a-z()A-Z0-9]+)}
{(hyper),($HN$N),<notioni>}
end-a-notion
{((?~state=.*)),((?~kind=.*)),((?~N=.*)),<notioni>}
{($kind),([sym encode string $N]),<$state>}
| ^ | | | | | | section top | | | ^ | | | | | | section top | |
\n newline \r return \t tab
" " space ! exclaim "\"" quote # hash
$ dollar % percent & ampersand '' apostrophe
( leftparen ) rightparen * asterisk + plus
, comma - dash . fullstop / slash
0 zero 1 one 2 two 3 three
4 four 5 five 6 six 7 seven
8 eight 9 nine : colon ; semicolon
< lessthan = equals > greaterthan ? query
@ at A largea B largeb C largec
D larged E largee F largef G largeg
H largeh I largei J largej K largek
L largel M largem N largen O largeo
P largep Q largeq R larger S larges
T larget U largeu V largev W largew
X largex Y largey Z largez [ leftbracket
\\ backslash ] rightbracket ^ circumflex _ underline
` grave a lettera b letterb c letterc
d letterd e lettere f letterf g letterg
h letterh i letteri j letterj k letterk
l letterl m letterm n lettern o lettero
p letterp q letterq r letterr s letters
t lettert u letteru v letterv w letterw
x letterx y lettery z letterz \{ leftbrace
| verticalbar \} rightbrace ~ tilde
| ^ | | | | | | section top | |
# Bring the namespace variable defaultGlyphName into scope and copy all of
# its entries into the array glyphName, which the glyph rules read and update.
variable defaultGlyphName
array set glyphName [array get defaultGlyphName]
| ^ | | | | | | section top | |
glyph-property
{(glyph),<propertyrule>'(?~char=''|[^'])'}
{($char),(glyphproperty),<notion>}
glyph-property
{(glyph),<propertyrule>}
{<[report E \[$line\] missing literal after 'glyph=' property; concat quit]>}
glyph-property
{(.*),(proto),(#NS),<glyphproperty>}
{(glyph property),<[report E \[$line\] missing glyphname; concat endrule]>}
glyph-property
{((?~char=.*)),(proto),((?~N=.*)),<glyphproperty>}
{(glyph property),<[set glyphName($char) $N; concat endrule]>}
| ^ | | | | | | section top | |
literal-glyphs
{<notioni>'(?~string=([^']|'')*)'}
{<notioni>[string map [array get glyphName] $string]}
literal-glyphs-error
{<notioni>'}
{<[report E \[$curr\] missing close quote ('); concat notioni]>}
| ^ | | | | | | section top | |
NUMBER
{<notioni>#(?~num=[0-9 \t\n]+)}
{<notioni>number([string map {
0 zero 1 one 2 two 3 three 4 four
5 five 6 six 7 seven 8 eight 9 nine
" " "" "\t" "" "\n" ""
} $num])}
missing-NUMBER
{<notioni>#}
{<[report E \[$curr\] missing digits after "'#'"; concat notioni]>}
| ^ | | | | | | section top | | 32. Foreign rules ::
Foreign text is a way to enter programming code written in another
language into the text of a grammar. Attributes can be made available
for input and output to the text. As far as the context free grammar
is concerned, each foreign rule is interpreted as an empty production;
the foreign text is then evaluated during a reduction.
The foreign text must contain a balanced number of brackets; no escapes are available,
nor are any kinds of strings or comments interpreted to hide unbalanced brackets. If the
foreign code must use unbalanced brackets, it must do so outside the text of the grammar.
foreign-rule ::=
hypernotion : foreign-texts.
foreign-texts ::=
foreign-text |
foreign-texts; foreign-text
foreign-text ::=
foreign-output foreign-input language [foreign-code]
| language immediate [foreign-code]
foreign-output ::= [ (metanotion...) ]
foreign-input ::= [ metanotion... ]
language ::= [ protonotion ]
- The language cannot end in 'immediate'
foreign-code ::= [ foreign-chunk... ]
foreign-chunk ::= any-characters-except-[-and-]
| [foreign-code]
Normally in the cf parser, foreign texts are evaluated with the attributes after
parsing. If the small marks 'immediate' are included after the language,
the foreign text is instead evaluated immediately on reduction within the parser
itself. In a scanner, all foreign texts are immediate, whether marked so or not.
Because immediate texts are evaluated during the parse, they can alter the lexical
interpretation and other aspects of parser so that it can accept languages that
it could not otherwise.
For example, C has well known ambiguity with typedef names. A C grammar can use
immediate foreign texts in the parser and scanners with a rudimentary symbol table
to remove this ambiguity.
NEST block:
left brace symbol, push new typedef level,
NEST declarations into NEST1, NEST1 statements,
right brace symbol, pop off typedef level.
NEST typedef: typedef symbol, TAG symbol, make typedef.
IDENTIFIER symbol: TAG symbol, relabel if typedef.
TAG symbol: letter, letter or digit sequence option.
include = [
typedef struct TopLevel TopLevel;
struct TopLevel {int depth; char *tag; TopLevel *under;};
TopLevel *topLevel = 0; int topLevelDepth = 0;
].
push new typedef level: immediate [
topLevelDepth++;
].
pop off typedef level: immediate [
topLevelDepth--;
while (topLevel && topLevel->depth>topLevelDepth) {
TopLevel *u = topLevel->under; free(topLevel); topLevel = u;
}
].
relabel if typedef: immediate [
TopLevel *t; for (t=topLevel; t; t=t->under) {
if (strcmp(t->tag,Tcl_GetString(bufferContents()))==0) {
PS->override = true;
PS->reserved = 0;
PS->symbol = TYPENAMEsymbol;
PS->nameclass = 0;
break;
}
}
].
make typedef: immediate [
TopLevel *t; t = malloc(sizeof(TopLevel));
t->depth = topLevelDepth; t->tag = bufferString(lastlexeme.name,0);
t->under = topLevel; topLevel = t;
].
An immediate foreign text cannot have explicit output or input variables. This
is because variables are propagated by the attribute evaluator after the parse
is completed, but immediate texts are evaluated before the parse is completed.
Immediate texts which need to communicate need to establish some protocol
with global variables.
(The language string cannot end in 'immediate' unless there is another
'immediate' after it. ximmediate[...] is an immediate text in
language x; ximmediate immediate[...] is an immediate text in
language ximmediate.)
identify-a-foreign-rule
{((proto|meta|hyper)),((?~HN=.*)),<rulekind>:\s*(?~la=(\([A-Z0-9 \t\n]*\))?[A-Z0-9 \t\n]*[a-z() \t\n]*\[)}
{($HN),(),<[set curr $HN; concat foreignrule]>$la}
foreign-text-variables
{<foreignrule>(\((?~output=([A-Z0-9 \t\n]*))\))?(?~input=[A-Z0-9 \t\n]*)(?~language=[a-z() \t\n]*)}
{([regsub -all \\s+ $output {}]),([regsub -all \\s+ $input {}]),([regsub -all \\s+ $language {}]),(),(0),<foreigncode>}
foreign-code-begin
{(0),<foreigncode>\[}
{(1),<foreigncode>}
foreign-code-begin
{((?~HN=.*)),((?~alts=.*)),((?~output=.*)),((?~input=.*)),((?~language=.*)),((?~code=.*)),(0),<foreigncode>}
{(foreign rule),<[report E missing foreign-code '\[...\]': $HN; concat endrule]>}
foreign-code-end
{((?~alts=.*)),((?~output=.*)),((?~input=.*)),((?~language=[^%]*)),((?~code=.*)),(1),<foreigncode>\]}
{([
if {![string length $output]} {set output -}
if {![string length $input]} {set input -}
if {![string length $language]} {set language -}
if {[string match *immediate $language]} {
if {![string equal $output$input --]} {
report E \[$line\] immediate foreign text cannot have variables: ($output)$input
}
}
sym union $alts [sym encode string [list $output $input $language $code]]
]),<foreigntextor>}
foreign-code-nest
{((?~code=.*)),((?~N=.*)),<foreigncode>\[}
{($code\[),([expr {$N+1}]),<foreigncode>}
foreign-code-unnest
{((?~code=.*)),((?~N=.*)),<foreigncode>\]}
{($code\]),([expr {$N-1}]),<foreigncode>}
foreign-text-OR-or-END
{<foreigntextor>[;|]}
{<foreignrule>}
foreign-text-completed
{((?~HN=.*)),((?~alts=.*)),<foreigntextor>}
{(foreign rule),<[
hr foreigns: +:= $HN
hr foreign: $HN +:= $alts
concat endrule
]>}
| ^ | | | | | | section top | |
# Report every name that is listed in foreigns: and is also one of the
# nonterminals of hr, and delete its foreign: property.
sym each hn [sym intersect [hr foreigns:] [hr::nonterminals]] {
report E production cannot be both a hyperrule and a foreign-rule: $hn
hr::delete foreign: $hn
}
# Assign "#NS" to every name listed in foreigns:.
# NOTE(review): foreigns: is not changed by the loop above, so names just
# reported as conflicts are assigned "#NS" here as well; confirm this is intended.
sym each hn [hr foreigns:] {hr $hn := "#NS"}
| ^ | | |
| |