DNA.
wyrm-dfa
Version.
1.0.9
Language.
c
Manpage.
dfa (3WY)
Testbase.
Test Script
Test Report
Import.
Interface.
wyrmwif
Package.
wyrmwif
wyrm-io
Export.
Implementation.
wyrm-dfa.c
Interface.
wyrm-dfa.h
Package.
wyrmdfa.dylib

Deterministic Finite Automata

Sections.
Package Files
DFA
Character Transducer
Channel Reader
String Source
Package Interface
Make.
Package.
if {[info exists ::env(debugdfa)] && $::env(debugdfa)} {
  set debugdfa -DDEBUG_DFA
} else {
  set debugdfa -DNORMAL_DFA
}
compile $debugdfa -cc -ld -o [
  export package
] [
  export implementation
] -- -list [import interface] [export interface]
Script.
rule wyrm-dfa.package $so/wyrmdfa[info sharedlibextension]
rule wyrm-dfa.test [list \
    pkgIndex.tcl \
    $test/wyrm-dfa.test.html \
]

rule clean :: {} "
  -rm $test/wyrm-dfa.TESTING
"
rule clobber :: {} "
  -rm $include/wyrm-dfa.h
  -rm $so/wyrmdfa[info sharedlibextension]
"
   
top

1 :: #defines and declarations to implement DFAs in C code.

Copyright (C) 2004 SM Ryan

Wyrmwif Tcl extensions. For non-profit uses only, provided this copyright is preserved on all copies, this work may be freely copied, modified, redistributed, compiled, and incorporated in other works. This work is distributed with no warranty of any kind; no author or distributor accepts any responsibility for the consequences of using it, or for whether it serves any particular purpose or works at all, unless he or she says so in writing.

   
   

Package Files

   
top
 
section    top
		
#ifndef WYRM_DFA_H
#define WYRM_DFA_H

	//	wyrm-dfa.dna - Copyright (C) 2004 SM Ryan.  All rights reserved.

	#include "wyrmwif.h"
	#include "wyrm-io.h"
	
	<DFA defines>
	<Character finite transducer defines>
	<Channel reader transducer defines>
	<String source transducer declares>

	int Wyrmdfa_Init(Intr intr);
	int Wyrmdfa_SafeInit(Intr intr);

#endif

		
 
section    top

static const char COPYRIGHT[] =
		"wyrm-dfa.dna - Copyright (C) 2004 SM Ryan.  All rights reserved.";

#include "wyrm-dfa.h"

<String source transducer implementation>
<Package>

		
   
   

DFA

   
top
 
section    top

6. DFA defines :: dfa-machine ::= dfa-call dfa-state... afd-call

Overall structure of a DFA. This must be embedded in a function.

dfa-state ::= state-call state-transition... etats-call

A state transitions to another state or final based on the current input symbol.

state-transition ::= transition-chooser statement... next-state

If this transition is selected, C code can be evaluated on the state transition before continuing at the next state. If no transition of a state matches the input, the DFA is terminated as if a final state, with an exit-code -1.

transition-chooser ::= is-call | in-call | any-call

next-state ::= move-call | emove-call

dfa-call ::= dfa(initial,Symbol,getsymbol,cookie)

initial: The initial state of the machine.
Symbol: The C type of the symbol, such as int.
getsymbol: A function of the type Symbol getsymbol(ClientData). This is called to acquire each new symbol as needed.
cookie: The parameter to the getsymbol function on each call.

afd-call ::= afd(var)

var: An integer lvalue which assigned the state-code from the final-call.

state-call ::= state(name)

Begin the state.

etats-call ::= etats

End the state.

final-call ::= final(state-code)

End the state and the machine. The state-code is an integer which is assigned to the variable of afd. This can be used to indicate which final state was the matched.

is-call ::= is(value)

The transition matches if the current symbol == value.

in-call ::= in(lower-value,upper-value)

The transition matches if lower-value <= the current symbol <= upper-value.

any-call ::= any

The transition matches no matter what the current symbol is.

move-call ::= move(state)

Continue at the named state. The getsymbol routine is called to get a new current symbol.

emove-call ::= emove(state)

Continue at the named state with the same current symbol.


#ifndef DEBUG_DFA
	#undef dstate
	#define dstate(x)
	#undef dtransition
	#define dtransition(x)
#endif
#ifndef dstate
	#define dstate(x) puts("state: " x);
#endif
#ifndef dtransition
	#define dtransition(x) puts("  on: " x);
#endif

#define dfa(initial,Symbol,getsymbol,cookie) \
	{ \
		int v_final; Symbol symbol; \
		Symbol (*v_getsymbol)(ClientData) = (Symbol (*)(ClientData))(getsymbol); \
		ClientData v_cookie = (cookie); \
		goto s_##initial; \
		for (;;) {
	#define state(name) \
			s_##name: symbol = v_getsymbol(v_cookie); e_##name: dstate(#name)
		#define is(value) \
				if (symbol==value) {dtransition("is " #value)
		#define in(ll,ul) \
				if (ll<=symbol && symbol<=ul) {dtransition("in " #ll ".." #ul)
		#define any \
				{dtransition("any")
		#define move(state) \
				goto s_##state;}
		#define emove(state) \
				goto e_##state;}
		#define final(value) \
				v_final = (value); break;}
	#define etats \
				v_final = -1; break;
#define afd(var) \
		} \
		var = v_final; \
	}

		
   
   

Character Transducer

   
top
 
section    top

8. Character finite transducer defines :: The character transducer augments the basic DFA with the ability to construct a string based on the input symbols. The symbol type is int, with characters represented by nonnegative integers and the distinguished value EOF. The getsymbol routine must return compatiable values. The output string is initialised to an empty string. It is appended to with append or shift call.

dfa-machine ::= transducer-call dfa-state... endtransducer-call

statement ::= append-call

next-state ::= shift-call

transducer-call ::= transducer(initial,getsymbol,cookie)

initial: The initial state of the machine.
getsymbol: A function of the type int getsymbol(ClientData). This is called to acquire each new symbol as needed.
cookie: The parameter to the getsymbol function on each call.

endtransducer-call ::= endtransducer(var,string-var)

var: An integer lvalue which assigned the state-code from the final-call.
var: A char* lvalue which assigned the result string. The caller is responsible for disposing the string.

append-call ::= append(substring)

The substring is appended to the result string.

shift-call ::= shift(state)

Append the current symbol to the result string. (The symbol can be modified first by modifying the variable symbol.) Then continue at the named state. The getsymbol routine is called to get a new current symbol.


#define transducer(initial,getsymbol,cookie) \
	{ \
		Tcl_DString v_string; char v_r[1]; Tcl_DStringInit(&v_string); \
		dfa(initial,int,getsymbol,cookie)
	#define shift(state) \
			*v_r = symbol; Tcl_DStringAppend(&v_string,v_r,1); move(state)
	#define append(string) \
			Tcl_DStringAppend(&v_string,string,-1)
#define endtransducer(var,stringvar) \
		afd(var) \
		(stringvar) = Tcl_Alloc(Tcl_DStringLength(&v_string)+1); \
		strcpy(stringvar,Tcl_DStringValue(&v_string)); \
		Tcl_DStringFree(&v_string); \
	}

		
   
   

Channel Reader

   
top
 
section    top

#define channeldfa(initial,channel) transducer(initial,cgetc,channel)

		
   
   

String Source

   
top
 
section    top

#define stringdfa(initial,object) \
	{ \
		WyrmDfaString v_strobj; v_strobj.o = incr(object); \
		v_strobj.s = Tcl_GetStringFromObj(v_strobj.o,&v_strobj.n); \
		transducer(initial,wyrm_dfaString,&v_strobj)
#define endstring(var,stringvar) \
		endtransducer(var,stringvar) \
		decr(v_strobj.o); \
	}

typedef struct {int n; chars s; Obj o;} WyrmDfaString;
int wyrm_dfaString(ClientData data);

		
 
section    top

int wyrm_dfaString(ClientData data) {
	WyrmDfaString *st = data;
	if (st->s && st->n>0) {
		int ch = 0xFF & st->s[0]; st->s += 1; st->n -= 1; return ch;
	}else {
		return EOF;
	}
}

		
   
   

Package Interface

   
top

#ifdef TESTING
	#include <ctype.h>
	
	static int Ndfa; static Obj *Pdfa;
	static int Qdfa(ClientData clientData) {
		if (Ndfa<=0) {
			return -1;
		}
		int x = -1;
		Tcl_GetIntFromObj(0,*Pdfa,&x);
		Pdfa++; Ndfa--;
		return x;
	}

	static int qdfacommand(ClientData clientData,Intr intr,int N,Tcl_Obj *const P[]) {
		int machine;
		if (Tcl_GetIntFromObj(intr,P[1],&machine)!=TCL_OK) return TCL_ERROR;
		switch (machine) {
			case 1: {
				int fs; Ndfa = N-2; Pdfa = P+2;
				dfa(even,int,Qdfa,0)
					state(even)
						is(0) move(even)
						is(1) move(odd)
						is(2) emove(two)
						is(-1) final(2)
					etats
					state(odd)
						is(0) move(odd)
						is(1) move(even)
						is(-1) final(1)
					etats
					state(two)
						is(0) move(two)
						in(1,4) emove(three)
						is(5) final(3)
					etats
					state(three)
						is(1) move(two)
						is(2) move(four)
						any move(two)
					etats
					state(four)
						is(3) emove(two)
					etats
				afd(fs)
				Tcl_SetObjResult(intr,Tcl_NewIntObj(fs));
			}	return TCL_OK;
			case 2: {
				int fs; chars st; Obj E[2]; Ndfa = N-2; Pdfa = P+2;
				transducer(start,Qdfa,0)
					state(start)
						is(65) shift(trailer)
						is(32) move(start)
					etats
					state(trailer)
						is(65) shift(trailer)
						is(97) symbol -= 32; shift(trailer)
						is(48) shift(trailer)
						is(1) append("/"); move(trailer)
						is(32) move(trailer)
						is(0) final(0)
					etats
				endtransducer(fs,st)
				E[0] = Tcl_NewIntObj(fs);
				E[1] = Tcl_NewStringObj(st,-1);
				Tcl_SetObjResult(intr,Tcl_NewListObj(2,E));
				dispose(st);
			}	return TCL_OK;
			case 3: {
				int mood; Tcl_Channel channel = Tcl_GetChannel(intr,Tcl_GetString(P[2]),&mood);
				int fs; chars st; Obj E[2];
				channeldfa(prefix,channel)
					state(prefix)
						is('+') append(" PRE"); shift(prefix)
						is('-') append(" PRE"); shift(prefix)
						any emove(primary)
					etats
					state(primary)
						in('a','z') append(" "); shift(infix)
						in('A','Z') append(" "); shift(infix)
						in('0','9') append(" "); shift(number)
					etats
					state(number)
						in('0','9') shift(number)
						any emove(infix)
					etats
					state(infix)
						is('+') append(" IN"); shift(prefix)
						is('-') append(" IN"); shift(prefix)
						is('*') append(" IN"); shift(prefix)
						is('/') append(" IN"); shift(prefix)
						is('.') append(" END"); final(1)
						is(EOF) append(" END"); final(0)
					etats
				endtransducer(fs,st)
				E[0] = Tcl_NewIntObj(fs);
				E[1] = Tcl_NewStringObj(st,-1);
				Tcl_SetObjResult(intr,Tcl_NewListObj(2,E));
				dispose(st);
			}	return TCL_OK;
			case 4: {
				int fs; chars st; Obj E[2]; int ec;
				stringdfa(init,P[2])
					state(init)
						is('+') ec = 1; shift(tao)
						is('-') ec = 1; shift(tao)
						is('/') ec = 2; shift(tao)
						is('*') ec = 2; shift(tao)
						is(':') ec = 2; shift(tao)
						is('=') ec = 2; shift(tao)
						in('a','z') shift(tag)
						in('A','Z') symbol = tolower(symbol); shift(tag)
						is('_') shift(tag)
						in('0','9') shift(integer)
						is('\"') move(string)
						is(' ') move(init)
						is('#') move(comment)
					etats
					state(tao)
						is('/') shift(tao)
						is('*') shift(tao)
						is(':') shift(tao)
						is('=') shift(tao)
						any final(ec)
					etats
					state(tag)
						in('a','z') shift(tag)
						in('A','Z') symbol = tolower(symbol); shift(tag)
						is('_') shift(tag)
						in('0','9') shift(tag)
						any final(3)
					etats
					state(integer)
						is('_') move(integer)
						in('0','9') shift(integer)
						any final(4)
					etats
					state(string)
						is('\"') move(quote)
						is(EOF) final(5)
						any shift(string)
					etats
					state(quote)
						is('\"')shift(string)
						any final(5)
					etats
					state(comment)
						is('#') move(init)
						is(EOF) final(6)
						any move(comment)
					etats
				endstring(fs,st)
				E[0] = Tcl_NewIntObj(fs);
				E[1] = Tcl_NewStringObj(st,-1);
				Tcl_SetObjResult(intr,Tcl_NewListObj(2,E));
				dispose(st);
			}	return TCL_OK;
		}
		Tcl_Panic("you can't get here from there");
	}
#endif

int Wyrmdfa_Init(Intr intr) {
	Tcl_PkgRequire(intr,"wyrmwif","1",0);
	Tcl_PkgProvide(intr,"wyrmdfa",VERSION);
	#ifdef TESTING
		Tcl_CreateObjCommand(intr,"::wyrm::qdfa",qdfacommand,0,0);
		return Tcl_VarEval(intr,"namespace eval ::wyrm {namespace export qdfa}",0);
	#else
		return TCL_OK;
	#endif
}

int Wyrmdfa_SafeInit(Intr intr) {
	return Wyrmdfa_Init(intr);
}

		
 
section    top
    DFA Callback dfas.
      DFA000
      Plain DFA.
        Plain moves, is, and final.
          DFA100
        E-moves.
          DFA101
        Range match.
          DFA102
        Any match.
          DFA103
        Nothing matched initial state.
          DFA104
        Nothing matched subsequent state.
          DFA105
        Empty input.
          DFA106
      Transducer.
        Shifts.
          DFA200
        Filterred hifts.
          DFA201
        Shifts and moves.
          DFA202
        Appends.
          DFA203
        Nothing matched initial state.
          DFA204
        Nothing matched subsequent state.
          DFA205
        Empty input.
          DFA206
      File reader.
        Shifts and appends.
          DFA300
        Read to EOF.
          DFA301
        Nothing matched initial state.
          DFA302
        Nothing matched subsequent state.
          DFA303
        Empty input.
          DFA304
      String reader.
        Shifts.
          DFA400
        Filterred shifts.
          DFA401
        Read to end of string.
          DFA402
        Nothing matched initial state.
          DFA403
        Nothing matched subsequent state.
          DFA404
        Empty input.
          DFA405