parsing_combinators.txt revision 608:7e06bf1dcb09
1221828SgrehanParsing
2221828Sgrehan
3221828Sgrehan- primitive types
4221828Sgrehan- lists of various sorts
5221828Sgrehan
6221828Sgrehan    Basic models
7221828Sgrehan
8221828Sgrehan	prefix		A,BCDF	    scans left-to-right
9221828Sgrehan	suffix		ABCD:F	    scans right-to-left
10221828Sgrehan
11221828Sgrehan    Model 1:
12221828Sgrehan    data is sep,op,next,combine
13221828Sgrehan
14221828Sgrehan	if (sep found in data)
15221828Sgrehan	    split data into prefix, rest
16221828Sgrehan	    return combine( op(prefix), next(rest) )
17221828Sgrehan	else
18221828Sgrehan	    return combine( default, next(data) )
19221828Sgrehan
20221828Sgrehan    pop = suffix1( ":", op3, op4, ++ )
21221828Sgrehan    op = prefix1( ",", op2, pop, ++ )
22221828Sgrehan
23221828Sgrehan    op( A,BDF.FGH:Z ) =
24221828Sgrehan
25221828Sgrehan	op2(A) ++ pop( BDF.FGH:Z ) =
26221828Sgrehan	op2(A) ++ op4( BDF.FGH ) ++ op3( Z )
27221828Sgrehan    
28221828Sgrehan    Model 2:
29221828Sgrehan    data is sep,op,next,combine
30221828Sgrehan
31221828Sgrehan	if (sep found in data)
32270070Sgrehan	    split data into prefix, rest
33270070Sgrehan	    return combine( op(prefix), next(rest) )
34270070Sgrehan	else
35284894Sneel	    return op(data)
36284894Sneel
37284894Sneel    example
38284894Sneel	op = prefix2( ":", op2, null, ++ ) 
39295124Sgrehan	op.setNext( op ) ;
40284894Sneel
41268976Sjhb	op( A:B:C:D ) =
42221828Sgrehan
43240922Sneel	op2(A) ++ op2(B) ++ op2(C) ++ op2(D)
44221828Sgrehan
45248477Sneel
46248477Sneel
47248477Sneelreduce( sep, initial, op, combine )
48248477Sneel
49248477Sneel    operate( data )
50248477Sneel	if (sep in data)
51248477Sneel	    split data into prefix, rest
52248477Sneel	    return combine( op(prefix), operate( rest ))
53248477Sneel	else
54248477Sneel	    return combine( op(data), initial ) 
55295124Sgrehan
56295124Sgrehanreduce( sep, op1, op2, initial, combine )
57295124Sgrehan
58268953Sjhb    operate(data)
59295124Sgrehan	if (sep in data)	// either first from left or first from right
60268953Sjhb	    split data into prefix, rest
61295124Sgrehan	    return combine( op1( prefix ), op2( rest ) )
62295124Sgrehan	else
63295124Sgrehan	    return opx( data )
64295124Sgrehan
65295124Sgrehantype<X,Y>
66295124Sgrehan    class combine
67295124Sgrehan	init : X
68295124Sgrehan	add( X, Y )  : X
69295124Sgrehan    op1( String ) : Y
70295124Sgrehan    op2( String ) : X
71295124Sgrehan
72295124Sgrehan    reduce( sep, op1, op2, comb ) ( String ) : X 
73295124Sgrehan
74295124Sgrehan    operate(data)
75295124Sgrehan	if (sep in data)	// either first from left or first from right
76295124Sgrehan	    split data into prefix, rest
77295124Sgrehan	    return comb.add( op2.operate( rest ), op1.operate( prefix ) )
78295124Sgrehan	else
79295124Sgrehan	    return comb.add( comb.init(), op1.operate( data ) )
80295124Sgrehan
81295124Sgrehan    example
82295124Sgrehan
83295124Sgrehan	op = reduce( ":", op1, null, comb )
84295124Sgrehan	op.setop2( op ) 
85295124Sgrehan
86295124Sgrehan	op.operate( "A:B:C" ) =
87295124Sgrehan	comb.add( op.operate( "B:C" ), op1.operate( "A" ) ) = 
88295124Sgrehan	comb.add( comb.add( op.operate("C"), op1.operate("B") ), op1.operate( "A" ) ) =
89295124Sgrehan	comb.add( comb.add( comb.add( comb.init(), op1.operate("C") ), op1.operate("B") ),
90295124Sgrehan	    op1.operate("A") )
91295124Sgrehan
92295124Sgrehan
93295124SgrehanSplitter interface
94295124Sgrehan
95295124Sgrehaninterface Splitter {
96295124Sgrehan    List split( String str ) 
97295124Sgrehan}
98295124Sgrehan    
99295124Sgrehanvariations:
100295124Sgrehan    - separated list		    SL
101295124Sgrehan    - first sep rest		    FSR
102295124Sgrehan	- fail if not present	    one arg
103295124Sgrehan	- default value		    two args
104221828Sgrehan    - rest sep last		    RSL
105221828Sgrehan	- fail if not present	    one arg
106221828Sgrehan	- default value		    two args
107256176Sneel
108248477SneelHave we just pushed the real problem off a level?
109248477Sneel
110256072SneelHow do we combine:
111284899Sneel    op1 = FSR("@",v12)
112284900Sneel    op2 = RSL(":",9090)
113248477Sneel
114248477Sneel    str = 1.2@myhost:2345
115268953Sjhb
116295124Sgrehan    op1(str) = ( "1.2" "myhost:2345" )
117270074Sgrehan
118270074Sgrehan    define splice( int index, Operator op ) on a list of strings, where op( String ) : (String),
119221828Sgrehan    to replace the element at position index (counting from 0) of the list of strings
120221828Sgrehan    with the list returned from op( element ).
121221828Sgrehan
122221828Sgrehan    compose( op1, splice( 1, op2 )) is the correct parser.
123270159Sgrehan
124270159Sgrehan
125221828SgrehanA grammar for parsers?
126221828Sgrehan
127284894Sneelparser	:	    simple_parser
128268935Sjhb	|	    parser ":" simple_parser ;
129270071Sgrehan
130239026Sneelsimple_parser :	    ident
131284894Sneel	      |	    ident "(" param_list ")" ;
132284894Sneel
133221828Sgrehanparam_list    :	    param
134262350Sjhb	      |	    param_list "," param ;
135262350Sjhb
136261088Sjhbparam	      :	    constant
137261088Sjhb	      |	    parser ;
138261088Sjhb   
139267393Sjhbconstant is a Java constant
140268891Sjhbident x is interpreted as either a public static method on OperationFactory
141268891Sjhbnamed xAction which takes as arguments the types of the param list, or as
142268891Sjhbthe fully qualified class name of a class that implements Operation and has
143268972Sjhba constructor which takes as arguments the types of the param list.
144268972Sjhb
145221828SgrehanFrom parser table:
146221828Sgrehan
147241486SneeldebugFlags		string
148221828SgrehanORBInitialHost		string
149221828SgrehanORBInitialPort		integer
150221828SgrehanORBServerHost		string
151221828SgrehanORBServerPort		integer
152221828SgrehanorbId			string
153221828SgrehanhighWaterMark		integer
154221828SgrehanlowWaterMark		integer
155221828Sgrehanetc.
156262350Sjhb
157262350SjhbgiopVersion		construct(GIOPVersion.class):map(integer):list('.')
158262350SjhbgiopFragmentSize	mod(ORBConstants.GIOP_FRAGMENT_DIVISOR):min(ORBConstants.GIOP_FRAGMENT_SIZE):integer
159262350Sjhb
160262350SjhbLisp notation:
161221828Sgrehan    parse((mod ORBConstants.GIOP_FRAGMENT_DIVISOR) (min ...) (integer))
162270159Sgrehan
163270159Sgrehangiop11BuffMgr		makeMap(map) where map is constructed in java with
164270159Sgrehan			map.get("GROW") = Integer(0)
165221828Sgrehan			map.get("CLCT") = Integer(1)
166221828Sgrehan			map.get("STRM") = Integer(2)
167221828Sgrehan
168221828SgrehangiopTargetAddressPreference	intToShort:integerRange(0,3)
169221828SgrehangiopAddressDisposition		another map variant
170221828Sgrehan
171221828SgrehancharData		construct(CodeSetComponentInfo.class):string
172240922Sneel
173240922Sneel
174240922SneelWhat about corbaloc:?
175261088Sjhb
176261088Sjhbv12 = GIOPVersion.v12 ;
177268976Sjhb
178268976SjhbgiopVersion = construct( GIOPVersion.class ):mapSequence( [integer,integer] ):FSR(".") 
179268976Sjhb
180284900SneeliiopAddress =  mapSequence( [giopVersion,identity,integer] ):
181284900Sneel	       splice( 1, RSL( ":", 9090 )):
182284900Sneel	       FSR( "@", v12 )
183284900Sneel
184284900SneeladdressHandler = choice( 
185268976Sjhb    "iiop:",	iiopAddress,
186270159Sgrehan    ":",	iiopAddress 
187284900Sneel)
188284900Sneel
189268976SjhbaddressList = map(addressHandler):SL(",")
190268976Sjhb
191268976Sjhbchoice( 
192268976Sjhb    "corbaloc:", mapSequence( [addressList,string] ):RSL("/", "NameService"),
193284894Sneel    "corbaname:", ...
194284894Sneel)
195268976Sjhb