parsing_combinators.txt revision 608:7e06bf1dcb09
1221828SgrehanParsing 2221828Sgrehan 3221828Sgrehan- primitive types 4221828Sgrehan- lists of various sorts 5221828Sgrehan 6221828Sgrehan Basic models 7221828Sgrehan 8221828Sgrehan prefix A,BCDF scans left-to-right 9221828Sgrehan suffix ABCD:F scans right-to-left 10221828Sgrehan 11221828Sgrehan Model 1: 12221828Sgrehan data is sep,op,next,combine 13221828Sgrehan 14221828Sgrehan if (sep found in data) 15221828Sgrehan split data into prefix, rest 16221828Sgrehan return combine( op(prefix), next(rest) ) 17221828Sgrehan else 18221828Sgrehan return combine( default, next(data) ) 19221828Sgrehan 20221828Sgrehan pop = suffix1( ":", op3, op4, ++ ) 21221828Sgrehan op = prefix1( ",", op2, pop, ++ ) 22221828Sgrehan 23221828Sgrehan op( A,BDF.FGH:Z ) = 24221828Sgrehan 25221828Sgrehan op2(A) ++ pop( BDF.FGH:Z ) = 26221828Sgrehan op2(A) ++ op4( BDF.FGH ) ++ op3( Z ) 27221828Sgrehan 28221828Sgrehan Model 2: 29221828Sgrehan data is sep,op,next,combine 30221828Sgrehan 31221828Sgrehan if (sep found in data) 32270070Sgrehan split data into prefix, rest 33270070Sgrehan return combine( op(prefix), next(rest) ) 34270070Sgrehan else 35284894Sneel return op(data) 36284894Sneel 37284894Sneel example 38284894Sneel op = prefix2( ":", op2, null, ++ ) 39295124Sgrehan op.setNext( op ) ; 40284894Sneel 41268976Sjhb op( A:B:C:D ) = 42221828Sgrehan 43240922Sneel op2(A) ++ op2(B) ++ op2(C) ++ op2(D) 44221828Sgrehan 45248477Sneel 46248477Sneel 47248477Sneelreduce( sep, initial, op, combine ) 48248477Sneel 49248477Sneel operate( data ) 50248477Sneel if (sep in data) 51248477Sneel split data into prefix, rest 52248477Sneel return combine( op(prefix), operate( rest )) 53248477Sneel else 54248477Sneel return combine( op(data), initial ) 55295124Sgrehan 56295124Sgrehanreduce( sep, op1, op2, initial, combine ) 57295124Sgrehan 58268953Sjhb operate(data) 59295124Sgrehan if (sep in data) // either first from left or first from right 60268953Sjhb split data into prefix, rest 61295124Sgrehan return combine( op1( 
prefix ), op2( rest ) ) 62295124Sgrehan else 63295124Sgrehan return opx( data ) 64295124Sgrehan 65295124Sgrehantype<X,Y> 66295124Sgrehan class combine 67295124Sgrehan init : X 68295124Sgrehan add( X, Y ) : X 69295124Sgrehan op1( String ) : X 70295124Sgrehan op2( String ) : Y 71295124Sgrehan 72295124Sgrehan reduce( sep, op1, op2, comb ) ( String ) : X 73295124Sgrehan 74295124Sgrehan operate(data) 75295124Sgrehan if (sep in data) // either first from left or first from right 76295124Sgrehan split data into prefix, rest 77295124Sgrehan return comb.add( op2.operate( rest ), op1.operate( prefix ) ) 78295124Sgrehan else 79295124Sgrehan return comb.add( comb.init(), op1.operate( data ) ) 80295124Sgrehan 81295124Sgrehan example 82295124Sgrehan 83295124Sgrehan op = reduce( ":", op1, null, comb ) 84295124Sgrehan op.setop2( op ) 85295124Sgrehan 86295124Sgrehan op.operate( "A:B:C" ) = 87295124Sgrehan comb.add( op.operate( "B:C" ), op1.operate( "A" ) ) = 88295124Sgrehan comb.add( comb.add( op.operate("C"), op1.operate("B") ), op1.operate( "A" ) ) = 89295124Sgrehan comb.add( comb.add( comb.add( comb.init(), op1.operate("C") ), op1.operate("B") ), 90295124Sgrehan op1.operate("A") ) 91295124Sgrehan 92295124Sgrehan 93295124SgrehanSplitter interface 94295124Sgrehan 95295124Sgrehaninterface Splitter { 96295124Sgrehan List split( String str ) 97295124Sgrehan} 98295124Sgrehan 99295124Sgrehanvariations: 100295124Sgrehan - separated list SL 101295124Sgrehan - first sep rest FSR 102295124Sgrehan - fail if not present one arg 103295124Sgrehan - default value two args 104221828Sgrehan - rest sep last RSL 105221828Sgrehan - fail if not present one arg 106221828Sgrehan - default value two args 107256176Sneel 108248477SneelHave we just pushed the real problem off a level? 
109248477Sneel 110256072SneelHow do we combine: 111284899Sneel op1 = FSR("@",v12) 112284900Sneel op2 = LSR(":",9090) 113248477Sneel 114248477Sneel str = 1.2@myhost:2345 115268953Sjhb 116295124Sgrehan op1(str) = ( "1.2" "myhost:2345" ) 117270074Sgrehan 118270074Sgrehan define splice( int index, Operator op ) on a list of strings, with op( String ) : (String) 119221828Sgrehan to replace the indexth element of a list of strings with the list returned 120221828Sgrehan from op( element ). 121221828Sgrehan 122221828Sgrehan compose( op1, splice( 1, op2 )) is the correct parser. 123270159Sgrehan 124270159Sgrehan 125221828SgrehanA grammar for parsers? 126221828Sgrehan 127284894Sneelparser : simple_parser 128268935Sjhb | parser ":" simple_parser ; 129270071Sgrehan 130239026Sneelsimple_parser : ident 131284894Sneel | ident "(" param_list ")" ; 132284894Sneel 133221828Sgrehanparam_list : param 134262350Sjhb | param_list "," param ; 135262350Sjhb 136261088Sjhbparam : constant 137261088Sjhb | parser ; 138261088Sjhb 139267393Sjhbconstant is a Java constant 140268891Sjhbident x is interpreted as either a public static method on OperationFactory 141268891Sjhbnamed xAction which takes as arguments the types of the param list, or as 142268891Sjhbthe fully qualified class name of a class that implements Operation and has 143268972Sjhba constructor which takes as arguments the types of the param list. 144268972Sjhb 145221828SgrehanFrom parser table: 146221828Sgrehan 147241486SneeldebugFlags string 148221828SgrehanORBInitialHost string 149221828SgrehanORBInitialPort integer 150221828SgrehanORBServerHost string 151221828SgrehanORBServerPort integer 152221828SgrehanorbId string 153221828SgrehanhighWaterMark integer 154221828SgrehanlowWaterMark integer 155221828Sgrehanetc. 
156262350Sjhb 157262350SjhbgiopVersion construct(GIOPVersion.class):map(integer):list('.') 158262350SjhbgiopFragmentSize mod(ORBConstants.GIOP_FRAGMENT_DIVISOR):min(ORBConstants.GIOP_FRAGMENT_SIZE):integer 159262350Sjhb 160262350SjhbLisp notation: 161221828Sgrehan parse((mod ORBConstants.GIOP_FRAGMENT_DIVISOR) (min ...) (integer)) 162270159Sgrehan 163270159Sgrehangiop11BuffMgr makeMap(map) where map is constructed in Java with 164270159Sgrehan map.get("GROW") = Integer(0) 165221828Sgrehan map.get("CLCT") = Integer(1) 166221828Sgrehan map.get("STRM") = Integer(2) 167221828Sgrehan 168221828SgrehangiopTargetAddressPreference intToShort:integerRange(0,3) 169221828SgrehangiopAddressDisposition another map variant 170221828Sgrehan 171221828SgrehancharData construct(CodeSetComponentInfo.class):string 172240922Sneel 173240922Sneel 174240922SneelWhat about corbaloc:? 175261088Sjhb 176261088Sjhbv12 = GIOPVersion.v12 ; 177268976Sjhb 178268976SjhbgiopVersion = construct( GIOPVersion.class ):mapSequence( [integer,integer] ):FSR(".") 179268976Sjhb 180284900SneeliiopAddress = mapSequence( [giopVersion,identity,integer] ): 181284900Sneel splice( 1, LSR( ":", 9090 )): 182284900Sneel FSR( "@", v12 ) 183284900Sneel 184284900SneeladdressHandler = choice( 185268976Sjhb "iiop:", iiopAddress, 186270159Sgrehan ":", iiopAddress 187284900Sneel) 188284900Sneel 189268976SjhbaddressList = map(addressHandler):SL(",") 190268976Sjhb 191268976Sjhbchoice( 192268976Sjhb "corbaloc:", mapSequence( [addressList,string] ):RSL("/", "NameService"), 193284894Sneel "corbaname:", ... 194284894Sneel) 195268976Sjhb