/* -*- Mode: C; indent-tabs-mode:nil; c-basic-offset: 8-*- */

/*
 * This file is part of The Croco Library
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2.1 of the GNU Lesser General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 * Author: Dodji Seketeli
 * See the COPYRIGHTS file for copyrights information.
 */

/**
 *@file
 *The definition of the #CRTknzr (tokenizer)
 *class.
 */

#include <config.h>
#include "string.h"
#include "cr-tknzr.h"
#include "cr-doc-handler.h"

/**
 *The private data of a #CRTknzr instance.
 *It is reached from an instance through the PRIVATE() macro below.
 */
struct _CRTknzrPriv {
        /**The parser input stream of bytes*/
        CRInput *input;

        /**
         *A cache where tknzr_unget_token()
         *puts back the token. tknzr_get_next_token()
         *first looks in this cache, and if and
         *only if it's empty, fetches the next token
         *from the input stream.
         */
        CRToken *token_cache;

        /**
         *The position of the end of the previous token
         *or char fetched.
         */
        CRInputPos prev_pos;

        /**
         *A document handler attached to the tokenizer.
         *NOTE(review): not referenced by the functions visible in this
         *part of the file; presumably the SAC handler to notify while
         *tokenizing - confirm against its users.
         */
        CRDocHandler *sac_handler;

        /**
         *The reference count of the current instance
         *of #CRTknzr. Is manipulated by cr_tknzr_ref()
         *and cr_tknzr_unref().
         */
        glong ref_count;
};

/**
 *Gives access to the private data of an instance.
 *Expands to (obj)->priv and therefore evaluates 'obj' exactly once.
 */
#define PRIVATE(obj) ((obj)->priv)

/**
 *return TRUE if the character is a number ([0-9]), FALSE otherwise
 *@param a_char the char to test.
*Note that a_char is evaluated twice by the expansion.
 */
#define IS_NUM(a_char) (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE)

/**
 *Checks if 'status' equals CR_OK. If not, goto the 'error' label.
 *
 *The calling function must therefore define an 'error:' label.
 *The expansion is a bare if-block (no do/while wrapper), so it must
 *not be used as the unbraced body of an if that has an else branch.
 *
 *@param status the status (of type enum CRStatus) to test. Must be an
 *lvalue: it is assigned to when is_exception is FALSE.
 *@param is_exception if set to FALSE, the final status returned by the
 *current function will be CR_PARSING_ERROR. If set to TRUE, the
 *status is left at the current value of the 'status' variable.
 *
 */
#define CHECK_PARSING_STATUS(status, is_exception) \
if ((status) != CR_OK) \
{ \
        if (is_exception == FALSE) \
        { \
                status = CR_PARSING_ERROR ; \
        } \
        goto error ; \
}

/**
 *Peeks the next char from the input stream of the current tokenizer.
 *Invokes CHECK_PARSING_STATUS on the status returned by
 *cr_tknzr_peek_char(), i.e. jumps to the 'error' label on failure.
 *
 *Requires a local variable named 'status' and an 'error:' label in
 *the calling function.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_to_char a pointer to the char where to store the
 *char peeked.
 */
#define PEEK_NEXT_CHAR(a_tknzr, a_to_char) \
{\
status = cr_tknzr_peek_char  (a_tknzr, a_to_char) ; \
CHECK_PARSING_STATUS (status, TRUE) \
}

/**
 *Reads the next char from the input stream of the current parser.
 *In case of error, jumps to the "error:" label located in the
 *function where this macro is called.
 *
 *Requires a local variable named 'status'. The expansion is two
 *statements that are not wrapped in braces, so the macro must only
 *be used inside a braced block.
 *
 *@param a_tknzr the current instance of #CRTknzr
 *@param to_char a pointer to the guint32 char where to store
 *the character read.
 */
#define READ_NEXT_CHAR(a_tknzr, to_char) \
status = cr_tknzr_read_char (a_tknzr, to_char) ;\
CHECK_PARSING_STATUS (status, TRUE)

/**
 *Gets information about the current position in
 *the input of the parser.
 *In case of failure, this macro returns from the
 *calling function and
 *returns a status code of type enum #CRStatus.
 *Requires a local variable named 'status' in the calling function.
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_pos out parameter. A pointer to the position
 *inside the current parser input.
Must
 *point to a #CRInputPos: it is handed as-is to cr_input_get_cur_pos().
 */
#define RECORD_INITIAL_POS(a_tknzr, a_pos) \
status = cr_input_get_cur_pos (PRIVATE  \
(a_tknzr)->input, a_pos) ; \
g_return_val_if_fail (status == CR_OK, status)

/**
 *Gets the address of the current byte inside the
 *parser input.
 *On failure, jumps to the "error:" label of the calling function
 *(through CHECK_PARSING_STATUS). Requires a local 'status' variable.
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_addr out parameter a pointer (guchar*)
 *to where the address  must be put.
 */
#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr) \
status = cr_input_get_cur_byte_addr \
            (PRIVATE (a_tknzr)->input, a_addr) ; \
CHECK_PARSING_STATUS (status, TRUE)

/**
 *Peeks a byte from the topmost parser input at
 *a given offset from the current position.
 *If it fails, goto the "error:" label.
 *
 *Note that the expansion already ends with a ';'.
 *
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_offset the offset of the byte to peek, the
 *current byte having the offset '0'.
 *@param a_byte_ptr out parameter a pointer (guchar*) to
 *where the peeked char is to be stored.
 */
#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr) \
status = cr_tknzr_peek_byte (a_tknzr, \
                             a_offset, \
                             a_byte_ptr) ; \
CHECK_PARSING_STATUS (status, TRUE) ;

/**
 *Thin wrapper around cr_input_peek_byte2(): forwards its three
 *arguments unchanged and yields that function's result.
 */
#define BYTE(a_input, a_n, a_eof) \
cr_input_peek_byte2 (a_input, a_n, a_eof)

/**
 *Reads a byte from the topmost parser input
 *stream.
 *If it fails, goto the "error" label.
 *Note that the expansion already ends with a ';'.
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_byte_ptr the guchar * where to put the read char.
 */
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr) \
status = \
cr_input_read_byte (PRIVATE (a_tknzr)->input, a_byte_ptr) ;\
CHECK_PARSING_STATUS (status, TRUE) ;

/**
 *Skips a given number of byte in the topmost
 *parser input. Don't update line and column number.
 *In case of error, jumps to the "error:" label
 *of the surrounding function.
 *@param a_parser the current instance of #CRTknzr.
 *@param a_nb_bytes the number of bytes to skip.
*The skip is a cr_input_seek_index() relative to the current
 *position (CR_SEEK_CUR). The expansion already ends with a ';'.
 */
#define SKIP_BYTES(a_tknzr, a_nb_bytes) \
status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \
                                     CR_SEEK_CUR, a_nb_bytes) ; \
CHECK_PARSING_STATUS (status, TRUE) ;

/**
 *Skip utf8 encoded characters.
 *Updates line and column numbers.
 *On failure, jumps to the "error:" label of the calling function.
 *The expansion is a braced block that declares its own local
 *'nb_chars', so an argument expression that itself mentions a
 *variable called 'nb_chars' would be shadowed.
 *@param a_tknzr the current instance of #CRTknzr.
 *@param a_nb_chars the number of chars to skip. Must be of
 *type glong.
 */
#define SKIP_CHARS(a_tknzr, a_nb_chars) \
{ \
glong nb_chars = a_nb_chars ; \
status = cr_input_consume_chars \
     (PRIVATE (a_tknzr)->input,0, &nb_chars) ; \
CHECK_PARSING_STATUS (status, TRUE) ; \
}

/**
 *Tests the condition and if it is false, sets
 *status to "CR_PARSING_ERROR" and goto the 'error'
 *label.
 *Requires a local 'status' variable and an 'error:' label in the
 *calling function.
 *@param condition the condition to test.
 */
#define ENSURE_PARSING_COND(condition) \
if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}

/*
 *Forward declarations of the private parsing helpers that are used
 *before their definition.
 */
static enum CRStatus  cr_tknzr_parse_nl (CRTknzr * a_this,
                                         guchar ** a_start,
                                         guchar ** a_end,
                                         CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_w (CRTknzr * a_this,
                                       guchar ** a_start,
                                       guchar ** a_end,
                                       CRParsingLocation *a_location) ;

static enum CRStatus cr_tknzr_parse_unicode_escape (CRTknzr * a_this,
                                                    guint32 * a_unicode,
                                                    CRParsingLocation *a_location) ;

static enum CRStatus cr_tknzr_parse_escape (CRTknzr * a_this,
                                            guint32 * a_esc_code,
                                            CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_string (CRTknzr * a_this,
                                            CRString ** a_str);

static enum CRStatus cr_tknzr_parse_comment (CRTknzr * a_this,
                                             CRString ** a_comment);

static enum CRStatus cr_tknzr_parse_nmstart (CRTknzr * a_this,
                                             guint32 * a_char,
                                             CRParsingLocation *a_location);

static enum CRStatus cr_tknzr_parse_num (CRTknzr * a_this,
                                         CRNum ** a_num);

/**********************************
 *PRIVATE methods
 **********************************/

249/** 250 *Parses a "w" as defined by the css spec at [4.1.1]: 251 * w ::= [ \t\r\n\f]* 252 * 253 *@param a_this the current instance of #CRTknzr. 254 *@param a_start out param. Upon successfull completion, points 255 *to the beginning of the parsed white space, points to NULL otherwise. 256 *Can also point to NULL is there is no white space actually. 257 *@param a_end out param. Upon successfull completion, points 258 *to the end of the parsed white space, points to NULL otherwise. 259 *Can also point to NULL is there is no white space actually. 260 */ 261static enum CRStatus 262cr_tknzr_parse_w (CRTknzr * a_this, 263 guchar ** a_start, 264 guchar ** a_end, 265 CRParsingLocation *a_location) 266{ 267 guint32 cur_char = 0; 268 CRInputPos init_pos; 269 enum CRStatus status = CR_OK; 270 271 g_return_val_if_fail (a_this && PRIVATE (a_this) 272 && PRIVATE (a_this)->input 273 && a_start && a_end, 274 CR_BAD_PARAM_ERROR); 275 276 RECORD_INITIAL_POS (a_this, &init_pos); 277 278 *a_start = NULL; 279 *a_end = NULL; 280 281 READ_NEXT_CHAR (a_this, &cur_char); 282 283 if (cr_utils_is_white_space (cur_char) == FALSE) { 284 status = CR_PARSING_ERROR; 285 goto error; 286 } 287 if (a_location) { 288 cr_tknzr_get_parsing_location (a_this, 289 a_location) ; 290 } 291 RECORD_CUR_BYTE_ADDR (a_this, a_start); 292 *a_end = *a_start; 293 294 for (;;) { 295 gboolean is_eof = FALSE; 296 297 cr_input_get_end_of_file (PRIVATE (a_this)->input, &is_eof); 298 if (is_eof) 299 break; 300 301 status = cr_tknzr_peek_char (a_this, &cur_char); 302 if (status == CR_END_OF_INPUT_ERROR) { 303 status = CR_OK; 304 break; 305 } else if (status != CR_OK) { 306 goto error; 307 } 308 309 if (cr_utils_is_white_space (cur_char) == TRUE) { 310 READ_NEXT_CHAR (a_this, &cur_char); 311 RECORD_CUR_BYTE_ADDR (a_this, a_end); 312 } else { 313 break; 314 } 315 } 316 317 return CR_OK; 318 319 error: 320 cr_tknzr_set_cur_pos (a_this, &init_pos); 321 322 return status; 323} 324 325/** 326 *Parses a newline as defined in 
the css2 spec: 327 * nl ::= \n|\r\n|\r|\f 328 * 329 *@param a_this the "this pointer" of the current instance of #CRTknzr. 330 *@param a_start a pointer to the first character of the successfully 331 *parsed string. 332 *@param a_end a pointer to the last character of the successfully parsed 333 *string. 334 *@result CR_OK uppon successfull completion, an error code otherwise. 335 */ 336static enum CRStatus 337cr_tknzr_parse_nl (CRTknzr * a_this, 338 guchar ** a_start, 339 guchar ** a_end, 340 CRParsingLocation *a_location) 341{ 342 CRInputPos init_pos; 343 guchar next_chars[2] = { 0 }; 344 enum CRStatus status = CR_PARSING_ERROR; 345 346 g_return_val_if_fail (a_this && PRIVATE (a_this) 347 && a_start && a_end, CR_BAD_PARAM_ERROR); 348 349 RECORD_INITIAL_POS (a_this, &init_pos); 350 351 PEEK_BYTE (a_this, 1, &next_chars[0]); 352 PEEK_BYTE (a_this, 2, &next_chars[1]); 353 354 if ((next_chars[0] == '\r' && next_chars[1] == '\n')) { 355 SKIP_BYTES (a_this, 1); 356 if (a_location) { 357 cr_tknzr_get_parsing_location 358 (a_this, a_location) ; 359 } 360 SKIP_CHARS (a_this, 1); 361 362 RECORD_CUR_BYTE_ADDR (a_this, a_end); 363 364 status = CR_OK; 365 } else if (next_chars[0] == '\n' 366 || next_chars[0] == '\r' || next_chars[0] == '\f') { 367 SKIP_CHARS (a_this, 1); 368 if (a_location) { 369 cr_tknzr_get_parsing_location 370 (a_this, a_location) ; 371 } 372 RECORD_CUR_BYTE_ADDR (a_this, a_start); 373 *a_end = *a_start; 374 status = CR_OK; 375 } else { 376 status = CR_PARSING_ERROR; 377 goto error; 378 } 379 return CR_OK ; 380 381 error: 382 cr_tknzr_set_cur_pos (a_this, &init_pos) ; 383 return status; 384} 385 386/** 387 *Go ahead in the parser input, skipping all the spaces. 388 *If the next char if not a white space, this function does nothing. 389 *In any cases, it stops when it encounters a non white space character. 390 * 391 *@param a_this the current instance of #CRTknzr. 392 *@return CR_OK upon successfull completion, an error code otherwise. 
393 */ 394static enum CRStatus 395cr_tknzr_try_to_skip_spaces (CRTknzr * a_this) 396{ 397 enum CRStatus status = CR_ERROR; 398 guint32 cur_char = 0; 399 400 g_return_val_if_fail (a_this && PRIVATE (a_this) 401 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 402 403 status = cr_input_peek_char (PRIVATE (a_this)->input, &cur_char); 404 405 if (status != CR_OK) { 406 if (status == CR_END_OF_INPUT_ERROR) 407 return CR_OK; 408 return status; 409 } 410 411 if (cr_utils_is_white_space (cur_char) == TRUE) { 412 glong nb_chars = -1; /*consume all spaces */ 413 414 status = cr_input_consume_white_spaces 415 (PRIVATE (a_this)->input, &nb_chars); 416 } 417 418 return status; 419} 420 421/** 422 *Parses a "comment" as defined in the css spec at [4.1.1]: 423 *COMMENT ::= \/\*[^*]*\*+([^/][^*]*\*+)*\/ . 424 *This complex regexp is just to say that comments start 425 *with the two chars '/''*' and ends with the two chars '*''/'. 426 *It also means that comments cannot be nested. 427 *So based on that, I've just tried to implement the parsing function 428 *simply and in a straight forward manner. 
429 */ 430static enum CRStatus 431cr_tknzr_parse_comment (CRTknzr * a_this, 432 CRString ** a_comment) 433{ 434 enum CRStatus status = CR_OK; 435 CRInputPos init_pos; 436 guint32 cur_char = 0, next_char= 0; 437 CRString *comment = NULL; 438 CRParsingLocation loc = {0} ; 439 440 g_return_val_if_fail (a_this && PRIVATE (a_this) 441 && PRIVATE (a_this)->input, 442 CR_BAD_PARAM_ERROR); 443 444 RECORD_INITIAL_POS (a_this, &init_pos); 445 READ_NEXT_CHAR (a_this, &cur_char) ; 446 ENSURE_PARSING_COND (cur_char == '/'); 447 cr_tknzr_get_parsing_location (a_this, &loc) ; 448 449 READ_NEXT_CHAR (a_this, &cur_char); 450 ENSURE_PARSING_COND (cur_char == '*'); 451 comment = cr_string_new (); 452 for (;;) { 453 READ_NEXT_CHAR (a_this, &cur_char); 454 455 /*make sure there are no nested comments */ 456 if (cur_char == '/') { 457 READ_NEXT_CHAR (a_this, &cur_char); 458 ENSURE_PARSING_COND (cur_char != '*'); 459 g_string_append_c (comment->stryng, '/'); 460 g_string_append_unichar (comment->stryng, 461 cur_char); 462 continue; 463 } 464 465 /*Detect the end of the comments region */ 466 if (cur_char == '*') { 467 PEEK_NEXT_CHAR (a_this, &next_char); 468 469 if (next_char == '/') { 470 /* 471 *end of comments region 472 *Now, call the right SAC callback. 473 */ 474 SKIP_CHARS (a_this, 1) ; 475 status = CR_OK; 476 break; 477 } else { 478 g_string_append_c (comment->stryng, 479 '*'); 480 } 481 } 482 g_string_append_unichar (comment->stryng, cur_char); 483 } 484 485 if (status == CR_OK) { 486 cr_parsing_location_copy (&comment->location, 487 &loc) ; 488 *a_comment = comment; 489 return CR_OK; 490 } 491 error: 492 493 if (comment) { 494 cr_string_destroy (comment); 495 comment = NULL; 496 } 497 498 cr_tknzr_set_cur_pos (a_this, &init_pos); 499 500 return status; 501} 502 503/** 504 *Parses an 'unicode' escape sequence defined 505 *in css spec at chap 4.1.1: 506 *unicode ::= \\[0-9a-f]{1,6}[ \n\r\t\f]? 507 *@param a_this the current instance of #CRTknzr. 508 *@param a_start out parameter. 
A pointer to the start 509 *of the unicode escape sequence. Must *NOT* be deleted by 510 *the caller. 511 *@param a_end out parameter. A pointer to the last character 512 *of the unicode escape sequence. Must *NOT* be deleted by the caller. 513 *@return CR_OK if parsing succeded, an error code otherwise. 514 *Error code can be either CR_PARSING_ERROR if the string 515 *parsed just doesn't 516 *respect the production or another error if a 517 *lower level error occured. 518 */ 519static enum CRStatus 520cr_tknzr_parse_unicode_escape (CRTknzr * a_this, 521 guint32 * a_unicode, 522 CRParsingLocation *a_location) 523{ 524 guint32 cur_char; 525 CRInputPos init_pos; 526 glong occur = 0; 527 guint32 unicode = 0; 528 guchar *tmp_char_ptr1 = NULL, 529 *tmp_char_ptr2 = NULL; 530 enum CRStatus status = CR_OK; 531 532 g_return_val_if_fail (a_this && PRIVATE (a_this) 533 && a_unicode, CR_BAD_PARAM_ERROR); 534 535 /*first, let's backup the current position pointer */ 536 RECORD_INITIAL_POS (a_this, &init_pos); 537 538 READ_NEXT_CHAR (a_this, &cur_char); 539 540 if (cur_char != '\\') { 541 status = CR_PARSING_ERROR; 542 goto error; 543 } 544 if (a_location) { 545 cr_tknzr_get_parsing_location 546 (a_this, a_location) ; 547 } 548 PEEK_NEXT_CHAR (a_this, &cur_char); 549 550 for (occur = 0, unicode = 0; ((cur_char >= '0' && cur_char <= '9') 551 || (cur_char >= 'a' && cur_char <= 'f') 552 || (cur_char >= 'A' && cur_char <= 'F')) 553 && occur < 6; occur++) { 554 gint cur_char_val = 0; 555 556 READ_NEXT_CHAR (a_this, &cur_char); 557 558 if ((cur_char >= '0' && cur_char <= '9')) { 559 cur_char_val = (cur_char - '0'); 560 } else if ((cur_char >= 'a' && cur_char <= 'f')) { 561 cur_char_val = 10 + (cur_char - 'a'); 562 } else if ((cur_char >= 'A' && cur_char <= 'F')) { 563 cur_char_val = 10 + (cur_char - 'A'); 564 } 565 566 unicode = unicode * 10 + cur_char_val; 567 568 PEEK_NEXT_CHAR (a_this, &cur_char); 569 } 570 571 if (occur == 5) { 572 /* 573 *the unicode escape is 6 digit length 574 
*/ 575 576 /* 577 *parse one space that may 578 *appear just after the unicode 579 *escape. 580 */ 581 cr_tknzr_parse_w (a_this, &tmp_char_ptr1, 582 &tmp_char_ptr2, NULL); 583 status = CR_OK; 584 } else { 585 /* 586 *The unicode escape is less than 587 *6 digit length. The character 588 *that comes right after the escape 589 *must be a white space. 590 */ 591 status = cr_tknzr_parse_w (a_this, &tmp_char_ptr1, 592 &tmp_char_ptr2, NULL); 593 } 594 595 if (status == CR_OK) { 596 *a_unicode = unicode; 597 return CR_OK; 598 } 599 600 error: 601 /* 602 *restore the initial position pointer backuped at 603 *the beginning of this function. 604 */ 605 cr_tknzr_set_cur_pos (a_this, &init_pos); 606 607 return status; 608} 609 610/** 611 *parses an escape sequence as defined by the css spec: 612 *escape ::= {unicode}|\\[ -~\200-\4177777] 613 *@param a_this the current instance of #CRTknzr . 614 */ 615static enum CRStatus 616cr_tknzr_parse_escape (CRTknzr * a_this, guint32 * a_esc_code, 617 CRParsingLocation *a_location) 618{ 619 enum CRStatus status = CR_OK; 620 guint32 cur_char = 0; 621 CRInputPos init_pos; 622 guchar next_chars[2]; 623 624 g_return_val_if_fail (a_this && PRIVATE (a_this) 625 && a_esc_code, CR_BAD_PARAM_ERROR); 626 627 RECORD_INITIAL_POS (a_this, &init_pos); 628 629 PEEK_BYTE (a_this, 1, &next_chars[0]); 630 PEEK_BYTE (a_this, 2, &next_chars[1]); 631 632 if (next_chars[0] != '\\') { 633 status = CR_PARSING_ERROR; 634 goto error; 635 } 636 637 if ((next_chars[1] >= '0' && next_chars[1] <= '9') 638 || (next_chars[1] >= 'a' && next_chars[1] <= 'f') 639 || (next_chars[1] >= 'A' && next_chars[1] <= 'F')) { 640 status = cr_tknzr_parse_unicode_escape (a_this, a_esc_code, 641 a_location); 642 } else { 643 /*consume the '\' char */ 644 READ_NEXT_CHAR (a_this, &cur_char); 645 if (a_location) { 646 cr_tknzr_get_parsing_location (a_this, 647 a_location) ; 648 } 649 /*then read the char after the '\' */ 650 READ_NEXT_CHAR (a_this, &cur_char); 651 652 if (cur_char != ' ' 
&& (cur_char < 200 || cur_char > 4177777)) { 653 status = CR_PARSING_ERROR; 654 goto error; 655 } 656 *a_esc_code = cur_char; 657 658 } 659 if (status == CR_OK) { 660 return CR_OK; 661 } 662 error: 663 cr_tknzr_set_cur_pos (a_this, &init_pos); 664 return status; 665} 666 667/** 668 *Parses a string type as defined in css spec [4.1.1]: 669 * 670 *string ::= {string1}|{string2} 671 *string1 ::= \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\" 672 *string2 ::= \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\' 673 * 674 *@param a_this the current instance of #CRTknzr. 675 *@param a_start out parameter. Upon successfull completion, 676 *points to the beginning of the string, points to an undefined value 677 *otherwise. 678 *@param a_end out parameter. Upon successfull completion, points to 679 *the beginning of the string, points to an undefined value otherwise. 680 *@return CR_OK upon successfull completion, an error code otherwise. 681 */ 682static enum CRStatus 683cr_tknzr_parse_string (CRTknzr * a_this, CRString ** a_str) 684{ 685 guint32 cur_char = 0, 686 delim = 0; 687 CRInputPos init_pos; 688 enum CRStatus status = CR_OK; 689 CRString *str = NULL; 690 691 g_return_val_if_fail (a_this && PRIVATE (a_this) 692 && PRIVATE (a_this)->input 693 && a_str, CR_BAD_PARAM_ERROR); 694 695 RECORD_INITIAL_POS (a_this, &init_pos); 696 READ_NEXT_CHAR (a_this, &cur_char); 697 698 if (cur_char == '"') 699 delim = '"'; 700 else if (cur_char == '\'') 701 delim = '\''; 702 else { 703 status = CR_PARSING_ERROR; 704 goto error; 705 } 706 str = cr_string_new (); 707 if (str) { 708 cr_tknzr_get_parsing_location 709 (a_this, &str->location) ; 710 } 711 for (;;) { 712 guchar next_chars[2] = { 0 }; 713 714 PEEK_BYTE (a_this, 1, &next_chars[0]); 715 PEEK_BYTE (a_this, 2, &next_chars[1]); 716 717 if (next_chars[0] == '\\') { 718 guchar *tmp_char_ptr1 = NULL, 719 *tmp_char_ptr2 = NULL; 720 guint32 esc_code = 0; 721 722 if (next_chars[1] == '\'' || next_chars[1] == '"') { 723 
g_string_append_unichar (str->stryng, 724 next_chars[1]); 725 SKIP_BYTES (a_this, 2); 726 status = CR_OK; 727 } else { 728 status = cr_tknzr_parse_escape 729 (a_this, &esc_code, NULL); 730 731 if (status == CR_OK) { 732 g_string_append_unichar 733 (str->stryng, 734 esc_code); 735 } 736 } 737 738 if (status != CR_OK) { 739 /* 740 *consume the '\' char, and try to parse 741 *a newline. 742 */ 743 READ_NEXT_CHAR (a_this, &cur_char); 744 745 status = cr_tknzr_parse_nl 746 (a_this, &tmp_char_ptr1, 747 &tmp_char_ptr2, NULL); 748 } 749 750 CHECK_PARSING_STATUS (status, FALSE); 751 } else if (strchr ("\t !#$%&", next_chars[0]) 752 || (next_chars[0] >= '(' && next_chars[0] <= '~')) { 753 READ_NEXT_CHAR (a_this, &cur_char); 754 g_string_append_unichar (str->stryng, 755 cur_char); 756 status = CR_OK; 757 } 758 759 else if (cr_utils_is_nonascii (next_chars[0])) { 760 READ_NEXT_CHAR (a_this, &cur_char); 761 g_string_append_unichar (str->stryng, cur_char); 762 } else if (next_chars[0] == delim) { 763 READ_NEXT_CHAR (a_this, &cur_char); 764 break; 765 } else { 766 status = CR_PARSING_ERROR; 767 goto error; 768 } 769 } 770 771 if (status == CR_OK) { 772 if (*a_str == NULL) { 773 *a_str = str; 774 str = NULL; 775 } else { 776 (*a_str)->stryng = g_string_append_len 777 ((*a_str)->stryng, 778 str->stryng->str, 779 str->stryng->len); 780 cr_string_destroy (str); 781 } 782 return CR_OK; 783 } 784 785 error: 786 787 if (str) { 788 cr_string_destroy (str) ; 789 str = NULL; 790 } 791 cr_tknzr_set_cur_pos (a_this, &init_pos); 792 return status; 793} 794 795/** 796 *Parses the an nmstart as defined by the css2 spec [4.1.1]: 797 * nmstart [a-zA-Z]|{nonascii}|{escape} 798 * 799 *@param a_this the current instance of #CRTknzr. 800 *@param a_start out param. A pointer to the starting point of 801 *the token. 802 *@param a_end out param. A pointer to the ending point of the 803 *token. 804 *@param a_char out param. The actual parsed nmchar. 
805 *@return CR_OK upon successfull completion, 806 *an error code otherwise. 807 */ 808static enum CRStatus 809cr_tknzr_parse_nmstart (CRTknzr * a_this, 810 guint32 * a_char, 811 CRParsingLocation *a_location) 812{ 813 CRInputPos init_pos; 814 enum CRStatus status = CR_OK; 815 guint32 cur_char = 0, 816 next_char = 0; 817 818 g_return_val_if_fail (a_this && PRIVATE (a_this) 819 && PRIVATE (a_this)->input 820 && a_char, CR_BAD_PARAM_ERROR); 821 822 RECORD_INITIAL_POS (a_this, &init_pos); 823 824 PEEK_NEXT_CHAR (a_this, &next_char); 825 826 if (next_char == '\\') { 827 status = cr_tknzr_parse_escape (a_this, a_char, 828 a_location); 829 830 if (status != CR_OK) 831 goto error; 832 833 } else if (cr_utils_is_nonascii (next_char) == TRUE 834 || ((next_char >= 'a') && (next_char <= 'z')) 835 || ((next_char >= 'A') && (next_char <= 'Z')) 836 ) { 837 READ_NEXT_CHAR (a_this, &cur_char); 838 if (a_location) { 839 cr_tknzr_get_parsing_location (a_this, 840 a_location) ; 841 } 842 *a_char = cur_char; 843 status = CR_OK; 844 } else { 845 status = CR_PARSING_ERROR; 846 goto error; 847 } 848 849 return CR_OK; 850 851 error: 852 cr_tknzr_set_cur_pos (a_this, &init_pos); 853 854 return status; 855 856} 857 858/** 859 *Parses an nmchar as described in the css spec at 860 *chap 4.1.1: 861 *nmchar ::= [a-z0-9-]|{nonascii}|{escape} 862 * 863 *Humm, I have added the possibility for nmchar to 864 *contain upper case letters. 865 * 866 *@param a_this the current instance of #CRTknzr. 867 *@param a_start out param. A pointer to the starting point of 868 *the token. 869 *@param a_end out param. A pointer to the ending point of the 870 *token. 871 *@param a_char out param. The actual parsed nmchar. 872 *@return CR_OK upon successfull completion, 873 *an error code otherwise. 
874 */ 875static enum CRStatus 876cr_tknzr_parse_nmchar (CRTknzr * a_this, guint32 * a_char, 877 CRParsingLocation *a_location) 878{ 879 guint32 cur_char = 0, 880 next_char = 0; 881 enum CRStatus status = CR_OK; 882 CRInputPos init_pos; 883 884 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char, 885 CR_BAD_PARAM_ERROR); 886 887 RECORD_INITIAL_POS (a_this, &init_pos); 888 889 status = cr_input_peek_char (PRIVATE (a_this)->input, 890 &next_char) ; 891 if (status != CR_OK) 892 goto error; 893 894 if (next_char == '\\') { 895 status = cr_tknzr_parse_escape (a_this, a_char, 896 a_location); 897 898 if (status != CR_OK) 899 goto error; 900 901 } else if (cr_utils_is_nonascii (next_char) == TRUE 902 || ((next_char >= 'a') && (next_char <= 'z')) 903 || ((next_char >= 'A') && (next_char <= 'Z')) 904 || ((next_char >= '0') && (next_char <= '9')) 905 || (next_char == '-') 906 || (next_char == '_') /*'_' not allowed by the spec. */ 907 ) { 908 READ_NEXT_CHAR (a_this, &cur_char); 909 *a_char = cur_char; 910 status = CR_OK; 911 if (a_location) { 912 cr_tknzr_get_parsing_location 913 (a_this, a_location) ; 914 } 915 } else { 916 status = CR_PARSING_ERROR; 917 goto error; 918 } 919 return CR_OK; 920 921 error: 922 cr_tknzr_set_cur_pos (a_this, &init_pos); 923 return status; 924} 925 926/** 927 *Parses an "ident" as defined in css spec [4.1.1]: 928 *ident ::= {nmstart}{nmchar}* 929 * 930 *Actually parses it using the css3 grammar: 931 *ident ::= -?{nmstart}{nmchar}* 932 *@param a_this the currens instance of #CRTknzr. 933 * 934 *@param a_str a pointer to parsed ident. If *a_str is NULL, 935 *this function allocates a new instance of CRString. If not, 936 *the function just appends the parsed string to the one passed. 937 *In both cases it is up to the caller to free *a_str. 938 * 939 *@return CR_OK upon successfull completion, an error code 940 *otherwise. 
941 */ 942static enum CRStatus 943cr_tknzr_parse_ident (CRTknzr * a_this, CRString ** a_str) 944{ 945 guint32 tmp_char = 0; 946 CRString *stringue = NULL ; 947 CRInputPos init_pos; 948 enum CRStatus status = CR_OK; 949 gboolean location_is_set = FALSE ; 950 951 g_return_val_if_fail (a_this && PRIVATE (a_this) 952 && PRIVATE (a_this)->input 953 && a_str, CR_BAD_PARAM_ERROR); 954 955 RECORD_INITIAL_POS (a_this, &init_pos); 956 PEEK_NEXT_CHAR (a_this, &tmp_char) ; 957 stringue = cr_string_new () ; 958 g_return_val_if_fail (stringue, 959 CR_OUT_OF_MEMORY_ERROR) ; 960 961 if (tmp_char == '-') { 962 READ_NEXT_CHAR (a_this, &tmp_char) ; 963 cr_tknzr_get_parsing_location 964 (a_this, &stringue->location) ; 965 location_is_set = TRUE ; 966 g_string_append_unichar (stringue->stryng, 967 tmp_char) ; 968 } 969 status = cr_tknzr_parse_nmstart (a_this, &tmp_char, NULL); 970 if (status != CR_OK) { 971 status = CR_PARSING_ERROR; 972 goto end ; 973 } 974 if (location_is_set == FALSE) { 975 cr_tknzr_get_parsing_location 976 (a_this, &stringue->location) ; 977 location_is_set = TRUE ; 978 } 979 g_string_append_unichar (stringue->stryng, tmp_char); 980 for (;;) { 981 status = cr_tknzr_parse_nmchar (a_this, 982 &tmp_char, 983 NULL); 984 if (status != CR_OK) { 985 status = CR_OK ; 986 break; 987 } 988 g_string_append_unichar (stringue->stryng, tmp_char); 989 } 990 if (status == CR_OK) { 991 if (!*a_str) { 992 *a_str = stringue ; 993 994 } else { 995 g_string_append_len ((*a_str)->stryng, 996 stringue->stryng->str, 997 stringue->stryng->len) ; 998 cr_string_destroy (stringue) ; 999 } 1000 stringue = NULL ; 1001 } 1002 1003 error: 1004 end: 1005 if (stringue) { 1006 cr_string_destroy (stringue) ; 1007 stringue = NULL ; 1008 } 1009 if (status != CR_OK ) { 1010 cr_tknzr_set_cur_pos (a_this, &init_pos) ; 1011 } 1012 return status ; 1013} 1014 1015 1016/** 1017 *Parses a "name" as defined by css spec [4.1.1]: 1018 *name ::= {nmchar}+ 1019 * 1020 *@param a_this the current instance of 
#CRTknzr. 1021 * 1022 *@param a_str out parameter. A pointer to the successfully parsed 1023 *name. If *a_str is set to NULL, this function allocates a new instance 1024 *of CRString. If not, it just appends the parsed name to the passed *a_str. 1025 *In both cases, it is up to the caller to free *a_str. 1026 * 1027 *@return CR_OK upon successfull completion, an error code otherwise. 1028 */ 1029static enum CRStatus 1030cr_tknzr_parse_name (CRTknzr * a_this, 1031 CRString ** a_str) 1032{ 1033 guint32 tmp_char = 0; 1034 CRInputPos init_pos; 1035 enum CRStatus status = CR_OK; 1036 gboolean str_needs_free = FALSE, 1037 is_first_nmchar=TRUE ; 1038 glong i = 0; 1039 CRParsingLocation loc = {0} ; 1040 1041 g_return_val_if_fail (a_this && PRIVATE (a_this) 1042 && PRIVATE (a_this)->input 1043 && a_str, 1044 CR_BAD_PARAM_ERROR) ; 1045 1046 RECORD_INITIAL_POS (a_this, &init_pos); 1047 1048 if (*a_str == NULL) { 1049 *a_str = cr_string_new (); 1050 str_needs_free = TRUE; 1051 } 1052 for (i = 0;; i++) { 1053 if (is_first_nmchar == TRUE) { 1054 status = cr_tknzr_parse_nmchar 1055 (a_this, &tmp_char, 1056 &loc) ; 1057 is_first_nmchar = FALSE ; 1058 } else { 1059 status = cr_tknzr_parse_nmchar 1060 (a_this, &tmp_char, NULL) ; 1061 } 1062 if (status != CR_OK) 1063 break; 1064 g_string_append_unichar ((*a_str)->stryng, 1065 tmp_char); 1066 } 1067 if (i > 0) { 1068 cr_parsing_location_copy 1069 (&(*a_str)->location, &loc) ; 1070 return CR_OK; 1071 } 1072 if (str_needs_free == TRUE && *a_str) { 1073 cr_string_destroy (*a_str); 1074 *a_str = NULL; 1075 } 1076 cr_tknzr_set_cur_pos (a_this, &init_pos); 1077 return CR_PARSING_ERROR; 1078} 1079 1080/** 1081 *Parses a "hash" as defined by the css spec in [4.1.1]: 1082 *HASH ::= #{name} 1083 */ 1084static enum CRStatus 1085cr_tknzr_parse_hash (CRTknzr * a_this, CRString ** a_str) 1086{ 1087 guint32 cur_char = 0; 1088 CRInputPos init_pos; 1089 enum CRStatus status = CR_OK; 1090 gboolean str_needs_free = FALSE; 1091 CRParsingLocation loc = 
{0} ; 1092 1093 g_return_val_if_fail (a_this && PRIVATE (a_this) 1094 && PRIVATE (a_this)->input, 1095 CR_BAD_PARAM_ERROR); 1096 1097 RECORD_INITIAL_POS (a_this, &init_pos); 1098 READ_NEXT_CHAR (a_this, &cur_char); 1099 if (cur_char != '#') { 1100 status = CR_PARSING_ERROR; 1101 goto error; 1102 } 1103 if (*a_str == NULL) { 1104 *a_str = cr_string_new (); 1105 str_needs_free = TRUE; 1106 } 1107 cr_tknzr_get_parsing_location (a_this, 1108 &loc) ; 1109 status = cr_tknzr_parse_name (a_this, a_str); 1110 cr_parsing_location_copy (&(*a_str)->location, &loc) ; 1111 if (status != CR_OK) { 1112 goto error; 1113 } 1114 return CR_OK; 1115 1116 error: 1117 if (str_needs_free == TRUE && *a_str) { 1118 cr_string_destroy (*a_str); 1119 *a_str = NULL; 1120 } 1121 1122 cr_tknzr_set_cur_pos (a_this, &init_pos); 1123 return status; 1124} 1125 1126/** 1127 *Parses an uri as defined by the css spec [4.1.1]: 1128 * URI ::= url\({w}{string}{w}\) 1129 * |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\) 1130 * 1131 *@param a_this the current instance of #CRTknzr. 1132 *@param a_str the successfully parsed url. 1133 *@return CR_OK upon successfull completion, an error code otherwise. 
1134 */ 1135static enum CRStatus 1136cr_tknzr_parse_uri (CRTknzr * a_this, 1137 CRString ** a_str) 1138{ 1139 guint32 cur_char = 0; 1140 CRInputPos init_pos; 1141 enum CRStatus status = CR_PARSING_ERROR; 1142 guchar tab[4] = { 0 }, *tmp_ptr1 = NULL, *tmp_ptr2 = NULL; 1143 CRString *str = NULL; 1144 CRParsingLocation location = {0} ; 1145 1146 g_return_val_if_fail (a_this 1147 && PRIVATE (a_this) 1148 && PRIVATE (a_this)->input 1149 && a_str, 1150 CR_BAD_PARAM_ERROR); 1151 1152 RECORD_INITIAL_POS (a_this, &init_pos); 1153 1154 PEEK_BYTE (a_this, 1, &tab[0]); 1155 PEEK_BYTE (a_this, 2, &tab[1]); 1156 PEEK_BYTE (a_this, 3, &tab[2]); 1157 PEEK_BYTE (a_this, 4, &tab[3]); 1158 1159 if (tab[0] != 'u' || tab[1] != 'r' || tab[2] != 'l' || tab[3] != '(') { 1160 status = CR_PARSING_ERROR; 1161 goto error; 1162 } 1163 /* 1164 *Here, we want to skip 4 bytes ('u''r''l''('). 1165 *But we also need to keep track of the parsing location 1166 *of the 'u'. So, we skip 1 byte, we record the parsing 1167 *location, then we skip the 3 remaining bytes. 
1168 */ 1169 SKIP_CHARS (a_this, 1); 1170 cr_tknzr_get_parsing_location (a_this, &location) ; 1171 SKIP_CHARS (a_this, 3); 1172 cr_tknzr_try_to_skip_spaces (a_this); 1173 status = cr_tknzr_parse_string (a_this, a_str); 1174 1175 if (status == CR_OK) { 1176 guint32 next_char = 0; 1177 status = cr_tknzr_parse_w (a_this, &tmp_ptr1, 1178 &tmp_ptr2, NULL); 1179 cr_tknzr_try_to_skip_spaces (a_this); 1180 PEEK_NEXT_CHAR (a_this, &next_char); 1181 if (next_char == ')') { 1182 READ_NEXT_CHAR (a_this, &cur_char); 1183 status = CR_OK; 1184 } else { 1185 status = CR_PARSING_ERROR; 1186 } 1187 } 1188 if (status != CR_OK) { 1189 str = cr_string_new (); 1190 for (;;) { 1191 guint32 next_char = 0; 1192 PEEK_NEXT_CHAR (a_this, &next_char); 1193 if (strchr ("!#$%&", next_char) 1194 || (next_char >= '*' && next_char <= '~') 1195 || (cr_utils_is_nonascii (next_char) == TRUE)) { 1196 READ_NEXT_CHAR (a_this, &cur_char); 1197 g_string_append_unichar 1198 (str->stryng, cur_char); 1199 status = CR_OK; 1200 } else { 1201 guint32 esc_code = 0; 1202 status = cr_tknzr_parse_escape 1203 (a_this, &esc_code, NULL); 1204 if (status == CR_OK) { 1205 g_string_append_unichar 1206 (str->stryng, 1207 esc_code); 1208 } else { 1209 status = CR_OK; 1210 break; 1211 } 1212 } 1213 } 1214 cr_tknzr_try_to_skip_spaces (a_this); 1215 READ_NEXT_CHAR (a_this, &cur_char); 1216 if (cur_char == ')') { 1217 status = CR_OK; 1218 } else { 1219 status = CR_PARSING_ERROR; 1220 goto error; 1221 } 1222 if (str) { 1223 if (*a_str == NULL) { 1224 *a_str = str; 1225 str = NULL; 1226 } else { 1227 g_string_append_len 1228 ((*a_str)->stryng, 1229 str->stryng->str, 1230 str->stryng->len); 1231 cr_string_destroy (str); 1232 } 1233 } 1234 } 1235 1236 cr_parsing_location_copy 1237 (&(*a_str)->location, 1238 &location) ; 1239 return CR_OK ; 1240 error: 1241 if (str) { 1242 cr_string_destroy (str); 1243 str = NULL; 1244 } 1245 cr_tknzr_set_cur_pos (a_this, &init_pos); 1246 return status; 1247} 1248 1249/** 1250 *parses an RGB as 
defined in the css2 spec. 1251 *rgb: rgb '('S*{num}%?S* ',' {num}#?S*,S*{num}#?S*')' 1252 * 1253 *@param a_this the "this pointer" of the current instance of 1254 *@param a_rgb out parameter the parsed rgb. 1255 *@return CR_OK upon successfull completion, an error code otherwise. 1256 */ 1257static enum CRStatus 1258cr_tknzr_parse_rgb (CRTknzr * a_this, CRRgb ** a_rgb) 1259{ 1260 enum CRStatus status = CR_OK; 1261 CRInputPos init_pos; 1262 CRNum *num = NULL; 1263 guchar next_bytes[3] = { 0 }, cur_byte = 0; 1264 glong red = 0, 1265 green = 0, 1266 blue = 0, 1267 i = 0; 1268 gboolean is_percentage = FALSE; 1269 CRParsingLocation location = {0} ; 1270 1271 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 1272 1273 RECORD_INITIAL_POS (a_this, &init_pos); 1274 1275 PEEK_BYTE (a_this, 1, &next_bytes[0]); 1276 PEEK_BYTE (a_this, 2, &next_bytes[1]); 1277 PEEK_BYTE (a_this, 3, &next_bytes[2]); 1278 1279 if (((next_bytes[0] == 'r') || (next_bytes[0] == 'R')) 1280 && ((next_bytes[1] == 'g') || (next_bytes[1] == 'G')) 1281 && ((next_bytes[2] == 'b') || (next_bytes[2] == 'B'))) { 1282 SKIP_CHARS (a_this, 1); 1283 cr_tknzr_get_parsing_location (a_this, &location) ; 1284 SKIP_CHARS (a_this, 2); 1285 } else { 1286 status = CR_PARSING_ERROR; 1287 goto error; 1288 } 1289 READ_NEXT_BYTE (a_this, &cur_byte); 1290 ENSURE_PARSING_COND (cur_byte == '('); 1291 1292 cr_tknzr_try_to_skip_spaces (a_this); 1293 status = cr_tknzr_parse_num (a_this, &num); 1294 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL)); 1295 1296 red = num->val; 1297 cr_num_destroy (num); 1298 num = NULL; 1299 1300 PEEK_BYTE (a_this, 1, &next_bytes[0]); 1301 if (next_bytes[0] == '%') { 1302 SKIP_CHARS (a_this, 1); 1303 is_percentage = TRUE; 1304 } 1305 cr_tknzr_try_to_skip_spaces (a_this); 1306 1307 for (i = 0; i < 2; i++) { 1308 READ_NEXT_BYTE (a_this, &cur_byte); 1309 ENSURE_PARSING_COND (cur_byte == ','); 1310 1311 cr_tknzr_try_to_skip_spaces (a_this); 1312 status = cr_tknzr_parse_num 
(a_this, &num); 1313 ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL)); 1314 1315 PEEK_BYTE (a_this, 1, &next_bytes[0]); 1316 if (next_bytes[0] == '%') { 1317 SKIP_CHARS (a_this, 1); 1318 is_percentage = 1; 1319 } 1320 1321 if (i == 0) { 1322 green = num->val; 1323 } else if (i == 1) { 1324 blue = num->val; 1325 } 1326 1327 if (num) { 1328 cr_num_destroy (num); 1329 num = NULL; 1330 } 1331 cr_tknzr_try_to_skip_spaces (a_this); 1332 } 1333 1334 READ_NEXT_BYTE (a_this, &cur_byte); 1335 if (*a_rgb == NULL) { 1336 *a_rgb = cr_rgb_new_with_vals (red, green, blue, 1337 is_percentage); 1338 1339 if (*a_rgb == NULL) { 1340 status = CR_ERROR; 1341 goto error; 1342 } 1343 status = CR_OK; 1344 } else { 1345 (*a_rgb)->red = red; 1346 (*a_rgb)->green = green; 1347 (*a_rgb)->blue = blue; 1348 (*a_rgb)->is_percentage = is_percentage; 1349 1350 status = CR_OK; 1351 } 1352 1353 if (status == CR_OK) { 1354 if (a_rgb && *a_rgb) { 1355 cr_parsing_location_copy 1356 (&(*a_rgb)->location, 1357 &location) ; 1358 } 1359 return CR_OK; 1360 } 1361 1362 error: 1363 if (num) { 1364 cr_num_destroy (num); 1365 num = NULL; 1366 } 1367 1368 cr_tknzr_set_cur_pos (a_this, &init_pos); 1369 return CR_OK; 1370} 1371 1372/** 1373 *Parses a atkeyword as defined by the css spec in [4.1.1]: 1374 *ATKEYWORD ::= @{ident} 1375 * 1376 *@param a_this the "this pointer" of the current instance of 1377 *#CRTknzr. 1378 * 1379 *@param a_str out parameter. The parsed atkeyword. If *a_str is 1380 *set to NULL this function allocates a new instance of CRString and 1381 *sets it to the parsed atkeyword. If not, this function just appends 1382 *the parsed atkeyword to the end of *a_str. In both cases it is up to 1383 *the caller to free *a_str. 1384 * 1385 *@return CR_OK upon successfull completion, an error code otherwise. 
1386 */ 1387static enum CRStatus 1388cr_tknzr_parse_atkeyword (CRTknzr * a_this, 1389 CRString ** a_str) 1390{ 1391 guint32 cur_char = 0; 1392 CRInputPos init_pos; 1393 gboolean str_needs_free = FALSE; 1394 enum CRStatus status = CR_OK; 1395 1396 g_return_val_if_fail (a_this && PRIVATE (a_this) 1397 && PRIVATE (a_this)->input 1398 && a_str, CR_BAD_PARAM_ERROR); 1399 1400 RECORD_INITIAL_POS (a_this, &init_pos); 1401 1402 READ_NEXT_CHAR (a_this, &cur_char); 1403 1404 if (cur_char != '@') { 1405 status = CR_PARSING_ERROR; 1406 goto error; 1407 } 1408 1409 if (*a_str == NULL) { 1410 *a_str = cr_string_new (); 1411 str_needs_free = TRUE; 1412 } 1413 status = cr_tknzr_parse_ident (a_this, a_str); 1414 if (status != CR_OK) { 1415 goto error; 1416 } 1417 return CR_OK; 1418 error: 1419 1420 if (str_needs_free == TRUE && *a_str) { 1421 cr_string_destroy (*a_str); 1422 *a_str = NULL; 1423 } 1424 cr_tknzr_set_cur_pos (a_this, &init_pos); 1425 return status; 1426} 1427 1428static enum CRStatus 1429cr_tknzr_parse_important (CRTknzr * a_this, 1430 CRParsingLocation *a_location) 1431{ 1432 guint32 cur_char = 0; 1433 CRInputPos init_pos; 1434 enum CRStatus status = CR_OK; 1435 1436 g_return_val_if_fail (a_this && PRIVATE (a_this) 1437 && PRIVATE (a_this)->input, 1438 CR_BAD_PARAM_ERROR); 1439 1440 RECORD_INITIAL_POS (a_this, &init_pos); 1441 READ_NEXT_CHAR (a_this, &cur_char); 1442 ENSURE_PARSING_COND (cur_char == '!'); 1443 if (a_location) { 1444 cr_tknzr_get_parsing_location (a_this, 1445 a_location) ; 1446 } 1447 cr_tknzr_try_to_skip_spaces (a_this); 1448 1449 if (BYTE (PRIVATE (a_this)->input, 1, NULL) == 'i' 1450 && BYTE (PRIVATE (a_this)->input, 2, NULL) == 'm' 1451 && BYTE (PRIVATE (a_this)->input, 3, NULL) == 'p' 1452 && BYTE (PRIVATE (a_this)->input, 4, NULL) == 'o' 1453 && BYTE (PRIVATE (a_this)->input, 5, NULL) == 'r' 1454 && BYTE (PRIVATE (a_this)->input, 6, NULL) == 't' 1455 && BYTE (PRIVATE (a_this)->input, 7, NULL) == 'a' 1456 && BYTE (PRIVATE (a_this)->input, 8, 
NULL) == 'n' 1457 && BYTE (PRIVATE (a_this)->input, 9, NULL) == 't') { 1458 SKIP_BYTES (a_this, 9); 1459 if (a_location) { 1460 cr_tknzr_get_parsing_location (a_this, 1461 a_location) ; 1462 } 1463 return CR_OK; 1464 } else { 1465 status = CR_PARSING_ERROR; 1466 } 1467 1468 error: 1469 cr_tknzr_set_cur_pos (a_this, &init_pos); 1470 1471 return status; 1472} 1473 1474/** 1475 *Parses a num as defined in the css spec [4.1.1]: 1476 *[0-9]+|[0-9]*\.[0-9]+ 1477 *@param a_this the current instance of #CRTknzr. 1478 *@param a_num out parameter. The parsed number. 1479 *@return CR_OK upon successfull completion, 1480 *an error code otherwise. 1481 */ 1482static enum CRStatus 1483cr_tknzr_parse_num (CRTknzr * a_this, 1484 CRNum ** a_num) 1485{ 1486 enum CRStatus status = CR_PARSING_ERROR; 1487 enum CRNumType val_type = NUM_GENERIC; 1488 gboolean parsing_dec = FALSE, 1489 parsed = FALSE; 1490 guint32 cur_char = 0, 1491 int_part = 0, 1492 dec_part = 0, 1493 next_char = 0, 1494 decimal_places = 0; 1495 CRInputPos init_pos; 1496 CRParsingLocation location = {0} ; 1497 1498 g_return_val_if_fail (a_this && PRIVATE (a_this) 1499 && PRIVATE (a_this)->input, 1500 CR_BAD_PARAM_ERROR); 1501 1502 RECORD_INITIAL_POS (a_this, &init_pos); 1503 READ_NEXT_CHAR (a_this, &cur_char); 1504 if (IS_NUM (cur_char) == TRUE) { 1505 int_part = int_part * 10 + (cur_char - '0'); 1506 1507 parsed = TRUE; 1508 } else if (cur_char == '.') { 1509 parsing_dec = TRUE; 1510 } else { 1511 status = CR_PARSING_ERROR; 1512 goto error; 1513 } 1514 cr_tknzr_get_parsing_location (a_this, &location) ; 1515 1516 for (;;) { 1517 status = cr_tknzr_peek_char (a_this, &next_char); 1518 if (status != CR_OK) { 1519 if (status == CR_END_OF_INPUT_ERROR) 1520 status = CR_OK; 1521 break; 1522 } 1523 if (next_char == '.') { 1524 if (parsing_dec == TRUE) { 1525 status = CR_PARSING_ERROR; 1526 goto error; 1527 } 1528 1529 READ_NEXT_CHAR (a_this, &cur_char); 1530 parsing_dec = TRUE; 1531 parsed = TRUE; 1532 } else if (IS_NUM 
(next_char) == TRUE) { 1533 READ_NEXT_CHAR (a_this, &cur_char); 1534 parsed = TRUE; 1535 1536 if (parsing_dec == FALSE) { 1537 int_part = int_part * 10 + (cur_char - '0'); 1538 } else { 1539 decimal_places++; 1540 dec_part = dec_part * 10 + (cur_char - '0'); 1541 } 1542 } else { 1543 break; 1544 } 1545 } 1546 1547 if (parsed == FALSE) { 1548 status = CR_PARSING_ERROR; 1549 } 1550 1551 /* 1552 *Now, set the output param values. 1553 */ 1554 if (status == CR_OK) { 1555 gdouble val = 0.0; 1556 1557 val = int_part; 1558 val += cr_utils_n_to_0_dot_n (dec_part, decimal_places); 1559 if (*a_num == NULL) { 1560 *a_num = cr_num_new_with_val (val, val_type); 1561 1562 if (*a_num == NULL) { 1563 status = CR_ERROR; 1564 goto error; 1565 } 1566 } else { 1567 (*a_num)->val = val; 1568 (*a_num)->type = val_type; 1569 } 1570 cr_parsing_location_copy (&(*a_num)->location, 1571 &location) ; 1572 return CR_OK; 1573 } 1574 1575 error: 1576 1577 cr_tknzr_set_cur_pos (a_this, &init_pos); 1578 1579 return status; 1580} 1581 1582/********************************************* 1583 *PUBLIC methods 1584 ********************************************/ 1585 1586CRTknzr * 1587cr_tknzr_new (CRInput * a_input) 1588{ 1589 CRTknzr *result = NULL; 1590 1591 result = g_try_malloc (sizeof (CRTknzr)); 1592 1593 if (result == NULL) { 1594 cr_utils_trace_info ("Out of memory"); 1595 return NULL; 1596 } 1597 1598 memset (result, 0, sizeof (CRTknzr)); 1599 1600 result->priv = g_try_malloc (sizeof (CRTknzrPriv)); 1601 1602 if (result->priv == NULL) { 1603 cr_utils_trace_info ("Out of memory"); 1604 1605 if (result) { 1606 g_free (result); 1607 result = NULL; 1608 } 1609 1610 return NULL; 1611 } 1612 memset (result->priv, 0, sizeof (CRTknzrPriv)); 1613 if (a_input) 1614 cr_tknzr_set_input (result, a_input); 1615 return result; 1616} 1617 1618CRTknzr * 1619cr_tknzr_new_from_buf (guchar * a_buf, gulong a_len, 1620 enum CREncoding a_enc, 1621 gboolean a_free_at_destroy) 1622{ 1623 CRTknzr *result = NULL; 1624 
CRInput *input = NULL; 1625 1626 input = cr_input_new_from_buf (a_buf, a_len, a_enc, 1627 a_free_at_destroy); 1628 1629 g_return_val_if_fail (input != NULL, NULL); 1630 1631 result = cr_tknzr_new (input); 1632 1633 return result; 1634} 1635 1636CRTknzr * 1637cr_tknzr_new_from_uri (const guchar * a_file_uri, 1638 enum CREncoding a_enc) 1639{ 1640 CRTknzr *result = NULL; 1641 CRInput *input = NULL; 1642 1643 input = cr_input_new_from_uri (a_file_uri, a_enc); 1644 g_return_val_if_fail (input != NULL, NULL); 1645 1646 result = cr_tknzr_new (input); 1647 1648 return result; 1649} 1650 1651void 1652cr_tknzr_ref (CRTknzr * a_this) 1653{ 1654 g_return_if_fail (a_this && PRIVATE (a_this)); 1655 1656 PRIVATE (a_this)->ref_count++; 1657} 1658 1659gboolean 1660cr_tknzr_unref (CRTknzr * a_this) 1661{ 1662 g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE); 1663 1664 if (PRIVATE (a_this)->ref_count > 0) { 1665 PRIVATE (a_this)->ref_count--; 1666 } 1667 1668 if (PRIVATE (a_this)->ref_count == 0) { 1669 cr_tknzr_destroy (a_this); 1670 return TRUE; 1671 } 1672 1673 return FALSE; 1674} 1675 1676enum CRStatus 1677cr_tknzr_set_input (CRTknzr * a_this, CRInput * a_input) 1678{ 1679 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 1680 1681 if (PRIVATE (a_this)->input) { 1682 cr_input_unref (PRIVATE (a_this)->input); 1683 } 1684 1685 PRIVATE (a_this)->input = a_input; 1686 1687 cr_input_ref (PRIVATE (a_this)->input); 1688 1689 return CR_OK; 1690} 1691 1692enum CRStatus 1693cr_tknzr_get_input (CRTknzr * a_this, CRInput ** a_input) 1694{ 1695 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 1696 1697 *a_input = PRIVATE (a_this)->input; 1698 1699 return CR_OK; 1700} 1701 1702/********************************* 1703 *Tokenizer input handling routines 1704 *********************************/ 1705 1706/** 1707 *Reads the next byte from the parser input stream. 1708 *@param a_this the "this pointer" of the current instance of 1709 *#CRParser. 
1710 *@param a_byte out parameter the place where to store the byte 1711 *read. 1712 *@return CR_OK upon successfull completion, an error 1713 *code otherwise. 1714 */ 1715enum CRStatus 1716cr_tknzr_read_byte (CRTknzr * a_this, guchar * a_byte) 1717{ 1718 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 1719 1720 return cr_input_read_byte (PRIVATE (a_this)->input, a_byte); 1721 1722} 1723 1724/** 1725 *Reads the next char from the parser input stream. 1726 *@param a_this the current instance of #CRTknzr. 1727 *@param a_char out parameter. The read char. 1728 *@return CR_OK upon successfull completion, an error code 1729 *otherwise. 1730 */ 1731enum CRStatus 1732cr_tknzr_read_char (CRTknzr * a_this, guint32 * a_char) 1733{ 1734 g_return_val_if_fail (a_this && PRIVATE (a_this) 1735 && PRIVATE (a_this)->input 1736 && a_char, CR_BAD_PARAM_ERROR); 1737 1738 if (PRIVATE (a_this)->token_cache) { 1739 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1740 &PRIVATE (a_this)->prev_pos); 1741 cr_token_destroy (PRIVATE (a_this)->token_cache); 1742 PRIVATE (a_this)->token_cache = NULL; 1743 } 1744 1745 return cr_input_read_char (PRIVATE (a_this)->input, a_char); 1746} 1747 1748/** 1749 *Peeks a char from the parser input stream. 1750 *To "peek a char" means reads the next char without consuming it. 1751 *Subsequent calls to this function return the same char. 1752 *@param a_this the current instance of #CRTknzr. 1753 *@param a_char out parameter. The peeked char uppon successfull completion. 1754 *@return CR_OK upon successfull completion, an error code otherwise. 
1755 */ 1756enum CRStatus 1757cr_tknzr_peek_char (CRTknzr * a_this, guint32 * a_char) 1758{ 1759 g_return_val_if_fail (a_this && PRIVATE (a_this) 1760 && PRIVATE (a_this)->input 1761 && a_char, CR_BAD_PARAM_ERROR); 1762 1763 if (PRIVATE (a_this)->token_cache) { 1764 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1765 &PRIVATE (a_this)->prev_pos); 1766 cr_token_destroy (PRIVATE (a_this)->token_cache); 1767 PRIVATE (a_this)->token_cache = NULL; 1768 } 1769 1770 return cr_input_peek_char (PRIVATE (a_this)->input, a_char); 1771} 1772 1773/** 1774 *Peeks a byte ahead at a given postion in the parser input stream. 1775 *@param a_this the current instance of #CRTknzr. 1776 *@param a_offset the offset of the peeked byte starting from the current 1777 *byte in the parser input stream. 1778 *@param a_byte out parameter. The peeked byte upon 1779 *successfull completion. 1780 *@return CR_OK upon successfull completion, an error code otherwise. 1781 */ 1782enum CRStatus 1783cr_tknzr_peek_byte (CRTknzr * a_this, gulong a_offset, guchar * a_byte) 1784{ 1785 g_return_val_if_fail (a_this && PRIVATE (a_this) 1786 && PRIVATE (a_this)->input && a_byte, 1787 CR_BAD_PARAM_ERROR); 1788 1789 if (PRIVATE (a_this)->token_cache) { 1790 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1791 &PRIVATE (a_this)->prev_pos); 1792 cr_token_destroy (PRIVATE (a_this)->token_cache); 1793 PRIVATE (a_this)->token_cache = NULL; 1794 } 1795 1796 return cr_input_peek_byte (PRIVATE (a_this)->input, 1797 CR_SEEK_CUR, a_offset, a_byte); 1798} 1799 1800/** 1801 *Same as cr_tknzr_peek_byte() but this api returns the byte peeked. 1802 *@param a_this the current instance of #CRTknzr. 1803 *@param a_offset the offset of the peeked byte starting from the current 1804 *byte in the parser input stream. 1805 *@param a_eof out parameter. If not NULL, is set to TRUE if we reached end of 1806 *file, FALE otherwise. If the caller sets it to NULL, this parameter 1807 *is just ignored. 1808 *@return the peeked byte. 
1809 */ 1810guchar 1811cr_tknzr_peek_byte2 (CRTknzr * a_this, gulong a_offset, gboolean * a_eof) 1812{ 1813 g_return_val_if_fail (a_this && PRIVATE (a_this) 1814 && PRIVATE (a_this)->input, 0); 1815 1816 return cr_input_peek_byte2 (PRIVATE (a_this)->input, a_offset, a_eof); 1817} 1818 1819/** 1820 *Gets the number of bytes left in the topmost input stream 1821 *associated to this parser. 1822 *@param a_this the current instance of #CRTknzr 1823 *@return the number of bytes left or -1 in case of error. 1824 */ 1825glong 1826cr_tknzr_get_nb_bytes_left (CRTknzr * a_this) 1827{ 1828 g_return_val_if_fail (a_this && PRIVATE (a_this) 1829 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 1830 1831 if (PRIVATE (a_this)->token_cache) { 1832 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1833 &PRIVATE (a_this)->prev_pos); 1834 cr_token_destroy (PRIVATE (a_this)->token_cache); 1835 PRIVATE (a_this)->token_cache = NULL; 1836 } 1837 1838 return cr_input_get_nb_bytes_left (PRIVATE (a_this)->input); 1839} 1840 1841enum CRStatus 1842cr_tknzr_get_cur_pos (CRTknzr * a_this, CRInputPos * a_pos) 1843{ 1844 g_return_val_if_fail (a_this && PRIVATE (a_this) 1845 && PRIVATE (a_this)->input 1846 && a_pos, CR_BAD_PARAM_ERROR); 1847 1848 if (PRIVATE (a_this)->token_cache) { 1849 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1850 &PRIVATE (a_this)->prev_pos); 1851 cr_token_destroy (PRIVATE (a_this)->token_cache); 1852 PRIVATE (a_this)->token_cache = NULL; 1853 } 1854 1855 return cr_input_get_cur_pos (PRIVATE (a_this)->input, a_pos); 1856} 1857 1858enum CRStatus 1859cr_tknzr_get_parsing_location (CRTknzr *a_this, 1860 CRParsingLocation *a_loc) 1861{ 1862 g_return_val_if_fail (a_this 1863 && PRIVATE (a_this) 1864 && a_loc, 1865 CR_BAD_PARAM_ERROR) ; 1866 1867 return cr_input_get_parsing_location 1868 (PRIVATE (a_this)->input, a_loc) ; 1869} 1870 1871enum CRStatus 1872cr_tknzr_get_cur_byte_addr (CRTknzr * a_this, guchar ** a_addr) 1873{ 1874 g_return_val_if_fail (a_this && PRIVATE (a_this) 1875 
&& PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 1876 if (PRIVATE (a_this)->token_cache) { 1877 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1878 &PRIVATE (a_this)->prev_pos); 1879 cr_token_destroy (PRIVATE (a_this)->token_cache); 1880 PRIVATE (a_this)->token_cache = NULL; 1881 } 1882 1883 return cr_input_get_cur_byte_addr (PRIVATE (a_this)->input, a_addr); 1884} 1885 1886enum CRStatus 1887cr_tknzr_seek_index (CRTknzr * a_this, enum CRSeekPos a_origin, gint a_pos) 1888{ 1889 g_return_val_if_fail (a_this && PRIVATE (a_this) 1890 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 1891 1892 if (PRIVATE (a_this)->token_cache) { 1893 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1894 &PRIVATE (a_this)->prev_pos); 1895 cr_token_destroy (PRIVATE (a_this)->token_cache); 1896 PRIVATE (a_this)->token_cache = NULL; 1897 } 1898 1899 return cr_input_seek_index (PRIVATE (a_this)->input, a_origin, a_pos); 1900} 1901 1902enum CRStatus 1903cr_tknzr_consume_chars (CRTknzr * a_this, guint32 a_char, glong * a_nb_char) 1904{ 1905 g_return_val_if_fail (a_this && PRIVATE (a_this) 1906 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 1907 1908 if (PRIVATE (a_this)->token_cache) { 1909 cr_input_set_cur_pos (PRIVATE (a_this)->input, 1910 &PRIVATE (a_this)->prev_pos); 1911 cr_token_destroy (PRIVATE (a_this)->token_cache); 1912 PRIVATE (a_this)->token_cache = NULL; 1913 } 1914 1915 return cr_input_consume_chars (PRIVATE (a_this)->input, 1916 a_char, a_nb_char); 1917} 1918 1919enum CRStatus 1920cr_tknzr_set_cur_pos (CRTknzr * a_this, CRInputPos * a_pos) 1921{ 1922 g_return_val_if_fail (a_this && PRIVATE (a_this) 1923 && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); 1924 1925 if (PRIVATE (a_this)->token_cache) { 1926 cr_token_destroy (PRIVATE (a_this)->token_cache); 1927 PRIVATE (a_this)->token_cache = NULL; 1928 } 1929 1930 return cr_input_set_cur_pos (PRIVATE (a_this)->input, a_pos); 1931} 1932 1933enum CRStatus 1934cr_tknzr_unget_token (CRTknzr * a_this, CRToken * a_token) 1935{ 1936 
g_return_val_if_fail (a_this && PRIVATE (a_this) 1937 && PRIVATE (a_this)->token_cache == NULL, 1938 CR_BAD_PARAM_ERROR); 1939 1940 PRIVATE (a_this)->token_cache = a_token; 1941 1942 return CR_OK; 1943} 1944 1945/** 1946 *Returns the next token of the input stream. 1947 *This method is really central. Each parsing 1948 *method calls it. 1949 *@param a_this the current tokenizer. 1950 *@param a_tk out parameter. The returned token. 1951 *for the sake of mem leak avoidance, *a_tk must 1952 *be NULL. 1953 *@param CR_OK upon successfull completion, an error code 1954 *otherwise. 1955 */ 1956enum CRStatus 1957cr_tknzr_get_next_token (CRTknzr * a_this, CRToken ** a_tk) 1958{ 1959 enum CRStatus status = CR_OK; 1960 CRToken *token = NULL; 1961 CRInputPos init_pos; 1962 guint32 next_char = 0; 1963 guchar next_bytes[4] = { 0 }; 1964 gboolean reached_eof = FALSE; 1965 CRInput *input = NULL; 1966 CRString *str = NULL; 1967 CRRgb *rgb = NULL; 1968 CRParsingLocation location = {0} ; 1969 1970 g_return_val_if_fail (a_this && PRIVATE (a_this) 1971 && a_tk && *a_tk == NULL 1972 && PRIVATE (a_this)->input, 1973 CR_BAD_PARAM_ERROR); 1974 1975 if (PRIVATE (a_this)->token_cache) { 1976 *a_tk = PRIVATE (a_this)->token_cache; 1977 PRIVATE (a_this)->token_cache = NULL; 1978 return CR_OK; 1979 } 1980 1981 RECORD_INITIAL_POS (a_this, &init_pos); 1982 1983 status = cr_input_get_end_of_file 1984 (PRIVATE (a_this)->input, &reached_eof); 1985 ENSURE_PARSING_COND (status == CR_OK); 1986 1987 if (reached_eof == TRUE) { 1988 status = CR_END_OF_INPUT_ERROR; 1989 goto error; 1990 } 1991 1992 input = PRIVATE (a_this)->input; 1993 1994 PEEK_NEXT_CHAR (a_this, &next_char); 1995 token = cr_token_new (); 1996 ENSURE_PARSING_COND (token); 1997 1998 switch (next_char) { 1999 case '@': 2000 { 2001 if (BYTE (input, 2, NULL) == 'f' 2002 && BYTE (input, 3, NULL) == 'o' 2003 && BYTE (input, 4, NULL) == 'n' 2004 && BYTE (input, 5, NULL) == 't' 2005 && BYTE (input, 6, NULL) == '-' 2006 && BYTE (input, 7, NULL) == 
'f' 2007 && BYTE (input, 8, NULL) == 'a' 2008 && BYTE (input, 9, NULL) == 'c' 2009 && BYTE (input, 10, NULL) == 'e') { 2010 SKIP_CHARS (a_this, 1); 2011 cr_tknzr_get_parsing_location 2012 (a_this, &location) ; 2013 SKIP_CHARS (a_this, 9); 2014 status = cr_token_set_font_face_sym (token); 2015 CHECK_PARSING_STATUS (status, TRUE); 2016 cr_parsing_location_copy (&token->location, 2017 &location) ; 2018 goto done; 2019 } 2020 2021 if (BYTE (input, 2, NULL) == 'c' 2022 && BYTE (input, 3, NULL) == 'h' 2023 && BYTE (input, 4, NULL) == 'a' 2024 && BYTE (input, 5, NULL) == 'r' 2025 && BYTE (input, 6, NULL) == 's' 2026 && BYTE (input, 7, NULL) == 'e' 2027 && BYTE (input, 8, NULL) == 't') { 2028 SKIP_CHARS (a_this, 1); 2029 cr_tknzr_get_parsing_location 2030 (a_this, &location) ; 2031 SKIP_CHARS (a_this, 7); 2032 status = cr_token_set_charset_sym (token); 2033 CHECK_PARSING_STATUS (status, TRUE); 2034 cr_parsing_location_copy (&token->location, 2035 &location) ; 2036 goto done; 2037 } 2038 2039 if (BYTE (input, 2, NULL) == 'i' 2040 && BYTE (input, 3, NULL) == 'm' 2041 && BYTE (input, 4, NULL) == 'p' 2042 && BYTE (input, 5, NULL) == 'o' 2043 && BYTE (input, 6, NULL) == 'r' 2044 && BYTE (input, 7, NULL) == 't') { 2045 SKIP_CHARS (a_this, 1); 2046 cr_tknzr_get_parsing_location 2047 (a_this, &location) ; 2048 SKIP_CHARS (a_this, 6); 2049 status = cr_token_set_import_sym (token); 2050 CHECK_PARSING_STATUS (status, TRUE); 2051 cr_parsing_location_copy (&token->location, 2052 &location) ; 2053 goto done; 2054 } 2055 2056 if (BYTE (input, 2, NULL) == 'm' 2057 && BYTE (input, 3, NULL) == 'e' 2058 && BYTE (input, 4, NULL) == 'd' 2059 && BYTE (input, 5, NULL) == 'i' 2060 && BYTE (input, 6, NULL) == 'a') { 2061 SKIP_CHARS (a_this, 1); 2062 cr_tknzr_get_parsing_location (a_this, 2063 &location) ; 2064 SKIP_CHARS (a_this, 5); 2065 status = cr_token_set_media_sym (token); 2066 CHECK_PARSING_STATUS (status, TRUE); 2067 cr_parsing_location_copy (&token->location, 2068 &location) ; 2069 goto 
done; 2070 } 2071 2072 if (BYTE (input, 2, NULL) == 'p' 2073 && BYTE (input, 3, NULL) == 'a' 2074 && BYTE (input, 4, NULL) == 'g' 2075 && BYTE (input, 5, NULL) == 'e') { 2076 SKIP_CHARS (a_this, 1); 2077 cr_tknzr_get_parsing_location (a_this, 2078 &location) ; 2079 SKIP_CHARS (a_this, 4); 2080 status = cr_token_set_page_sym (token); 2081 CHECK_PARSING_STATUS (status, TRUE); 2082 cr_parsing_location_copy (&token->location, 2083 &location) ; 2084 goto done; 2085 } 2086 status = cr_tknzr_parse_atkeyword (a_this, &str); 2087 if (status == CR_OK) { 2088 status = cr_token_set_atkeyword (token, str); 2089 CHECK_PARSING_STATUS (status, TRUE); 2090 if (str) { 2091 cr_parsing_location_copy (&token->location, 2092 &str->location) ; 2093 } 2094 goto done; 2095 } 2096 } 2097 break; 2098 2099 case 'u': 2100 2101 if (BYTE (input, 2, NULL) == 'r' 2102 && BYTE (input, 3, NULL) == 'l' 2103 && BYTE (input, 4, NULL) == '(') { 2104 CRString *str = NULL; 2105 2106 status = cr_tknzr_parse_uri (a_this, &str); 2107 if (status == CR_OK) { 2108 status = cr_token_set_uri (token, str); 2109 CHECK_PARSING_STATUS (status, TRUE); 2110 if (str) { 2111 cr_parsing_location_copy (&token->location, 2112 &str->location) ; 2113 } 2114 goto done; 2115 } 2116 } else { 2117 status = cr_tknzr_parse_ident (a_this, &str); 2118 if (status == CR_OK && str) { 2119 status = cr_token_set_ident (token, str); 2120 CHECK_PARSING_STATUS (status, TRUE); 2121 if (str) { 2122 cr_parsing_location_copy (&token->location, 2123 &str->location) ; 2124 } 2125 goto done; 2126 } 2127 } 2128 break; 2129 2130 case 'r': 2131 if (BYTE (input, 2, NULL) == 'g' 2132 && BYTE (input, 3, NULL) == 'b' 2133 && BYTE (input, 4, NULL) == '(') { 2134 status = cr_tknzr_parse_rgb (a_this, &rgb); 2135 if (status == CR_OK && rgb) { 2136 status = cr_token_set_rgb (token, rgb); 2137 CHECK_PARSING_STATUS (status, TRUE); 2138 if (rgb) { 2139 cr_parsing_location_copy (&token->location, 2140 &rgb->location) ; 2141 } 2142 rgb = NULL; 2143 goto done; 2144 
} 2145 2146 } else { 2147 status = cr_tknzr_parse_ident (a_this, &str); 2148 if (status == CR_OK) { 2149 status = cr_token_set_ident (token, str); 2150 CHECK_PARSING_STATUS (status, TRUE); 2151 if (str) { 2152 cr_parsing_location_copy (&token->location, 2153 &str->location) ; 2154 } 2155 str = NULL; 2156 goto done; 2157 } 2158 } 2159 break; 2160 2161 case '<': 2162 if (BYTE (input, 2, NULL) == '-' 2163 && BYTE (input, 3, NULL) == '-') { 2164 SKIP_CHARS (a_this, 1); 2165 cr_tknzr_get_parsing_location (a_this, 2166 &location) ; 2167 SKIP_CHARS (a_this, 2); 2168 status = cr_token_set_cdo (token); 2169 CHECK_PARSING_STATUS (status, TRUE); 2170 cr_parsing_location_copy (&token->location, 2171 &location) ; 2172 goto done; 2173 } 2174 break; 2175 2176 case '-': 2177 if (BYTE (input, 2, NULL) == '-' 2178 && BYTE (input, 3, NULL) == '>') { 2179 SKIP_CHARS (a_this, 1); 2180 cr_tknzr_get_parsing_location (a_this, 2181 &location) ; 2182 SKIP_CHARS (a_this, 2); 2183 status = cr_token_set_cdc (token); 2184 CHECK_PARSING_STATUS (status, TRUE); 2185 cr_parsing_location_copy (&token->location, 2186 &location) ; 2187 goto done; 2188 } else { 2189 status = cr_tknzr_parse_ident 2190 (a_this, &str); 2191 if (status == CR_OK) { 2192 cr_token_set_ident 2193 (token, str); 2194 if (str) { 2195 cr_parsing_location_copy (&token->location, 2196 &str->location) ; 2197 } 2198 goto done; 2199 } 2200 } 2201 break; 2202 2203 case '~': 2204 if (BYTE (input, 2, NULL) == '=') { 2205 SKIP_CHARS (a_this, 1); 2206 cr_tknzr_get_parsing_location (a_this, 2207 &location) ; 2208 SKIP_CHARS (a_this, 1); 2209 status = cr_token_set_includes (token); 2210 CHECK_PARSING_STATUS (status, TRUE); 2211 cr_parsing_location_copy (&token->location, 2212 &location) ; 2213 goto done; 2214 } 2215 break; 2216 2217 case '|': 2218 if (BYTE (input, 2, NULL) == '=') { 2219 SKIP_CHARS (a_this, 1); 2220 cr_tknzr_get_parsing_location (a_this, 2221 &location) ; 2222 SKIP_CHARS (a_this, 1); 2223 status = cr_token_set_dashmatch 
(token); 2224 CHECK_PARSING_STATUS (status, TRUE); 2225 cr_parsing_location_copy (&token->location, 2226 &location) ; 2227 goto done; 2228 } 2229 break; 2230 2231 case '/': 2232 if (BYTE (input, 2, NULL) == '*') { 2233 status = cr_tknzr_parse_comment (a_this, &str); 2234 2235 if (status == CR_OK) { 2236 status = cr_token_set_comment (token, str); 2237 str = NULL; 2238 CHECK_PARSING_STATUS (status, TRUE); 2239 if (str) { 2240 cr_parsing_location_copy (&token->location, 2241 &str->location) ; 2242 } 2243 goto done; 2244 } 2245 } 2246 break ; 2247 2248 case ';': 2249 SKIP_CHARS (a_this, 1); 2250 cr_tknzr_get_parsing_location (a_this, 2251 &location) ; 2252 status = cr_token_set_semicolon (token); 2253 CHECK_PARSING_STATUS (status, TRUE); 2254 cr_parsing_location_copy (&token->location, 2255 &location) ; 2256 goto done; 2257 2258 case '{': 2259 SKIP_CHARS (a_this, 1); 2260 cr_tknzr_get_parsing_location (a_this, 2261 &location) ; 2262 status = cr_token_set_cbo (token); 2263 CHECK_PARSING_STATUS (status, TRUE); 2264 cr_tknzr_get_parsing_location (a_this, 2265 &location) ; 2266 goto done; 2267 2268 case '}': 2269 SKIP_CHARS (a_this, 1); 2270 cr_tknzr_get_parsing_location (a_this, 2271 &location) ; 2272 status = cr_token_set_cbc (token); 2273 CHECK_PARSING_STATUS (status, TRUE); 2274 cr_parsing_location_copy (&token->location, 2275 &location) ; 2276 goto done; 2277 2278 case '(': 2279 SKIP_CHARS (a_this, 1); 2280 cr_tknzr_get_parsing_location (a_this, 2281 &location) ; 2282 status = cr_token_set_po (token); 2283 CHECK_PARSING_STATUS (status, TRUE); 2284 cr_parsing_location_copy (&token->location, 2285 &location) ; 2286 goto done; 2287 2288 case ')': 2289 SKIP_CHARS (a_this, 1); 2290 cr_tknzr_get_parsing_location (a_this, 2291 &location) ; 2292 status = cr_token_set_pc (token); 2293 CHECK_PARSING_STATUS (status, TRUE); 2294 cr_parsing_location_copy (&token->location, 2295 &location) ; 2296 goto done; 2297 2298 case '[': 2299 SKIP_CHARS (a_this, 1); 2300 
cr_tknzr_get_parsing_location (a_this, 2301 &location) ; 2302 status = cr_token_set_bo (token); 2303 CHECK_PARSING_STATUS (status, TRUE); 2304 cr_parsing_location_copy (&token->location, 2305 &location) ; 2306 goto done; 2307 2308 case ']': 2309 SKIP_CHARS (a_this, 1); 2310 cr_tknzr_get_parsing_location (a_this, 2311 &location) ; 2312 status = cr_token_set_bc (token); 2313 CHECK_PARSING_STATUS (status, TRUE); 2314 cr_parsing_location_copy (&token->location, 2315 &location) ; 2316 goto done; 2317 2318 case ' ': 2319 case '\t': 2320 case '\n': 2321 case '\f': 2322 case '\r': 2323 { 2324 guchar *start = NULL, 2325 *end = NULL; 2326 2327 status = cr_tknzr_parse_w (a_this, &start, 2328 &end, &location); 2329 if (status == CR_OK) { 2330 status = cr_token_set_s (token); 2331 CHECK_PARSING_STATUS (status, TRUE); 2332 cr_tknzr_get_parsing_location (a_this, 2333 &location) ; 2334 goto done; 2335 } 2336 } 2337 break; 2338 2339 case '#': 2340 { 2341 status = cr_tknzr_parse_hash (a_this, &str); 2342 if (status == CR_OK && str) { 2343 status = cr_token_set_hash (token, str); 2344 CHECK_PARSING_STATUS (status, TRUE); 2345 if (str) { 2346 cr_parsing_location_copy (&token->location, 2347 &str->location) ; 2348 } 2349 str = NULL; 2350 goto done; 2351 } 2352 } 2353 break; 2354 2355 case '\'': 2356 case '"': 2357 status = cr_tknzr_parse_string (a_this, &str); 2358 if (status == CR_OK && str) { 2359 status = cr_token_set_string (token, str); 2360 CHECK_PARSING_STATUS (status, TRUE); 2361 if (str) { 2362 cr_parsing_location_copy (&token->location, 2363 &str->location) ; 2364 } 2365 str = NULL; 2366 goto done; 2367 } 2368 break; 2369 2370 case '!': 2371 status = cr_tknzr_parse_important (a_this, &location); 2372 if (status == CR_OK) { 2373 status = cr_token_set_important_sym (token); 2374 CHECK_PARSING_STATUS (status, TRUE); 2375 cr_parsing_location_copy (&token->location, 2376 &location) ; 2377 goto done; 2378 } 2379 break; 2380 2381 case '0': 2382 case '1': 2383 case '2': 2384 case 
'3': 2385 case '4': 2386 case '5': 2387 case '6': 2388 case '7': 2389 case '8': 2390 case '9': 2391 case '.': 2392 { 2393 CRNum *num = NULL; 2394 2395 status = cr_tknzr_parse_num (a_this, &num); 2396 if (status == CR_OK && num) { 2397 next_bytes[0] = BYTE (input, 1, NULL); 2398 next_bytes[1] = BYTE (input, 2, NULL); 2399 next_bytes[2] = BYTE (input, 3, NULL); 2400 next_bytes[3] = BYTE (input, 3, NULL); 2401 2402 if (next_bytes[0] == 'e' 2403 && next_bytes[1] == 'm') { 2404 num->type = NUM_LENGTH_EM; 2405 status = cr_token_set_ems (token, 2406 num); 2407 num = NULL; 2408 SKIP_CHARS (a_this, 2); 2409 } else if (next_bytes[0] == 'e' 2410 && next_bytes[1] == 'x') { 2411 num->type = NUM_LENGTH_EX; 2412 status = cr_token_set_exs (token, 2413 num); 2414 num = NULL; 2415 SKIP_CHARS (a_this, 2); 2416 } else if (next_bytes[0] == 'p' 2417 && next_bytes[1] == 'x') { 2418 num->type = NUM_LENGTH_PX; 2419 status = cr_token_set_length 2420 (token, num, LENGTH_PX_ET); 2421 num = NULL; 2422 SKIP_CHARS (a_this, 2); 2423 } else if (next_bytes[0] == 'c' 2424 && next_bytes[1] == 'm') { 2425 num->type = NUM_LENGTH_CM; 2426 status = cr_token_set_length 2427 (token, num, LENGTH_CM_ET); 2428 num = NULL; 2429 SKIP_CHARS (a_this, 2); 2430 } else if (next_bytes[0] == 'm' 2431 && next_bytes[1] == 'm') { 2432 num->type = NUM_LENGTH_MM; 2433 status = cr_token_set_length 2434 (token, num, LENGTH_MM_ET); 2435 num = NULL; 2436 SKIP_CHARS (a_this, 2); 2437 } else if (next_bytes[0] == 'i' 2438 && next_bytes[1] == 'n') { 2439 num->type = NUM_LENGTH_IN; 2440 status = cr_token_set_length 2441 (token, num, LENGTH_IN_ET); 2442 num = NULL; 2443 SKIP_CHARS (a_this, 2); 2444 } else if (next_bytes[0] == 'p' 2445 && next_bytes[1] == 't') { 2446 num->type = NUM_LENGTH_PT; 2447 status = cr_token_set_length 2448 (token, num, LENGTH_PT_ET); 2449 num = NULL; 2450 SKIP_CHARS (a_this, 2); 2451 } else if (next_bytes[0] == 'p' 2452 && next_bytes[1] == 'c') { 2453 num->type = NUM_LENGTH_PC; 2454 status = 
cr_token_set_length 2455 (token, num, LENGTH_PC_ET); 2456 num = NULL; 2457 SKIP_CHARS (a_this, 2); 2458 } else if (next_bytes[0] == 'd' 2459 && next_bytes[1] == 'e' 2460 && next_bytes[2] == 'g') { 2461 num->type = NUM_ANGLE_DEG; 2462 status = cr_token_set_angle 2463 (token, num, ANGLE_DEG_ET); 2464 num = NULL; 2465 SKIP_CHARS (a_this, 3); 2466 } else if (next_bytes[0] == 'r' 2467 && next_bytes[1] == 'a' 2468 && next_bytes[2] == 'd') { 2469 num->type = NUM_ANGLE_RAD; 2470 status = cr_token_set_angle 2471 (token, num, ANGLE_RAD_ET); 2472 num = NULL; 2473 SKIP_CHARS (a_this, 3); 2474 } else if (next_bytes[0] == 'g' 2475 && next_bytes[1] == 'r' 2476 && next_bytes[2] == 'a' 2477 && next_bytes[3] == 'd') { 2478 num->type = NUM_ANGLE_GRAD; 2479 status = cr_token_set_angle 2480 (token, num, ANGLE_GRAD_ET); 2481 num = NULL; 2482 SKIP_CHARS (a_this, 4); 2483 } else if (next_bytes[0] == 'm' 2484 && next_bytes[1] == 's') { 2485 num->type = NUM_TIME_MS; 2486 status = cr_token_set_time 2487 (token, num, TIME_MS_ET); 2488 num = NULL; 2489 SKIP_CHARS (a_this, 2); 2490 } else if (next_bytes[0] == 's') { 2491 num->type = NUM_TIME_S; 2492 status = cr_token_set_time 2493 (token, num, TIME_S_ET); 2494 num = NULL; 2495 SKIP_CHARS (a_this, 1); 2496 } else if (next_bytes[0] == 'H' 2497 && next_bytes[1] == 'z') { 2498 num->type = NUM_FREQ_HZ; 2499 status = cr_token_set_freq 2500 (token, num, FREQ_HZ_ET); 2501 num = NULL; 2502 SKIP_CHARS (a_this, 2); 2503 } else if (next_bytes[0] == 'k' 2504 && next_bytes[1] == 'H' 2505 && next_bytes[2] == 'z') { 2506 num->type = NUM_FREQ_KHZ; 2507 status = cr_token_set_freq 2508 (token, num, FREQ_KHZ_ET); 2509 num = NULL; 2510 SKIP_CHARS (a_this, 3); 2511 } else if (next_bytes[0] == '%') { 2512 num->type = NUM_PERCENTAGE; 2513 status = cr_token_set_percentage 2514 (token, num); 2515 num = NULL; 2516 SKIP_CHARS (a_this, 1); 2517 } else { 2518 status = cr_tknzr_parse_ident (a_this, 2519 &str); 2520 if (status == CR_OK && str) { 2521 num->type = 
NUM_UNKNOWN_TYPE; 2522 status = cr_token_set_dimen 2523 (token, num, str); 2524 num = NULL; 2525 CHECK_PARSING_STATUS (status, 2526 TRUE); 2527 str = NULL; 2528 } else { 2529 status = cr_token_set_number 2530 (token, num); 2531 num = NULL; 2532 CHECK_PARSING_STATUS (status, CR_OK); 2533 str = NULL; 2534 } 2535 } 2536 if (token && token->u.num) { 2537 cr_parsing_location_copy (&token->location, 2538 &token->u.num->location) ; 2539 } else { 2540 status = CR_ERROR ; 2541 } 2542 goto done ; 2543 } 2544 } 2545 break; 2546 2547 default: 2548 /*process the fallback cases here */ 2549 2550 if (next_char == '\\' 2551 || (cr_utils_is_nonascii (next_bytes[0]) == TRUE) 2552 || ((next_char >= 'a') && (next_char <= 'z')) 2553 || ((next_char >= 'A') && (next_char <= 'Z'))) { 2554 status = cr_tknzr_parse_ident (a_this, &str); 2555 if (status == CR_OK && str) { 2556 guint32 next_c = 0; 2557 2558 status = cr_input_peek_char 2559 (PRIVATE (a_this)->input, &next_c); 2560 2561 if (status == CR_OK && next_c == '(') { 2562 2563 SKIP_CHARS (a_this, 1); 2564 status = cr_token_set_function 2565 (token, str); 2566 CHECK_PARSING_STATUS (status, TRUE); 2567 /*ownership is transfered 2568 *to token by cr_token_set_function. 
2569 */ 2570 if (str) { 2571 cr_parsing_location_copy (&token->location, 2572 &str->location) ; 2573 } 2574 str = NULL; 2575 } else { 2576 status = cr_token_set_ident (token, 2577 str); 2578 CHECK_PARSING_STATUS (status, TRUE); 2579 if (str) { 2580 cr_parsing_location_copy (&token->location, 2581 &str->location) ; 2582 } 2583 str = NULL; 2584 } 2585 goto done; 2586 } else { 2587 if (str) { 2588 cr_string_destroy (str); 2589 str = NULL; 2590 } 2591 } 2592 } 2593 break; 2594 } 2595 2596 READ_NEXT_CHAR (a_this, &next_char); 2597 cr_tknzr_get_parsing_location (a_this, 2598 &location) ; 2599 status = cr_token_set_delim (token, next_char); 2600 CHECK_PARSING_STATUS (status, TRUE); 2601 cr_parsing_location_copy (&token->location, 2602 &location) ; 2603 done: 2604 2605 if (status == CR_OK && token) { 2606 *a_tk = token; 2607 /* 2608 *store the previous position input stream pos. 2609 */ 2610 memmove (&PRIVATE (a_this)->prev_pos, 2611 &init_pos, sizeof (CRInputPos)); 2612 return CR_OK; 2613 } 2614 2615 error: 2616 if (token) { 2617 cr_token_destroy (token); 2618 token = NULL; 2619 } 2620 2621 if (str) { 2622 cr_string_destroy (str); 2623 str = NULL; 2624 } 2625 cr_tknzr_set_cur_pos (a_this, &init_pos); 2626 return status; 2627 2628} 2629 2630enum CRStatus 2631cr_tknzr_parse_token (CRTknzr * a_this, enum CRTokenType a_type, 2632 enum CRTokenExtraType a_et, gpointer a_res, 2633 gpointer a_extra_res) 2634{ 2635 enum CRStatus status = CR_OK; 2636 CRToken *token = NULL; 2637 2638 g_return_val_if_fail (a_this && PRIVATE (a_this) 2639 && PRIVATE (a_this)->input 2640 && a_res, CR_BAD_PARAM_ERROR); 2641 2642 status = cr_tknzr_get_next_token (a_this, &token); 2643 if (status != CR_OK) 2644 return status; 2645 if (token == NULL) 2646 return CR_PARSING_ERROR; 2647 2648 if (token->type == a_type) { 2649 switch (a_type) { 2650 case NO_TK: 2651 case S_TK: 2652 case CDO_TK: 2653 case CDC_TK: 2654 case INCLUDES_TK: 2655 case DASHMATCH_TK: 2656 case IMPORT_SYM_TK: 2657 case PAGE_SYM_TK: 2658 
case MEDIA_SYM_TK: 2659 case FONT_FACE_SYM_TK: 2660 case CHARSET_SYM_TK: 2661 case IMPORTANT_SYM_TK: 2662 status = CR_OK; 2663 break; 2664 2665 case STRING_TK: 2666 case IDENT_TK: 2667 case HASH_TK: 2668 case ATKEYWORD_TK: 2669 case FUNCTION_TK: 2670 case COMMENT_TK: 2671 case URI_TK: 2672 *((CRString **) a_res) = token->u.str; 2673 token->u.str = NULL; 2674 status = CR_OK; 2675 break; 2676 2677 case EMS_TK: 2678 case EXS_TK: 2679 case PERCENTAGE_TK: 2680 case NUMBER_TK: 2681 *((CRNum **) a_res) = token->u.num; 2682 token->u.num = NULL; 2683 status = CR_OK; 2684 break; 2685 2686 case LENGTH_TK: 2687 case ANGLE_TK: 2688 case TIME_TK: 2689 case FREQ_TK: 2690 if (token->extra_type == a_et) { 2691 *((CRNum **) a_res) = token->u.num; 2692 token->u.num = NULL; 2693 status = CR_OK; 2694 } 2695 break; 2696 2697 case DIMEN_TK: 2698 *((CRNum **) a_res) = token->u.num; 2699 if (a_extra_res == NULL) { 2700 status = CR_BAD_PARAM_ERROR; 2701 goto error; 2702 } 2703 2704 *((CRString **) a_extra_res) = token->dimen; 2705 token->u.num = NULL; 2706 token->dimen = NULL; 2707 status = CR_OK; 2708 break; 2709 2710 case DELIM_TK: 2711 *((guint32 *) a_res) = token->u.unichar; 2712 status = CR_OK; 2713 break; 2714 2715 case UNICODERANGE_TK: 2716 default: 2717 status = CR_PARSING_ERROR; 2718 break; 2719 } 2720 2721 cr_token_destroy (token); 2722 token = NULL; 2723 } else { 2724 cr_tknzr_unget_token (a_this, token); 2725 token = NULL; 2726 status = CR_PARSING_ERROR; 2727 } 2728 2729 return status; 2730 2731 error: 2732 2733 if (token) { 2734 cr_tknzr_unget_token (a_this, token); 2735 token = NULL; 2736 } 2737 2738 return status; 2739} 2740 2741void 2742cr_tknzr_destroy (CRTknzr * a_this) 2743{ 2744 g_return_if_fail (a_this); 2745 2746 if (PRIVATE (a_this) && PRIVATE (a_this)->input) { 2747 if (cr_input_unref (PRIVATE (a_this)->input) 2748 == TRUE) { 2749 PRIVATE (a_this)->input = NULL; 2750 } 2751 } 2752 2753 if (PRIVATE (a_this)->token_cache) { 2754 cr_token_destroy (PRIVATE 
(a_this)->token_cache); 2755 PRIVATE (a_this)->token_cache = NULL; 2756 } 2757 2758 if (PRIVATE (a_this)) { 2759 g_free (PRIVATE (a_this)); 2760 PRIVATE (a_this) = NULL; 2761 } 2762 2763 g_free (a_this); 2764} 2765