/************************************************************************
 * grammer - src/parse.c
 * Copyright (C) 2002 Marcello Barnaba
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 1, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * parser engine . . .
 */

#include "parse.h"

/*
 * Interactively read a whole grammar from the input stream.
 *
 * Prompts, in order, for the terminals (g->X), the non-terminals (g->V),
 * the start symbol (g->S) and the productions (g->P).  Each of the first
 * three prompts is repeated until the corresponding reader returns a
 * non-NULL result.
 *
 * If the start symbol is already in V, the freshly read Part is released
 * and g->S points at the existing one; otherwise it is appended to V via
 * add_part().
 *
 * While productions are being read SIGINT is routed to
 * parser_sigint_handler(); the default disposition is restored afterwards.
 *
 * Returns the newly allocated Grammar.
 */
Grammar *acquire_grammar(void)
{
	Grammar *g = xmalloc(sizeof(Grammar));
	Part *p;

	do
		outf("Insert the Terminals, separated by spaces . . ");
	while(!(g->X = acquire_list(NULL, FLAGS_NONE)));

	do
		outf("Insert the NonTerminals, separated by spaces . . ");
	while(!(g->V = acquire_list(NULL, FLAGS_NONE)));

	do
		outf("Insert the Start Symbol . . ");
	while(!(g->S = acquire_part(NULL, FLAGS_NONE)));

	if((p = find_part(g->V, g->S))) {
		/* already known: share the Part stored in V */
		xfree(g->S);
		g->S = p;
	} else
		add_part(&g->V, &g->S);

	signal(SIGINT, parser_sigint_handler);
	outf("Insert the Productions, one per line, ctrl+c+ to stop . . \n");
	g->P = acquire_prod();
	signal(SIGINT, SIG_DFL);

	return g;
}

/*
 * Parse a single symbol: one letter optionally followed by a decimal
 * index (e.g. "A", "b12"), or -- when FLAGS_ACQUIRE_OR is set in `flags` --
 * a lone OR_SYMBOL.
 *
 * inbuf: text to parse, or NULL to read one line from the input stream.
 * flags: FLAGS_* bit mask; only FLAGS_ACQUIRE_OR is examined here.
 *
 * Characters that are neither letters, digits nor an accepted OR_SYMBOL
 * are skipped.  Parsing stops at the first digit run (stored in ->n) or at
 * an accepted OR_SYMBOL.
 *
 * Returns a newly allocated Part, or NULL when the text holds no symbol,
 * holds two letters, starts with a digit, or has an OR_SYMBOL following a
 * letter.  On every NULL return the Part is released, and a line read here
 * (inbuf == NULL) is always released before returning.
 *
 * NOTE(review): a NULL result from acquire_string() is dereferenced below;
 * confirm whether it can return NULL on this path (e.g. at end of input).
 */
Part *acquire_part(char *inbuf, short flags)
{
	Part *ret;
	char *owned = NULL;	/* line read here, to be released on exit */
	register char *p = NULL;
	int ok = 0;

	ret = xmalloc(sizeof(Part));
	/* do not rely on the allocator handing back zeroed memory */
	ret->id = 0;
	ret->n = 0;

	if(!inbuf)
		inbuf = owned = acquire_string(fileno(in), '\n');

	for(p = inbuf; *p; p++) {
		/* <ctype.h> functions require an unsigned char value */
		unsigned char c = (unsigned char) *p;

		if(isalpha(c)) {
			if(ret->id)
				goto out;	/* second letter: not a single symbol */
			ret->id = *p;
		} else if(isdigit(c)) {
			if(!ret->id)
				goto out;	/* index without a leading letter */
			ret->n = strtoul(p, NULL, 10);
			ok = 1;
			goto out;
		} else if(*p == OR_SYMBOL && (flags & FLAGS_ACQUIRE_OR)) {
			if(ret->id || ret->n)
				goto out;	/* OR glued to a symbol */
			ret->id = *p;
			ret->n = 0;
			ok = 1;
			goto out;
		}
	}

	/* end of text: valid only if a letter was seen */
	ok = (ret->id != 0);

out:
	if(owned)
		xfree(owned);
	if(!ok) {
		xfree(ret);
		ret = NULL;
	}
	return ret;
}

/*
 * Parse a space separated list of symbols into a List.
 *
 * inbuf: text to parse, or NULL to read one line from the input stream.
 *        The buffer is modified: token separators are overwritten
 *        with '\0'.
 * flags: FLAGS_ACQUIRE_OR   accept OR_SYMBOL as an item of its own, but
 *                           only after at least one symbol was stored;
 *        FLAGS_ACQUIRE_DUPS keep items that are already in the list.
 *
 * Invalid or (unless allowed) duplicate items are reported through outf()
 * and skipped.  If the last stored item is an OR_SYMBOL the items gathered
 * so far are discarded.
 *
 * Returns `head` as left by the CUT() list macro.  A line read here
 * (inbuf == NULL) is released before returning.
 *
 * NOTE(review): a NULL result from acquire_string() is dereferenced below;
 * confirm whether it can return NULL on this path (e.g. at end of input).
 */
List *acquire_list(char *inbuf, short flags)
{
	char *str;
	register char *p, *q;	/* p scans, q marks the start of the token */
	List *arr = NULL, *head = NULL;
	Part *pt = NULL;
	unsigned short i = 0;	/* 1-based item counter for diagnostics */
	unsigned char state = STATE_IDLE, prev_state = STATE_IDLE;

	str = inbuf ? inbuf : acquire_string(fileno(in), '\n');

	HEAD(arr, head, List); /* sentinel */

	for(q = p = str; ; p++) {
		if(isalpha((unsigned char) *p) && state == STATE_IDLE) {
			state = STATE_SYMBOL;
			continue;
		}

		if(*p == OR_SYMBOL) {
			if(prev_state == STATE_SYMBOL && (flags & FLAGS_ACQUIRE_OR))
				state = STATE_OR;
			else {
				q = p + 1;
				state = STATE_IDLE;
				outf("syntax error: `%c' not allowed here.\n", *p);
			}
			continue;
		}

		if(isdigit((unsigned char) *p) && state == STATE_SYMBOL)
			continue;

		if((*p == ' ' || !*p) && (state == STATE_SYMBOL || state == STATE_OR)) {
			/* remember whether this token ends the string before the
			 * separator is overwritten: advancing past the terminator
			 * would read outside the buffer */
			int at_end = !*p;

			*p = '\0';
			i++;
			NEW(arr, List);

			if((pt = acquire_part(q, flags)) &&
			   (!find_part(head->next, pt) || (flags & FLAGS_ACQUIRE_DUPS))) {
				ASSIGN(arr, item, pt);
				NEXT(arr);
				prev_state = state;
			} else {
				outf("syntax error: item n. %d %s [%s]\n", i, pt ? "duplicate" : "invalid", q);
				/* a rejected duplicate is not stored anywhere */
				if(pt)
					xfree(pt);
			}

			if(at_end)
				break;

			q = p + 1;
			state = STATE_IDLE;
			continue;
		}

		if(*p == ' ')
			continue;

		if(!*p)
			break;
	}

	if(prev_state == STATE_OR) {
		free_list(head->next);
		head->next = NULL;
		outf("syntax error: productions cannot terminate with `%c'. production ignored.\n", OR_SYMBOL);
	}

	CUT(arr, head);

	if(!inbuf)
		xfree(str);

	return head;
}

/*
 * Split a production line at its first "->".
 *
 * A small hand-rolled splitter is used instead of strtok(), whose manual
 * page itself advises against using it.
 *
 * in: NUL terminated line; it is not modified.
 *
 * Returns NULL when the line contains no "->".  Otherwise returns a
 * pointer to a static two element array: [0] is a copy of the text before
 * the arrow, [1] a copy of the text after it.  Both copies are NUL
 * terminated, come from xmalloc() and must be released by the caller with
 * xfree().  The array itself is overwritten by the next call.
 */
static char **tokenize(char *in)
{
	static char *ret[2];
	char *arrow;
	size_t l0, l1;

	/* strstr() cannot skip a candidate or run past the terminator, unlike
	 * a scan that consumes a character on a failed "->" match */
	arrow = strstr(in, "->");
	if(!arrow)
		return NULL;

	l0 = (size_t) (arrow - in);
	l1 = strlen(arrow + 2);

	ret[0] = xmalloc(l0 + 1);
	ret[1] = xmalloc(l1 + 1);

	memcpy(ret[0], in, l0);
	ret[0][l0] = '\0';
	memcpy(ret[1], arrow + 2, l1 + 1);	/* includes the terminator */

	return ret;
}

/* set by the SIGINT handler, polled by acquire_prod() */
static volatile sig_atomic_t prod_end = 0;

/*
 * SIGINT handler installed while productions are being read: asks the
 * user to press enter and raises prod_end so acquire_prod() stops at the
 * next line.
 *
 * NOTE(review): outf() is presumably stdio based and therefore not
 * async-signal-safe -- confirm.
 */
void parser_sigint_handler(int sig)
{
	(void) sig;
	outf("ctrl+c pressed, press enter to end insertion.");
	prod_end = 1;
	return;
}

/*
 * Read productions, one per line, in the form "<left> -> <right>".
 *
 * Reading stops when acquire_string() returns NULL, or at the first line
 * received after SIGINT raised prod_end, provided at least one production
 * was stored; otherwise the user is told to enter one and the request to
 * stop is cleared.  The line that follows the interrupt is discarded.
 *
 * Both sides are parsed with acquire_list(); duplicates are allowed on
 * both, OR_SYMBOL only on the right hand side.  A production is stored
 * only when both sides yield a list.  Lines without "->" are reported as
 * "bad line".
 *
 * Returns `head` as left by the CUT() list macro.
 */
Prod *acquire_prod()
{
	Prod *head = NULL, *p = NULL;
	List *sx, *dx;	/* left and right hand side */
	char *buf, **s;

	HEAD(p, head, Prod);

	while((buf = acquire_string(fileno(in), '\n'))) {
		if(prod_end) {
			if(head->next) {
				xfree(buf);
				break;
			}

			outf("production list is empty. insert a production and then ask to end.\n");
			prod_end = 0;
			xfree(buf);
			continue;
		}

		if((s = tokenize(buf))) {
			sx = acquire_list(s[0], FLAGS_ACQUIRE_DUPS);
			dx = acquire_list(s[1], FLAGS_ACQUIRE_DUPS | FLAGS_ACQUIRE_OR);

			xfree(s[0]);
			xfree(s[1]);

			if(sx && dx) {
				NEW(p, Prod);
				ASSIGN(p, sx, sx);
				ASSIGN(p, dx, dx);
				NEXT(p);
			} else {
				/* release the whole list, not just its first node */
				if(sx)
					free_list(sx);
				if(dx)
					free_list(dx);
			}
		} else
			outf("bad line\n");

		/* tokenize() works on copies, so the line is no longer needed */
		xfree(buf);
	}

	CUT(p, head);

	return head;
}