/*
 * lapis: a small tokenizer and operator-precedence expression parser.
 *
 * Input is read one statement at a time (terminated by ';', '}' or an
 * unescaped newline), split into tokens, and reduced into a flat array of
 * binary-tree nodes whose LAST element is the root of the expression.
 *
 * NOTE: adjacent punctuation characters are merged into a single operator
 * token (so "(a+b)*c" must be written "( a + b ) * c").
 */
#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct lapis_node_s;

/** One parsed statement: a node array whose last element is the root. */
typedef struct {
    size_t nnodes;
    struct lapis_node_s *nodes;
} lapis_stmt_t;

/** A lexical token; `type` selects which union member is meaningful. */
typedef struct {
    enum lapis_ttype {
        LAPIS_TTYPE_NONE,
        LAPIS_TTYPE_IDENT,
        LAPIS_TTYPE_STR,
        LAPIS_TTYPE_FLOAT,
        LAPIS_TTYPE_INT,
        LAPIS_TTYPE_BOOL,
        LAPIS_TTYPE_OPER,
        LAPIS_TTYPE_STMT_LIST,
    } type;
    union {
        struct { size_t nstmts; lapis_stmt_t *stmts; }; /* STMT_LIST */
        struct { size_t len; char *str; };              /* IDENT / STR / OPER (not NUL-terminated) */
        double f64;                                     /* FLOAT */
        int64_t i64;                                    /* INT */
        _Bool bool_;                                    /* BOOL */
    };
} lapis_token_t;

/** Expression tree node; child indices refer to the same node array, SIZE_MAX means "no child". */
typedef struct lapis_node_s {
    lapis_token_t value;
    size_t lhs_idx, rhs_idx;
} lapis_node_t;

/* Values stored in the per-token priority array used by lapis_parse_expr(). */
enum {
    LAPIS_PRIO_OPERAND = -1, /* token is not an operator */
    LAPIS_PRIO_CLEARED = -2, /* bracket that was consumed while assigning priorities */
    LAPIS_LEVEL_STRIDE = 16  /* priority bonus per level of ( ) / [ ] nesting */
    /* Any value <= -3 encodes an already-built node: priority == -node_idx - 2. */
};

/* Operand positions an operator consumes (bit 0 = lhs, bit 1 = rhs). */
enum {
    LAPIS_ARG_NONE = 0,
    LAPIS_ARG_LHS  = 1,
    LAPIS_ARG_RHS  = 2,
    LAPIS_ARG_BOTH = 3
};

void lapis_token_free(lapis_token_t *token);
lapis_node_t *lapis_parse_expr(int64_t *nnodes, FILE *f);

/* Write a token's text (IDENT/STR/OPER) to `out`; tolerates empty strings. */
static void lapis_token_write(const lapis_token_t *tok, FILE *out)
{
    if (tok->str != NULL && tok->len > 0)
        fwrite(tok->str, 1, tok->len, out);
}

/**
 * Print the subtree rooted at nodes[idx] to stdout in infix form.
 * Operators are wrapped in "( ... )"; the index operator is printed as
 * "lhs [ rhs ]" so that the brackets stay balanced.
 */
void lapis_node_print(const lapis_node_t *nodes, size_t idx)
{
    const lapis_node_t *node = &nodes[idx];
    const lapis_token_t *tok = &node->value;
    const bool is_oper = (tok->type == LAPIS_TTYPE_OPER);
    const bool is_index = is_oper && tok->len > 0 && tok->str[0] == '[';

    if (is_oper && !is_index)
        putchar('(');

    if (node->lhs_idx != SIZE_MAX) {
        lapis_node_print(nodes, node->lhs_idx);
        putchar(' ');
    }

    switch (tok->type) {
    case LAPIS_TTYPE_IDENT:
    case LAPIS_TTYPE_STR:
    case LAPIS_TTYPE_OPER:
        lapis_token_write(tok, stdout);
        break;
    case LAPIS_TTYPE_FLOAT:
        printf("%lf", tok->f64);
        break;
    case LAPIS_TTYPE_INT:
        printf("%" PRId64, tok->i64);
        break;
    case LAPIS_TTYPE_STMT_LIST:
        /* the opening "{" is printed by the parent operator node */
        for (size_t s = 0; s < tok->nstmts; s++) {
            lapis_node_print(tok->stmts[s].nodes, tok->stmts[s].nnodes - 1);
            fputs("; ", stdout);
        }
        fputs("} ", stdout);
        break;
    default:
        break;
    }

    if (node->rhs_idx != SIZE_MAX) {
        putchar(' ');
        lapis_node_print(nodes, node->rhs_idx);
        if (is_index)
            putchar(' ');
    }

    if (is_oper)
        putchar(is_index ? ']' : ')');
}

/** Count the nodes reachable from nodes[idx] (including itself). */
size_t lapis_node_count(const lapis_node_t *nodes, size_t idx)
{
    size_t count = 1;
    if (nodes[idx].lhs_idx != SIZE_MAX)
        count += lapis_node_count(nodes, nodes[idx].lhs_idx);
    if (nodes[idx].rhs_idx != SIZE_MAX)
        count += lapis_node_count(nodes, nodes[idx].rhs_idx);
    return count;
}

/* Release a node array together with everything its tokens own. NULL is accepted. */
static void lapis_nodes_free(lapis_node_t *nodes, size_t nnodes)
{
    if (nodes == NULL)
        return;
    for (size_t i = 0; i < nnodes; i++)
        lapis_token_free(&nodes[i].value);
    free(nodes);
}

/// Free a token's internal buffer if it has one
/// (text for IDENT/STR/OPER, nested statements for STMT_LIST).
/// The token's type is left untouched; its pointer is reset so a second call is harmless.
void lapis_token_free(lapis_token_t *token)
{
    switch (token->type) {
    case LAPIS_TTYPE_IDENT:
    case LAPIS_TTYPE_OPER:
    case LAPIS_TTYPE_STR:
        free(token->str);
        token->str = NULL;
        token->len = 0;
        break;
    case LAPIS_TTYPE_STMT_LIST:
        if (token->stmts != NULL) {
            for (size_t s = 0; s < token->nstmts; s++)
                lapis_nodes_free(token->stmts[s].nodes, token->stmts[s].nnodes);
            free(token->stmts);
        }
        token->stmts = NULL;
        token->nstmts = 0;
        break;
    default:
        break;
    }
}

/* ------------------------------------------------------------------------- */
/* Tokenizer                                                                 */
/* ------------------------------------------------------------------------- */

/* Growable token array used only while tokenizing one statement. */
typedef struct {
    lapis_token_t *tokens;
    unsigned *ndigits; /* digits read per token (INT only); keeps "1.05" distinct from "1.5" */
    size_t count;
} lapis_token_list_t;

/* Append a copy of *tok; on failure the list is unchanged and *tok is still owned by the caller. */
static bool lapis_token_list_push(lapis_token_list_t *list, const lapis_token_t *tok, unsigned ndigits)
{
    lapis_token_t *grown_tokens = realloc(list->tokens, (list->count + 1) * sizeof *grown_tokens);
    if (grown_tokens == NULL)
        return false;
    list->tokens = grown_tokens;

    unsigned *grown_digits = realloc(list->ndigits, (list->count + 1) * sizeof *grown_digits);
    if (grown_digits == NULL)
        return false;
    list->ndigits = grown_digits;

    list->tokens[list->count] = *tok;
    list->ndigits[list->count] = ndigits;
    list->count++;
    return true;
}

/* Append one character to a text token; on failure the token is unchanged. */
static bool lapis_token_append_char(lapis_token_t *tok, int c)
{
    char *grown = realloc(tok->str, tok->len + 1);
    if (grown == NULL)
        return false;
    grown[tok->len] = (char)c;
    tok->str = grown;
    tok->len++;
    return true;
}

/* Move the token under construction (if any) into the list and reset it to NONE. */
static bool lapis_flush_pending(lapis_token_list_t *list, lapis_token_t *pending, unsigned *pending_digits)
{
    if (pending->type != LAPIS_TTYPE_NONE) {
        if (!lapis_token_list_push(list, pending, *pending_digits))
            return false;
    }
    *pending = (lapis_token_t){ .type = LAPIS_TTYPE_NONE, .str = NULL, .len = 0 };
    *pending_digits = 0;
    return true;
}

/*
 * Parse statements up to the '}' that closes a block (the '{' has already
 * been consumed). On success the statements are handed to the caller; on
 * failure everything parsed so far is released.
 */
static bool lapis_parse_block(FILE *f, lapis_stmt_t **stmts_out, size_t *nstmts_out)
{
    lapis_stmt_t *stmts = NULL;
    size_t nstmts = 0;

    for (;;) {
        int64_t nnodes = 0;
        lapis_node_t *nodes = lapis_parse_expr(&nnodes, f);
        if (nnodes < 0)
            goto FAIL;
        if (nnodes == 0)
            break; /* reached the closing '}' (or EOF) */

        lapis_stmt_t *grown = realloc(stmts, (nstmts + 1) * sizeof *grown);
        if (grown == NULL) {
            fprintf(stderr, "ERROR[%d]: realloc failed!\n", __LINE__);
            lapis_nodes_free(nodes, (size_t)nnodes);
            goto FAIL;
        }
        stmts = grown;
        stmts[nstmts] = (lapis_stmt_t){ .nnodes = (size_t)nnodes, .nodes = nodes };
        nstmts++;
    }

    *stmts_out = stmts;
    *nstmts_out = nstmts;
    return true;

FAIL:
    for (size_t s = 0; s < nstmts; s++)
        lapis_nodes_free(stmts[s].nodes, stmts[s].nnodes);
    free(stmts);
    return false;
}

/**
 * Read one statement from `f` and split it into tokens.
 *
 * @param ntokens  receives the token count, 0 when nothing was read
 *                 (EOF or a lone '}'), or -1 on error.
 * @param f        input stream.
 * @return heap array of *ntokens tokens owned by the caller (release each
 *         with lapis_token_free() and then free() the array), or NULL when
 *         *ntokens <= 0.
 *
 * A '{' starts a nested block: its statements are parsed recursively and
 * appear as an OPER "{" token followed by a STMT_LIST token. "INT . INT"
 * sequences are merged into a FLOAT token; the merged INT neighbours are
 * left in the array with type NONE.
 */
lapis_token_t *lapis_parse_tokens(int64_t *ntokens, FILE *f)
{
    lapis_token_list_t list = { .tokens = NULL, .ndigits = NULL, .count = 0 };
    lapis_token_t pending = { .type = LAPIS_TTYPE_NONE, .str = NULL, .len = 0 };
    unsigned pending_digits = 0;
    int c;

    while ((c = fgetc(f)) != EOF) {
        if (c == '{') {
            /* store whatever precedes the block first so token order is preserved */
            if (!lapis_flush_pending(&list, &pending, &pending_digits))
                goto ALLOC_FAIL;

            lapis_stmt_t *stmts = NULL;
            size_t nstmts = 0;
            if (!lapis_parse_block(f, &stmts, &nstmts))
                goto ERROR;

            if (nstmts > 0) {
                /* an operator connects the statement list to the outer statement ... */
                lapis_token_t brace = { .type = LAPIS_TTYPE_OPER, .str = NULL, .len = 0 };
                /* ... and the statement list itself becomes its right-hand operand */
                lapis_token_t body = { .type = LAPIS_TTYPE_STMT_LIST };
                body.stmts = stmts;
                body.nstmts = nstmts;

                if (!lapis_token_append_char(&brace, '{')
                    || !lapis_token_list_push(&list, &brace, 0)) {
                    lapis_token_free(&brace);
                    lapis_token_free(&body);
                    goto ALLOC_FAIL;
                }
                if (!lapis_token_list_push(&list, &body, 0)) {
                    lapis_token_free(&body);
                    goto ALLOC_FAIL;
                }
            }
            continue;
        }

        bool escaped = false;

        /* skip over comments (the terminating newline is processed below) */
        if (c == '#') {
            while ((c = fgetc(f)) != '\n' && c != EOF)
                ;
            if (c == EOF)
                break;
        }

        /* detect line escapes */
        if (c == '\\') {
            escaped = true;
            c = fgetc(f);
            if (c == EOF)
                break;
        }

        /* statement terminators */
        if (c == ';' || c == '}' || (!escaped && c == '\n')) {
            const bool have_tokens = (list.count > 0) || (pending.type != LAPIS_TTYPE_NONE);
            if (have_tokens || c == '}') {
                /* A '}' that also ends a statement must still be seen by the
                 * enclosing block, so hand it back to the stream. */
                if (c == '}' && have_tokens)
                    ungetc(c, f);
                break;
            }
            continue; /* empty statement / blank line */
        }

        enum lapis_ttype kind;
        if ((pending.type == LAPIS_TTYPE_IDENT && isdigit(c)) || isalpha(c) || c == '_') {
            kind = LAPIS_TTYPE_IDENT;
        } else if (isdigit(c)) {
            kind = LAPIS_TTYPE_INT;
        } else if (c == '"') {
            kind = LAPIS_TTYPE_STR;
        } else if (ispunct(c)) {
            kind = LAPIS_TTYPE_OPER;
        } else if (isspace(c)) {
            kind = LAPIS_TTYPE_NONE;
        } else {
            fprintf(stderr, "ERROR [%s]: Invalid char c='%c' at offset=%ld!\n", __func__, c, ftell(f));
            goto ERROR;
        }

        if (kind != pending.type) {
            /* store the previous token if it was valid, then start a new one */
            if (!lapis_flush_pending(&list, &pending, &pending_digits))
                goto ALLOC_FAIL;
            pending.type = kind;
            /* NOTE: float and bool will not occur during initial tokenization;
             * they are only for results of reducing expressions later */
            if (kind == LAPIS_TTYPE_INT)
                pending.i64 = 0;
        }

        if (kind == LAPIS_TTYPE_STR) {
            bool in_esc = false;
            while ((c = fgetc(f)) != '"' || in_esc) {
                if (c == EOF) {
                    fprintf(stderr, "ERROR [%s]: EOF inside string token! offset=%ld\n", __func__, ftell(f));
                    goto ERROR;
                }
                if (!in_esc && c == '\\') {
                    in_esc = true;
                    continue;
                }
                in_esc = false;
                if (!lapis_token_append_char(&pending, c))
                    goto ALLOC_FAIL;
            }
            continue;
        }

        /* extend existing token */
        switch (kind) {
        case LAPIS_TTYPE_IDENT:
        case LAPIS_TTYPE_OPER:
            if (!lapis_token_append_char(&pending, c))
                goto ALLOC_FAIL;
            break;
        case LAPIS_TTYPE_INT: {
            const int digit = c - '0';
            if (pending.i64 > (INT64_MAX - digit) / 10) {
                fprintf(stderr, "ERROR [%s]: integer literal too large at offset=%ld!\n", __func__, ftell(f));
                goto ERROR;
            }
            pending.i64 = pending.i64 * 10 + digit;
            pending_digits++;
            break;
        }
        default:
            break;
        }
    }

    /* the statement may end at EOF or a terminator with a token still pending */
    if (!lapis_flush_pending(&list, &pending, &pending_digits))
        goto ALLOC_FAIL;

    /* handle float decimal point */
    for (size_t n = 0; n < list.count; n++) {
        lapis_token_t *dot = &list.tokens[n];
        if (dot->type != LAPIS_TTYPE_OPER || dot->len != 1 || dot->str[0] != '.')
            continue;

        lapis_token_t *whole = (n > 0) ? &list.tokens[n - 1] : NULL;
        lapis_token_t *frac = (n + 1 < list.count) ? &list.tokens[n + 1] : NULL;
        bool valid = false;
        double val = 0.;

        if (whole != NULL && whole->type == LAPIS_TTYPE_INT) {
            val += (double)whole->i64;
            whole->type = LAPIS_TTYPE_NONE;
            valid = true;
        }
        if (frac != NULL && frac->type == LAPIS_TTYPE_INT) {
            /* scale by the number of digits typed, so leading zeros count */
            double den = 1.;
            for (unsigned d = 0; d < list.ndigits[n + 1]; d++)
                den *= 10.;
            val += (double)frac->i64 / den;
            frac->type = LAPIS_TTYPE_NONE;
            valid = true;
        }
        if (!valid)
            continue;

        free(dot->str); /* the "." text is no longer needed */
        dot->type = LAPIS_TTYPE_FLOAT;
        dot->f64 = val;
    }
    // TODO handle sci notation (e.g. 3e[+-]6,3E[+-]6,...)

    free(list.ndigits);
    *ntokens = (int64_t)list.count;
    return list.tokens;

ALLOC_FAIL:
    fprintf(stderr, "ERROR [%s]: realloc failed!\n", __func__);
ERROR:
    lapis_token_free(&pending);
    for (size_t n = 0; n < list.count; n++)
        lapis_token_free(&list.tokens[n]);
    free(list.tokens);
    free(list.ndigits);
    *ntokens = -1;
    return NULL;
}

/* ------------------------------------------------------------------------- */
/* Expression parser                                                         */
/* ------------------------------------------------------------------------- */

/* Static description of an operator whose meaning does not depend on context. */
typedef struct {
    const char *text;
    int64_t priority; /* higher binds tighter */
    uint8_t arg_form; /* LAPIS_ARG_* */
} lapis_oper_info_t;

/* '+', '-', '(', ')', '[' and ']' are context dependent and handled separately. */
static const lapis_oper_info_t lapis_opers[] = {
    { ",",    1, LAPIS_ARG_BOTH }, { "{",    1, LAPIS_ARG_BOTH },
    { "...",  2, LAPIS_ARG_LHS  },
    { "=",    2, LAPIS_ARG_BOTH }, { "+=",   2, LAPIS_ARG_BOTH },
    { "-=",   2, LAPIS_ARG_BOTH }, { "*=",   2, LAPIS_ARG_BOTH },
    { "/=",   2, LAPIS_ARG_BOTH }, { "%=",   2, LAPIS_ARG_BOTH },
    { "<<=",  2, LAPIS_ARG_BOTH }, { ">>=",  2, LAPIS_ARG_BOTH },
    { "&=",   2, LAPIS_ARG_BOTH }, { "^=",   2, LAPIS_ARG_BOTH },
    { "|=",   2, LAPIS_ARG_BOTH },
    /* ?: would be priority 3 */
    { "||",   4, LAPIS_ARG_BOTH },
    { "&&",   5, LAPIS_ARG_BOTH },
    { "|",    6, LAPIS_ARG_BOTH },
    { "^",    7, LAPIS_ARG_BOTH },
    { "&",    8, LAPIS_ARG_BOTH },
    { "==",   9, LAPIS_ARG_BOTH }, { "!=",   9, LAPIS_ARG_BOTH },
    { "<",   10, LAPIS_ARG_BOTH }, { ">",   10, LAPIS_ARG_BOTH },
    { "<=",  10, LAPIS_ARG_BOTH }, { ">=",  10, LAPIS_ARG_BOTH },
    { "<<",  11, LAPIS_ARG_BOTH }, { ">>",  11, LAPIS_ARG_BOTH },
    { "*",   13, LAPIS_ARG_BOTH }, { "/",   13, LAPIS_ARG_BOTH },
    { "%",   13, LAPIS_ARG_BOTH },
    { ":",   14, LAPIS_ARG_BOTH },
    { "!",   14, LAPIS_ARG_RHS  }, { "~",   14, LAPIS_ARG_RHS  },
    { "++",  15, LAPIS_ARG_BOTH }, { "--",  15, LAPIS_ARG_BOTH },
    { ".",   15, LAPIS_ARG_BOTH }, { "->",  15, LAPIS_ARG_BOTH },
};

/* Exact-match lookup of an operator's text; NULL when unknown. */
static const lapis_oper_info_t *lapis_oper_lookup(const char *str, size_t len)
{
    for (size_t i = 0; i < sizeof lapis_opers / sizeof lapis_opers[0]; i++) {
        const char *text = lapis_opers[i].text;
        if (strlen(text) == len && memcmp(text, str, len) == 0)
            return &lapis_opers[i];
    }
    return NULL;
}

/* Working state of one lapis_parse_expr() call. */
typedef struct {
    lapis_token_t *tokens;
    size_t ntokens;
    int64_t *priorities; /* per token, see LAPIS_PRIO_* */
    uint8_t *arg_form;   /* per token, LAPIS_ARG_* */
    lapis_node_t *nodes;
    size_t num_nodes;
} lapis_parse_state_t;

/* Append a node; on failure the node array is unchanged. */
static bool lapis_nodes_push(lapis_parse_state_t *st, const lapis_node_t *node)
{
    lapis_node_t *grown = realloc(st->nodes, (st->num_nodes + 1) * sizeof *grown);
    if (grown == NULL) {
        fprintf(stderr, "ERROR[%d]: realloc failed!\n", __LINE__);
        return false;
    }
    grown[st->num_nodes] = *node;
    st->nodes = grown;
    st->num_nodes++;
    return true;
}

/*
 * Give every token a priority and operand form. Brackets are consumed here:
 * they only raise the priority of what they enclose. An identifier directly
 * followed by '(' becomes a prefix operator (function call / keyword).
 */
static bool lapis_assign_priorities(lapis_parse_state_t *st)
{
    int64_t paren_level = 0;
    int64_t index_level = 0;
    bool prev_is_operand = false; /* decides whether '+'/'-' is a sign or binary */

    for (size_t n = 0; n < st->ntokens; n++) {
        lapis_token_t *tok = &st->tokens[n];

        if (tok->type != LAPIS_TTYPE_OPER) {
            st->priorities[n] = LAPIS_PRIO_OPERAND;
            if (tok->type != LAPIS_TTYPE_NONE)
                prev_is_operand = true;
            continue;
        }

        const int64_t level_bonus = (paren_level + index_level) * LAPIS_LEVEL_STRIDE;
        const char single = (tok->len == 1) ? tok->str[0] : '\0';

        switch (single) {
        case '+':
        case '-':
            /* a sign when nothing usable as a left operand precedes it */
            if (prev_is_operand) {
                st->arg_form[n] = LAPIS_ARG_BOTH;
                st->priorities[n] = 12 + level_bonus;
            } else {
                st->arg_form[n] = LAPIS_ARG_RHS;
                st->priorities[n] = 14 + level_bonus;
            }
            prev_is_operand = false;
            break;

        case '(':
            /* handle function call / keyword operator */
            if (n > 0 && st->tokens[n - 1].type == LAPIS_TTYPE_IDENT) {
                st->tokens[n - 1].type = LAPIS_TTYPE_OPER;
                st->priorities[n - 1] = 15 + level_bonus;
                st->arg_form[n - 1] = LAPIS_ARG_RHS; /* prefix-unary; takes only rhs */
            }
            st->priorities[n] = LAPIS_PRIO_CLEARED;
            lapis_token_free(tok);
            tok->type = LAPIS_TTYPE_NONE;
            paren_level++;
            prev_is_operand = false;
            break;

        case ')':
            if (paren_level == 0) {
                fprintf(stderr, "ERROR(%d): unexpected ')'\n", __LINE__);
                return false;
            }
            st->priorities[n] = LAPIS_PRIO_CLEARED;
            lapis_token_free(tok);
            tok->type = LAPIS_TTYPE_NONE;
            paren_level--;
            prev_is_operand = true;
            break;

        case '[':
            /* the index operator itself binds at the OUTER level */
            st->priorities[n] = 15 + level_bonus;
            st->arg_form[n] = LAPIS_ARG_BOTH;
            index_level++;
            prev_is_operand = false;
            break;

        case ']':
            if (index_level == 0) {
                fprintf(stderr, "ERROR(%d): unexpected ']'\n", __LINE__);
                return false;
            }
            st->priorities[n] = LAPIS_PRIO_CLEARED;
            lapis_token_free(tok);
            tok->type = LAPIS_TTYPE_NONE;
            index_level--;
            prev_is_operand = true;
            break;

        default: {
            const lapis_oper_info_t *info = lapis_oper_lookup(tok->str, tok->len);
            if (info == NULL) {
                fprintf(stderr, "ERROR[%d]: unknown priority for operator '", __LINE__);
                lapis_token_write(tok, stderr);
                fputs("'\n", stderr);
                return false;
            }
            st->priorities[n] = info->priority + level_bonus;
            st->arg_form[n] = info->arg_form;
            prev_is_operand = false;
            break;
        }
        }
    }

    if (paren_level != 0) {
        fprintf(stderr, "ERROR(%d): missing ')'\n", __LINE__);
        return false;
    }
    if (index_level != 0) {
        fprintf(stderr, "ERROR(%d): missing ']'\n", __LINE__);
        return false;
    }
    return true;
}

/*
 * Find the nearest unconsumed token on one side of operator `oper_idx`, turn
 * it into (or look up) a node and mark the token as consumed.
 * Stores the operand's node index in *node_idx_out; returns false on error.
 */
static bool lapis_take_operand(lapis_parse_state_t *st, size_t oper_idx, bool want_lhs, size_t *node_idx_out)
{
    const char *side_uc = want_lhs ? "LHS" : "RHS";
    const char *side_lc = want_lhs ? "lhs" : "rhs";
    const lapis_token_t *oper = &st->tokens[oper_idx];
    size_t n = oper_idx;
    bool found = false;

    if (want_lhs) {
        while (n > 0) {
            n--;
            if (st->tokens[n].type != LAPIS_TTYPE_NONE) {
                found = true;
                break;
            }
        }
    } else {
        for (n = oper_idx + 1; n < st->ntokens; n++) {
            if (st->tokens[n].type != LAPIS_TTYPE_NONE) {
                found = true;
                break;
            }
        }
    }

    if (!found) {
        fprintf(stderr, "ERROR[%d]: Found no valid %s operand for operator: ", __LINE__, side_uc);
        lapis_token_write(oper, stderr);
        fputc('\n', stderr);
        return false;
    }

    lapis_token_t *operand = &st->tokens[n];

    if (operand->type == LAPIS_TTYPE_OPER) {
        if (st->priorities[n] >= LAPIS_PRIO_OPERAND) {
            fprintf(stderr, "ERROR[%d]: Attempted to take %s as operand before evaluation!", __LINE__, side_uc);
            fprintf(stderr, " %s=", side_lc);
            lapis_token_write(operand, stderr);
            fputs(" operator=", stderr);
            lapis_token_write(oper, stderr);
            fputc('\n', stderr);
            return false;
        }
        /* An evaluated operator keeps its node index encoded in the negative
         * range of its priority slot (priority == -node_idx - 2). */
        const int64_t node_idx = -(st->priorities[n] + 2);
        if (node_idx <= 0 || (size_t)node_idx >= st->num_nodes) {
            fprintf(stderr, "ERROR[%d]: internal error: bad node index for %s operand\n", __LINE__, side_lc);
            return false;
        }
        *node_idx_out = (size_t)node_idx;
    } else {
        const lapis_node_t leaf = { .value = *operand, .lhs_idx = SIZE_MAX, .rhs_idx = SIZE_MAX };
        if (!lapis_nodes_push(st, &leaf))
            return false;
        *node_idx_out = st->num_nodes - 1;
    }

    /* clear token type to prevent reuse; the node now owns the token's data */
    operand->type = LAPIS_TTYPE_NONE;
    return true;
}

/**
 * Read one statement from `f` and build its expression tree.
 *
 * @param nnodes  receives the node count, 0 when there was nothing to parse
 *                (EOF or end of block), or -1 on error.
 * @param f       input stream.
 * @return heap array of *nnodes nodes whose LAST element is the root, or
 *         NULL when *nnodes <= 0. The caller owns the array and the data of
 *         every node's token.
 *
 * Operators are reduced from highest to lowest priority; among equal
 * priorities the leftmost wins, except that prefix operators win ties so
 * that chains such as "! ! x" or "a . f ( x )" reduce innermost-first.
 */
lapis_node_t *lapis_parse_expr(int64_t *nnodes, FILE *f)
{
    lapis_parse_state_t st = { .tokens = NULL, .ntokens = 0, .priorities = NULL,
                               .arg_form = NULL, .nodes = NULL, .num_nodes = 0 };
    int64_t ntokens = 0;
    size_t remaining = 0;
    size_t last_remaining = SIZE_MAX;

    st.tokens = lapis_parse_tokens(&ntokens, f);
    if (ntokens < 0)
        goto ERROR;
    if (st.tokens == NULL) {
        // likely EOF; exit cleanly
        *nnodes = 0;
        return NULL;
    }
    st.ntokens = (size_t)ntokens;

    st.priorities = malloc(st.ntokens * sizeof *st.priorities);
    st.arg_form = calloc(st.ntokens, sizeof *st.arg_form);
    if (st.priorities == NULL || st.arg_form == NULL)
        goto ERROR;

    if (!lapis_assign_priorities(&st))
        goto ERROR;

    for (;;) {
        /* seek the next highest priority operator */
        int64_t best_prio = -1;
        size_t best = SIZE_MAX;
        for (size_t n = 0; n < st.ntokens; n++) {
            const int64_t prio = st.priorities[n];
            if (prio < 0)
                continue;
            if (prio > best_prio || (prio == best_prio && st.arg_form[n] == LAPIS_ARG_RHS)) {
                best_prio = prio;
                best = n;
            }
        }
        /* quit if no more operators were found */
        if (best == SIZE_MAX)
            break;

        lapis_node_t node = { .value = st.tokens[best], .lhs_idx = SIZE_MAX, .rhs_idx = SIZE_MAX };

        if ((st.arg_form[best] & LAPIS_ARG_LHS) && !lapis_take_operand(&st, best, true, &node.lhs_idx))
            goto ERROR;
        if ((st.arg_form[best] & LAPIS_ARG_RHS) && !lapis_take_operand(&st, best, false, &node.rhs_idx))
            goto ERROR;

        if (!lapis_nodes_push(&st, &node))
            goto ERROR;
        const size_t idx = st.num_nodes - 1;

#ifdef LAPIS_TRACE
        fprintf(stderr, "add oper token %zu w/ priority %" PRId64 " as node %zu\n",
                best, st.priorities[best], idx);
#endif

        /* The node owns the operator text from here on. The token keeps type
         * OPER so that a lower-priority operator can still find it. */
        st.tokens[best].str = NULL;
        st.tokens[best].len = 0;
        /* Encode the node index in the priority slot: this both removes the
         * operator from the search above and lets later operators find it. */
        st.priorities[best] = -(int64_t)idx - 2;
    }

    /* After a complete reduction exactly one token is left unconsumed: the
     * root operator, or the sole operand of an operator-free expression. */
    for (size_t n = 0; n < st.ntokens; n++) {
        if (st.tokens[n].type != LAPIS_TTYPE_NONE) {
            remaining++;
            last_remaining = n;
        }
    }

    if (st.num_nodes > 0 && lapis_node_count(st.nodes, st.num_nodes - 1) != st.num_nodes) {
        // NOTE: this likely indicates a priority conflict of some kind between operators
        fprintf(stderr, "ERROR[%d]: Unknown syntax error! Orphan nodes detected!\n", __LINE__);
        goto ERROR;
    }
    if (remaining != 1) {
        fprintf(stderr, "ERROR[%d]: Unknown syntax error! %s\n", __LINE__,
                remaining == 0 ? "Empty expression!" : "Orphan tokens detected!");
        goto ERROR;
    }
    if (st.num_nodes == 0) {
        /* no operators at all: the expression is a single operand */
        const lapis_node_t leaf = { .value = st.tokens[last_remaining],
                                    .lhs_idx = SIZE_MAX, .rhs_idx = SIZE_MAX };
        if (!lapis_nodes_push(&st, &leaf))
            goto ERROR;
        st.tokens[last_remaining].type = LAPIS_TTYPE_NONE;
    }

    /* every token's data now belongs to a node, so this frees nothing that is still in use */
    for (size_t n = 0; n < st.ntokens; n++)
        lapis_token_free(&st.tokens[n]);
    free(st.tokens);
    free(st.priorities);
    free(st.arg_form);

    *nnodes = (int64_t)st.num_nodes;
    return st.nodes;

ERROR:
    if (st.tokens != NULL) {
        /* consumed tokens are NONE and evaluated operators have a NULL str,
         * so nothing is freed twice */
        for (size_t n = 0; n < st.ntokens; n++)
            lapis_token_free(&st.tokens[n]);
        free(st.tokens);
    }
    free(st.priorities);
    free(st.arg_form);
    lapis_nodes_free(st.nodes, st.num_nodes);
    *nnodes = -1;
    return NULL;
}

/** Read statements from stdin and print each parsed expression tree on its own line. */
int main(void)
{
#if 0
    int64_t ntokens = 0;
    lapis_token_t *tokens = lapis_parse_tokens(&ntokens, stdin);
    printf("Tokens (%" PRId64 "):\n", ntokens);
    for (int64_t n = 0; n < ntokens; n++) {
        switch (tokens[n].type) {
        default: break;
        case LAPIS_TTYPE_STR:
        case LAPIS_TTYPE_IDENT:
        case LAPIS_TTYPE_OPER:
            fputs("  ", stdout);
            fwrite(tokens[n].str, 1, tokens[n].len, stdout);
            putchar('\n');
            break;
        case LAPIS_TTYPE_INT:
            printf("  %" PRId64 " (int)\n", tokens[n].i64);
            break;
        case LAPIS_TTYPE_FLOAT:
            printf("  %lf (float)\n", tokens[n].f64);
            break;
        }
    }
#else
    for (;;) {
        int64_t nnodes = 0;
        lapis_node_t *nodes = lapis_parse_expr(&nnodes, stdin);
        if (nnodes < 0) {
            fputs("Error occured while parsing expression!\n", stderr);
            return 1;
        }
        if (nnodes == 0)
            break;
        lapis_node_print(nodes, (size_t)nnodes - 1);
        putchar('\n');
        lapis_nodes_free(nodes, (size_t)nnodes);
    }
#endif
    return 0;
}