/* Request the POSIX.1-2008 API (popen, pclose, strdup, mkstemp, fmemopen)
 * explicitly so the file also builds with a strict -std=c11 compiler. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h> /*close()*/

/// realloc() that aborts on failure: the parser has no way to recover from OOM.
static void *xrealloc(void *ptr, size_t size)
{
    void *grown = realloc(ptr, size);
    if ( grown == NULL ) {
        fputs("[ERROR] out of memory\n", stderr);
        abort();
    }
    return grown;
}

/// Append n bytes of src to the growable (NOT NUL-terminated) buffer buf of length *len.
static char *buf_append(char *buf, size_t *len, const char *src, size_t n)
{
    if ( n == 0 )
        return buf;
    buf = xrealloc(buf, *len + n);
    memcpy(buf + *len, src, n);
    *len += n;
    return buf;
}

/// Append a single byte to the growable buffer buf of length *len.
static char *buf_push(char *buf, size_t *len, char ch)
{
    return buf_append(buf, len, &ch, 1);
}

/// True for characters that may appear in a C identifier or keyword.
static int is_ident_char(int ch)
{
    /* <ctype.h> requires an unsigned char value (or EOF) */
    return ch == '_' || isalnum((unsigned char)ch);
}

/**
 * realloc s1 and append s2 to it.
 *
 * @param s1  heap allocated, NUL-terminated string (ownership is taken)
 * @param s2  NUL-terminated string to append (not modified)
 * @return    the grown string, or NULL on failure. On failure s1 has been
 *            freed, so `s = cstr_append(s, ...)` never leaks.
 */
char *cstr_append(char *s1, const char *s2)
{
    if ( s1 == NULL || s2 == NULL ) {
        free( s1 );
        return NULL;
    }
    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);
    char *grown = realloc(s1, len1 + len2 + 1);
    if ( grown == NULL ) {
        free( s1 );
        return NULL;
    }
    memcpy(grown + len1, s2, len2 + 1); /* includes the terminator */
    return grown;
}

/// Concatenate nparts strings into one freshly allocated string (NULL on failure).
static char *cstr_join(const char *const *parts, size_t nparts)
{
    char *joined = strdup("");
    for (size_t i = 0; joined != NULL && i < nparts; i++)
        joined = cstr_append(joined, parts[i]);
    return joined;
}

/**
 * Open a stream of the desired header (after 1st stage processing of the given CC program)
 * which includes all other headers it also includes and in addition the compiler evaluates
 * all macros and cleans out all comments for us.
 *
 * NOTE(security): cc_cmd and hdr_fn are pasted into a shell command line
 * unescaped, so they must come from a trusted source.
 *
 * @return a stream to be closed with pclose(), or NULL on failure.
 */
FILE *open_c_header(const char *cc_cmd, const char *hdr_fn)
{
    const char *const parts[] = {
        "echo \"#include <", hdr_fn, ">\" | ", cc_cmd, " -I . -E -"
    };
    char *cmd = cstr_join(parts, sizeof parts / sizeof parts[0]);
    if ( cmd == NULL )
        return NULL;

    FILE *f = popen(cmd, "r");
    free( cmd );
    return f;
}

/**
 * Run a header file through the compiler to validate its syntax.
 *
 * The command line is echoed to stdout before it is run.
 *
 * NOTE(security): cc_cmd and hdr_fn are pasted into a shell command line
 * unescaped, so they must come from a trusted source.
 *
 * @return 1 if the compiler accepted the header, 0 otherwise.
 */
int valid_c_header(const char *cc_cmd, const char *hdr_fn)
{
    static const char tmppath[] = "/tmp/.";

    const char *const fn_parts[] = { tmppath, hdr_fn, ".o.XXXXXX" };
    char *test_fn = cstr_join(fn_parts, sizeof fn_parts / sizeof fn_parts[0]);
    if ( test_fn == NULL )
        return 0;

    // A header such as "sys/types.h" would otherwise name a file inside a
    // non-existent directory and make mkstemp() fail; flatten the separators.
    for (char *p = test_fn + (sizeof tmppath - 1); *p != '\0'; p++) {
        if ( *p == '/' )
            *p = '_';
    }

    // NOTE: This generates a unique filename by replacing the 'XXXXXX' with random chars then
    // opens the file. It MODIFIES its argument string to contain the random chars selected to
    // replace the 'XXXXXX' so after call if the filedesc is valid, test_fn is the true filename.
    int fd = mkstemp(test_fn);
    if ( fd == -1 ) {
        free( test_fn );
        return 0;
    }
    close(fd);

    const char *const cmd_parts[] = {
        "echo \"#include <", hdr_fn, ">\" | ", cc_cmd,
        " -I . -c -x c -o ", /*include ./; compile only, don't link; c syntax*/
        test_fn,
        " -"                 /*use stdin*/
    };
    char *cmd = cstr_join(cmd_parts, sizeof cmd_parts / sizeof cmd_parts[0]);

    int ok = 0;
    if ( cmd != NULL ) {
        puts(cmd);
        ok = (system(cmd) == 0);
    }

    remove( test_fn );
    free( test_fn );
    free( cmd );
    return ok;
}

/// Terminate word (length len) and store it as the next entry of the idkws array.
static char **idkws_store(char **idkws, size_t *nidkws, char *word, size_t len)
{
    word = xrealloc(word, len + 1);
    word[len] = '\0';
    idkws = xrealloc(idkws, (*nidkws + 1) * sizeof *idkws);
    idkws[*nidkws] = word;
    (*nidkws)++;
    return idkws;
}

/**
 * Parse a series of keywords/identifiers until a non-kw/id is found indicating the end.
 *
 * @param nidkws  receives the number of words returned
 * @param c       receives the character that stopped the scan (or EOF)
 * @return        array of *nidkws heap allocated strings (NULL when empty)
 */
char ** parse_c_header_identkws(size_t *nidkws, int *c, FILE *f)
{
    *nidkws = 0;
    char ** idkws = NULL;
    size_t cur_kw_len = 0;
    char * cur_kw = NULL;

    while ( (*c = fgetc(f)) != EOF ) {
        if ( isspace(*c) ) {
            // whitespace ends any in progress word
            if ( cur_kw_len > 0 ) {
                idkws = idkws_store(idkws, nidkws, cur_kw, cur_kw_len);
                cur_kw = NULL;
                cur_kw_len = 0;
            }
        } else if ( is_ident_char(*c) ) {
            /* identifiers / keywords */
            cur_kw = buf_push(cur_kw, &cur_kw_len, (char)*c);
        } else {
            /* anything else breaks */
            break;
        }
    }

    // store any last in progress word
    if ( cur_kw_len > 0 )
        idkws = idkws_store(idkws, nidkws, cur_kw, cur_kw_len);

    return idkws;
}

/**
 * Read the text of a '{' ... '}' body whose opening brace was already consumed.
 *
 * @param c    receives the last character read ('}' or EOF)
 * @param len  receives the body length, excluding the closing brace
 * @return     heap buffer of *len bytes followed by a NUL terminator that is
 *             not counted in *len; never NULL
 */
char * parse_c_header_decl_body(int *c, size_t *len, FILE *f)
{
    *len = 0;
    char * s = NULL;
    size_t level = 1;

    while ( (*c = fgetc(f)) != EOF && (*c != '}' || level > 1) ) {
        if ( *c == '{' )
            level++;
        else if ( *c == '}' )
            level--;
        s = buf_push(s, len, (char)*c);
    }

    // terminate so that callers may use string functions on the body
    s = xrealloc(s, *len + 1);
    s[*len] = '\0';
    return s;
}

/**
 * Read the text of a '(' ... ')' argument list whose opening parenthesis was
 * already consumed. Every ',' is rewritten to ';' and a final ';' is appended
 * so that the result can be parsed as a sequence of declarations.
 *
 * @param c    receives the last character read (')' or EOF)
 * @param len  receives the length of the rewritten text
 * @return     heap buffer of *len bytes followed by a NUL terminator that is
 *             not counted in *len; never NULL
 */
char * parse_c_header_decl_args(int *c, size_t *len, FILE *f)
{
    *len = 0;
    char * s = NULL;
    size_t level = 1;

    while ( (*c = fgetc(f)) != EOF && (*c != ')' || level > 1) ) {
        if ( *c == '(' )
            level++;
        else if ( *c == ')' )
            level--;
        s = buf_push(s, len, (*c != ',') ? (char)*c : ';');
    }
    s = buf_push(s, len, ';');

    // terminate so that callers may use string functions on the arguments
    s = xrealloc(s, *len + 1);
    s[*len] = '\0';
    return s;
}

/// Returns the length to the end of the next token delimited by delim starting
/// from the previous value of *p before the call. *p is advanced past the
/// token (and its delimiter). Returns 0 when nothing remains.
size_t next_tok_len(char **p, char *p_end, char delim)
{
    if ( p == NULL || *p == NULL || *p >= p_end )
        return 0;

    char *start = *p;
    char *end = memchr(start, delim, (size_t)(p_end - start));
    if ( end == NULL ) {
        // whatever remains is the last word
        *p = p_end;
        return (size_t)(p_end - start);
    }
    *p = end + 1;
    return (size_t)(end - start);
}

/**
 * Get pointer to and length of the last identifier word in statement string s.
 *
 * Scans backwards over runs of [A-Za-z0-9_]; a run only counts as an
 * identifier when it starts with [A-Za-z_], otherwise the scan continues with
 * the previous run. One-character identifiers are recognised too.
 *
 * @param lwlen  receives the identifier length (0 when none is found)
 * @return       pointer into s, or NULL when s contains no identifier
 */
char *parse_c_last_word(size_t *lwlen, char *s)
{
    size_t pos = (s != NULL) ? strlen(s) : 0;

    while ( pos > 0 ) {
        // skip separators behind the candidate
        while ( pos > 0 && !is_ident_char(s[pos-1]) )
            pos--;
        size_t end = pos;
        // walk to the start of the candidate
        while ( pos > 0 && is_ident_char(s[pos-1]) )
            pos--;
        if ( end > pos && (s[pos] == '_' || isalpha((unsigned char)s[pos])) ) {
            *lwlen = end - pos;
            return &s[pos];
        }
    }

    // nothing found
    *lwlen = 0;
    return NULL;
}

char ** parse_c_header(size_t *ndecl, FILE *f);

/**
 * Parse sub expression from buffer.
 *
 * Parses sub as a sequence of declarations and appends each of them to out as
 * "<type and []s>,<identifier>;" (or "<type>,;" when the declaration has no
 * separate identifier).
 *
 * @param out      growable output buffer (may be NULL), NOT NUL-terminated
 * @param out_len  in/out length of out
 * @return         the (possibly moved) output buffer
 */
char * parse_c_header_sub(char *out, size_t *out_len, const char *sub, size_t sub_len)
{
    // fmemopen() may reject an empty buffer; there is nothing to parse anyway
    if ( sub == NULL || sub_len == 0 )
        return out;

    FILE *fsub = fmemopen((void*)sub, sub_len, "r");
    if ( fsub == NULL )
        return out;

    size_t n_sub_decl = 0;
    char ** sub_decl = parse_c_header(&n_sub_decl, fsub);
    fclose(fsub);

    for (size_t n=0; n < n_sub_decl; n++) {
        char * d = sub_decl[n];
        size_t d_len = strlen(d);

        size_t lwlen = 0;
        char * lastword = parse_c_last_word(&lwlen, d);
        if ( lastword == NULL ) {
            lastword = d + d_len;
            lwlen = 0;
        }

        size_t type_len = (size_t)(lastword - d);
        size_t type_array_len = (size_t)((d + d_len) - (lastword + lwlen));
        size_t type_offset = *out_len;

        // add type identifiers and pointers
        out = buf_append(out, out_len, d, type_len);
        // add array []'s
        out = buf_append(out, out_len, lastword + lwlen, type_array_len);

        // NOTE: temporarily null terminate for calling parse_c_last_word again
        out = xrealloc(out, (*out_len) + 1);
        out[*out_len] = '\0';

        // check if there are any words in the type string. If not, lastword is part of type instead
        size_t type_lwlen = 0;
        int type_has_word = (parse_c_last_word(&type_lwlen, out + type_offset) != NULL);

        // if lastword is a variable identifier the separator goes before it
        if ( type_has_word )
            out = buf_push(out, out_len, ',');
        // add identifier
        out = buf_append(out, out_len, lastword, lwlen);
        // if lastword is part of the type the separator goes after it
        if ( !type_has_word )
            out = buf_push(out, out_len, ',');
        out = buf_push(out, out_len, ';');

        free( d );
    }
    free( sub_decl );
    return out;
}

/// Report which aggregate keyword appears first among the space separated
/// words of s: 's' (struct), 'u' (union), 'e' (enum) or 0 when none does.
static int leading_aggregate_kw(char *s, size_t slen)
{
    if ( s == NULL )
        return 0;

    char *p = s, *tok = s;
    size_t word_len;
    while ( (word_len = next_tok_len(&p, s + slen, ' ')) != 0 ) {
        if ( word_len == (sizeof("struct")-1) && 0 == memcmp("struct", tok, word_len) )
            return 's';
        if ( word_len == (sizeof("union")-1) && 0 == memcmp("union", tok, word_len) )
            return 'u';
        if ( word_len == (sizeof("enum")-1) && 0 == memcmp("enum", tok, word_len) )
            return 'e';
        tok = p;
    }
    return 0;
}

/// Append "int, <name>;" to s for every enumerator found in body.
/// body must be NUL-terminated at body[body_len] and is modified in place.
static char *append_enum_members(char *s, size_t *slen, char *body, size_t body_len)
{
    // in C all enums store values as 'int'
    static const char typestr[] = "int, ";
    char * const body_end = body + body_len; /* points at the terminator */
    char * p = body;

    for (;;) {
        char * sep = memchr(p, ',', (size_t)(body_end - p));
        char * seg_end = (sep != NULL) ? sep : body_end;

        // cut the enumerator at its '=' (if any) so the value expression is
        // ignored; only look inside the CURRENT enumerator
        char * eq = memchr(p, '=', (size_t)(seg_end - p));
        *((eq != NULL) ? eq : seg_end) = '\0';

        size_t lwlen = 0;
        char * lastword = parse_c_last_word(&lwlen, p);

        // the text after the last ',' is only an enumerator if it has a name
        if ( sep != NULL || lastword != NULL ) {
            s = buf_append(s, slen, typestr, sizeof(typestr)-1);
            if ( lastword != NULL )
                s = buf_append(s, slen, lastword, lwlen);
            s = buf_push(s, slen, ';');
        }

        if ( sep == NULL )
            break;
        p = sep + 1;
    }
    return s;
}

/**
 * Parse preprocessed C source from f into a list of normalized declarations.
 *
 * Function/struct/union bodies, initializers and array size expressions are
 * dropped; struct/union members and function arguments are rewritten as
 * "type,name;" lists, enumerators as "int, name;" lists, and "[]" is appended
 * to declarations that contained an array.
 *
 * @param ndecl  receives the number of declarations returned
 * @return       array of *ndecl heap allocated strings (NULL when empty);
 *               the caller owns the array and every string in it
 */
char ** parse_c_header(size_t *ndecl, FILE *f)
{
    *ndecl = 0;
    char ** decl = NULL;
    size_t slen = 0;
    char * s = NULL;
    int64_t array_count = 0, peak_array_count = 0;
    char *func_name = NULL;

    int c;
    while ( (c = fgetc(f)) != EOF ) {
        // consume anything that starts with '#' until the end of the line
        if ( c == '#' ) {
            while ( (c = fgetc(f)) != EOF && c != '\n' );
            if ( c == EOF )
                break;
        }

        if ( c == '[' ) {
            array_count++;
            if ( array_count > peak_array_count )
                peak_array_count = array_count;
            continue;
        }
        if ( c == ']' ) {
            array_count--;
            continue;
        }
        // if we are in an array definition we must skip over it to avoid
        // tripping up on operator keywords inside its size expression (e.g. sizeof)
        if ( array_count != 0 )
            continue;

        // consume any function or struct body definitions
        if ( c == '{' ) {
            size_t body_len = 0;
            char * body = parse_c_header_decl_body(&c, &body_len, f);

            // We need to search leading keywords to see if struct/union/enum is found
            int kw = leading_aggregate_kw(s, slen);

            // IF and ONLY IF we are defining a struct/union preserve the body
            if ( (kw == 's' || kw == 'u') && NULL == memchr(s, '(', slen) ) {
                s = buf_push(s, &slen, '{');
                // parse declarations in recursive instance
                s = parse_c_header_sub(s, &slen, body, body_len);
                s = buf_push(s, &slen, '}');
            }

            if ( kw == 'e' ) {
                s = buf_push(s, &slen, '{');
                s = append_enum_members(s, &slen, body, body_len);
                s = buf_push(s, &slen, '}');
            }

            free( body );
            if ( c == EOF )
                break;
            continue;
        }

        // consume any static variable assignments
        if ( c == '=' ) {
            while ( (c = fgetc(f)) != EOF && c != ';' );
            if ( c == EOF )
                break;
        }

        if ( c == '(' ) {
            size_t args_len = 0;
            char *args = parse_c_header_decl_args(&c, &args_len, f);

            // "(*name)" style: the first non-space character is a '*'
            int is_func_ptr = 0;
            for (size_t n=0; n < args_len; n++) {
                if ( args[n] == '*' )
                    is_func_ptr = 1;
                if ( !isspace((unsigned char)args[n]) )
                    break;
            }

            if ( is_func_ptr ) {
                while ( (c = fgetc(f)) != EOF && c != '(' );
                if ( c == EOF ) {
                    free( args );
                    break;
                }
                ungetc(c, f);

                // if we are not already in a func pointer decl, set the name
                if ( func_name == NULL ) {
                    size_t lwlen = 0;
                    char *lastword = parse_c_last_word(&lwlen, args);
                    if ( lastword != NULL ) {
                        func_name = xrealloc(NULL, lwlen + 1);
                        memcpy(func_name, lastword, lwlen);
                        func_name[lwlen] = '\0';
                    }
                }
                free( args );
                continue;
            }

            // remove any space between the function identifier and '('
            if ( slen > 0 && s[slen-1] == ' ' )
                slen--;

            s = buf_push(s, &slen, '(');
            s = parse_c_header_sub(s, &slen, args, args_len);
            free( args );
            s = buf_push(s, &slen, ')');

            if ( func_name != NULL ) {
                s = buf_push(s, &slen, ' ');
                s = buf_append(s, &slen, func_name, strlen(func_name));
                free( func_name );
                func_name = NULL;
            }

            // consume remainder of function prototype definition
            while ( (c = fgetc(f)) != EOF && c != ';' && c != '{' );
            if ( c == '{' ) {
                // need to consume static function bodies here instead to avoid a weird bug
                size_t body_len = 0;
                char * body = parse_c_header_decl_body(&c, &body_len, f);
                free( body );
            }
            if ( c == EOF )
                break;
        }

        // end of declaration
        if ( c == ';' || c == '}' ) {
            for (int64_t n=0; n < peak_array_count; n++) {
                s = buf_push(s, &slen, '[');
                s = buf_push(s, &slen, ']');
            }
            s = buf_push(s, &slen, '\0');

            decl = xrealloc(decl, (*ndecl + 1) * sizeof *decl);
            decl[*ndecl] = s;
            (*ndecl)++;

            s = NULL;
            slen = 0;
            // array markers belong to this declaration only
            peak_array_count = 0;
            continue;
        }

        if ( isspace(c) ) {
            // add space to the buffer but de-duplicate any repeats
            if ( slen != 0 && s[slen-1] != ' ' )
                s = buf_push(s, &slen, ' ');
            continue;
        }

        s = buf_push(s, &slen, (char)c);
    }

    // drop whatever unfinished declaration was pending at EOF
    free( s );
    free( func_name );
    return decl;
}

/// Release a declaration list returned by parse_c_header().
static void free_decls(char **decl, size_t ndecl)
{
    for (size_t n=0; n < ndecl; n++)
        free( decl[n] );
    free( decl );
}

/**
 * Usage: <prog> <header>
 *
 * Preprocesses <header> with $CC (default "cc"), checks that it compiles and
 * prints one normalized declaration per line. Returns 0 on success, 1 on error.
 */
int main(int argc, char *argv[])
{
    if ( argc <= 1 ) {
        fprintf(stderr, "usage: %s <header>\n", (argc > 0 && argv[0]) ? argv[0] : "prog");
        return 1;
    }

    const char *cmd = getenv("CC");
    cmd = (cmd == NULL) ? "cc" : cmd;

    FILE *fhdr = open_c_header(cmd, argv[1]);
    if ( fhdr == NULL ) {
        perror(argv[1]);
        return 1;
    }

    size_t ndecl = 0;
    char **decl = parse_c_header(&ndecl, fhdr);

    int r = pclose(fhdr);
    if ( r != 0 ) {
        fprintf(stdout, "[ERROR] Failed to read c-header '%s', CC=%s returned %d\n", argv[1], cmd, r);
        free_decls(decl, ndecl);
        return 1;
    }

    if ( !valid_c_header(cmd, argv[1]) ) {
        fprintf(stdout, "[ERROR] System CC=%s failed to validate the syntax of c-header: %s\n", cmd, argv[1]);
        free_decls(decl, ndecl);
        return 1;
    }

    for (size_t n=0; n < ndecl; n++) {
        puts(decl[n]);
    }

    free_decls(decl, ndecl);
    return 0;
}