in wip c header parser: parse function decls into the same form as structs

This commit is contained in:
icst 2024-06-28 22:03:37 -04:00
parent be69ebfaf5
commit c4f47f11e5

View file

@ -92,6 +92,7 @@ char ** parse_c_header_identkws(size_t *nidkws, int *c, FILE *f) {
return idkws;
}
#if 0
int64_t parse_c_header_int(int *c, FILE *f) {
size_t n=0;
@ -110,7 +111,6 @@ int64_t parse_c_header_int(int *c, FILE *f) {
return strtol(buf, NULL, 0);
}
#if 0
typedef struct {
size_t n;
char ** strs;
@ -220,6 +220,25 @@ char * parse_c_header_decl_body(int *c, size_t *len, FILE *f) {
return s;
}
/// Read the text of a parenthesised argument list from f, stopping at the ')'
/// that closes it.
///
/// The nesting level starts at 1, so the opening '(' is presumably consumed by
/// the caller before this is called. Nested '(' / ')' pairs are copied through
/// and tracked; scanning ends at the first ')' seen while the level is still 1,
/// or at EOF. Every ',' (at any nesting depth) is stored as ';'; all other
/// bytes are copied unchanged.
///
/// c    receives every character read; on return it holds the terminating ')'
///      or EOF.
/// len  receives the number of bytes stored in the returned buffer.
///
/// Returns a heap buffer of *len bytes (NOT NUL-terminated) which the caller
/// must free. Returns NULL with *len == 0 when no byte was read before the
/// terminator, or when memory ran out (the partial buffer is released and the
/// stream is left wherever reading stopped).
char * parse_c_header_decl_args(int *c, size_t *len, FILE *f) {
    char *s = NULL;
    size_t cap = 0;
    size_t level = 1;

    *len = 0;
    while ( (*c = fgetc(f)) != EOF && (*c != ')' || level > 1) ) {
        if ( *c == '(' ) level++;
        else if ( *c == ')' ) level--;

        // Grow geometrically instead of once per byte, and never overwrite
        // the only pointer to the buffer with an unchecked realloc() result.
        if ( *len == cap ) {
            size_t new_cap = (cap == 0) ? 64 : cap * 2;
            char *tmp = realloc(s, new_cap);
            if ( tmp == NULL ) {
                free(s);
                *len = 0;
                return NULL;
            }
            s = tmp;
            cap = new_cap;
        }

        s[*len] = (*c != ',') ? (char)*c : ';';
        (*len)++;
    }
    return s;
}
/// Returns the length to the end of the next token delimited by delim starting
/// from the previous value of *p before the call.
size_t next_tok_len(char **p, char *p_end, char delim) {
@ -290,6 +309,53 @@ char *parse_c_last_word(size_t *lwlen, char *s) {
return &s[start_idx];
}
char ** parse_c_header(size_t *ndecl, FILE *f);

/// Parse the declarations held in the buffer sub and append each of them to
/// out in the form "<type and pointers><array suffix>,<identifier>;".
///
/// The buffer is wrapped in a read-only memory stream and handed to
/// parse_c_header(); for every declaration string it returns, the last word
/// (as located by parse_c_last_word()) is taken to be the identifier,
/// everything before it the type, and everything after it the array suffix.
///
/// out      heap buffer to append to (may be NULL when *out_len is 0); it is
///          grown with realloc(), so the caller must use the returned pointer.
/// out_len  number of bytes currently used in out; advanced by what is added.
///          The result is NOT NUL-terminated by this function.
/// sub      text to parse; not modified.
/// sub_len  number of bytes in sub.
///
/// Returns the (possibly moved) output buffer. When sub is empty, the memory
/// stream cannot be opened, or memory runs out, the declarations that could
/// not be added are dropped and out is returned with what it holds so far.
char * parse_c_header_sub(char *out, size_t *out_len, const char *sub, size_t sub_len) {
    // An empty buffer holds no declarations, and fmemopen() is allowed to
    // reject a zero-sized buffer, so there is nothing to parse.
    if ( sub == NULL || sub_len == 0 ) return out;

    // Mode "r" never writes through the pointer, so dropping const is safe.
    FILE *fsub = fmemopen((void *)sub, sub_len, "r");
    if ( fsub == NULL ) {
        perror("fmemopen");
        return out;
    }

    size_t n_sub_decl = 0;
    char ** sub_decl = parse_c_header(&n_sub_decl, fsub);
    int failed = 0;

    for (size_t n = 0; n < n_sub_decl; n++) {
        char *d = sub_decl[n];

        if ( !failed ) {
            size_t lwlen = 0;
            char * lastword = parse_c_last_word(&lwlen, d);
            assert( lastword != NULL );

            const char *suffix = lastword + lwlen;
            size_t type_len = (size_t)(lastword - d);
            size_t type_array_len = (size_t)((d + strlen(d)) - suffix);

            // One allocation for: type + array suffix + ',' + identifier + ';'
            char *tmp = realloc(out, (*out_len) + type_len + type_array_len + 1 + lwlen + 1);
            if ( tmp == NULL ) {
                perror("realloc");
                failed = 1;
            } else {
                out = tmp;

                // add type identifiers and pointers
                memcpy(out + (*out_len), d, type_len);
                (*out_len) += type_len;

                // add array []'s
                memcpy(out + (*out_len), suffix, type_array_len);
                (*out_len) += type_array_len;

                out[(*out_len)] = ',';
                (*out_len)++;

                // add identifier
                memcpy(out + (*out_len), lastword, lwlen);
                (*out_len) += lwlen;

                out[(*out_len)] = ';';
                (*out_len)++;
            }
        }

        // parse_c_header() hands over ownership of each string it builds
        // with realloc(); release it once its text has been copied.
        free(d);
    }

    // NOTE(review): assumes the array returned by parse_c_header() is itself
    // heap-allocated (its allocation is not visible here) — confirm.
    free(sub_decl);

    fclose(fsub);
    return out;
}
char ** parse_c_header(size_t *ndecl, FILE *f) {
*ndecl = 0;
@ -329,9 +395,6 @@ char ** parse_c_header(size_t *ndecl, FILE *f) {
size_t body_len = 0;
char * body = parse_c_header_decl_body(&c, &body_len, f);
//fwrite(body, 1, body_len, stdout);
//putchar('\n');
// We need to search leading keywords to see if struct/union is found
int found_struct = 0;
int found_union = 0;
@ -356,83 +419,16 @@ char ** parse_c_header(size_t *ndecl, FILE *f) {
// IF and ONLY IF we are defining a struct/union preserve the body
if ( (found_struct || found_union) && NULL == memchr(s, '(', slen) ) {
// handle body definitions recursively
FILE *fsub = fmemopen(body, body_len, "r");
size_t n_sub_decl = 0;
char ** sub_decl = parse_c_header(&n_sub_decl, fsub);
/*
for (size_t n=0; n < n_sub_decl; n++) {
size_t word_len = 0;
char *p = sub_decl[n], *prev_p = sub_decl[n];
fputs(" > ", stdout);
while ( (word_len = next_tok_len(&p, sub_decl[n] + strlen(sub_decl[n]), ' ')) != 0 ) {
putchar('"');
fwrite(prev_p, 1, word_len, stdout);
putchar('"');
putchar(' ');
prev_p = p;
}
putchar('\n');
}
puts("----------");
*/
s = realloc(s, slen+1);
s[slen] = '{';
slen++;
for (size_t n=0; n < n_sub_decl; n++) {
size_t lwlen = 0;
char * lastword = parse_c_last_word(&lwlen, sub_decl[n]);
if ( lastword == NULL ) {
fprintf(stderr, "ERROR: identifier not found in s=\"%s\" body=", sub_decl[n]);
fwrite(body, 1, body_len, stdout);
putchar('\n');
continue;
}
size_t type_len = (lastword - sub_decl[n]);
size_t type_array_len = ((sub_decl[n] + strlen(sub_decl[n])) - (lastword + lwlen));
s = realloc(s, slen + type_len + type_array_len);
memcpy(s + slen, sub_decl[n], type_len);
slen += type_len;
memcpy(s + slen, lastword + lwlen, type_array_len);
slen += type_array_len;
s = realloc(s, slen + 2 + lwlen);
s[slen] = ',';
slen++;
memcpy(s + slen, lastword, lwlen);
slen += lwlen;
s[slen] = ';';
slen++;
//fputs("@@@ ", stdout);
//fwrite(lastword, 1, lwlen, stdout);
//putchar('\n');
}
//puts("----------");
// parse declarations in recursive instance
s = parse_c_header_sub(s, &slen, body, body_len);
s = realloc(s, slen+1);
s[slen] = '}';
slen++;
/*
s = realloc(s, slen + body_len);
memcpy(s + slen, body, body_len);
slen += body_len;
*/
}
if ( body != NULL ) free( body );
@ -456,27 +452,12 @@ char ** parse_c_header(size_t *ndecl, FILE *f) {
s[slen] = '(';
slen++;
while ( (c = fgetc(f)) != EOF && c != ')' ) {
size_t args_len = 0;
char *args = parse_c_header_decl_args(&c, &args_len, f);
if ( !isspace(c) ) {
s = parse_c_header_sub(s, &slen, args, args_len);
s = realloc(s, slen + 1);
s[slen] = c;
slen++;
} else if ( slen > 0 && s[slen-1] != ' ' ) {
s = realloc(s, slen + 1);
s[slen] = ' ';
slen++;
}
}
if ( c == EOF ) break;
s = realloc(s, slen + 2);
// add extra ',' for parsing consistency
s[slen] = ',';
slen++;
s = realloc(s, slen + 1);
s[slen] = ')';
slen++;
@ -505,9 +486,6 @@ char ** parse_c_header(size_t *ndecl, FILE *f) {
decl[*ndecl] = s;
(*ndecl)++;
//fwrite(s, 1, slen, stdout);
//putchar('\n');
s = NULL;
slen = 0;
continue;
@ -526,8 +504,6 @@ char ** parse_c_header(size_t *ndecl, FILE *f) {
s = realloc(s, slen + 1);
s[slen] = c;
slen++;
//putchar(c);
}
return decl;
@ -537,7 +513,9 @@ int main(int argc, char *argv[]) {
if ( argc <= 1 ) return 1;
FILE *fhdr = open_c_header("cc", argv[1]);
const char *cmd = getenv("CC");
FILE *fhdr = open_c_header(((cmd == NULL) ? "cc" : cmd), argv[1]);
if ( fhdr == NULL ) {
perror(argv[1]);