diff --git a/ast.h b/ast.h
new file mode 100644
index 0000000..ce6cf01
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,40 @@
+#pragma once
+#include "hashtab.h"
+
+enum {
+    AST_DATTR_COMPTIME=0x1,
+    AST_DATTR_CONST=0x2,
+    AST_DATTR_VOLATILE=0x4,
+    AST_DATTR_ATOMIC=0x8,
+    AST_DATTR_STATIC=0x10,
+    AST_DATTR_THREAD_LOCAL=0x20,
+    AST_DATTR_COMPLEX=0x40,
+};
+
+typedef struct {
+
+    uint32_t attrs;
+
+    enum {
+        AST_DTYPE_VOID,
+        AST_DTYPE_SIGNED,
+        AST_DTYPE_UNSIGNED,
+        AST_DTYPE_FLOAT,
+        AST_DTYPE_STRUCT,
+        AST_DTYPE_UNION,
+        AST_DTYPE_FUNC,
+        AST_DTYPE_STR,
+    } fmt;
+
+    size_t nptrs;
+    size_t nbytes;
+
+} ast_dtype_t;
+
+typedef struct ast_s {
+
+    size_t ndtypes;
+    ast_dtype_t * dtypes;
+    hashtab_t dtypes_ht;
+
+} ast_t;
diff --git a/hashtab.c b/hashtab.c
new file mode 100644
index 0000000..70bad9e
--- /dev/null
+++ b/hashtab.c
@@ -0,0 +1,25 @@
+#include "hashtab.h"
+
+#include <stdio.h>
+
+int main() {
+
+    hashtab_t ht = HASHTAB_INIT;
+
+    hashtab_add(&ht, "aaa", 0);
+    hashtab_add(&ht, "ccc", 1);
+    hashtab_add(&ht, "bbb", 2);
+
+    size_t h0 = hashtab_bsrch(&ht, hashtab_fnvhash("aaa", 3));
+    printf("%zu (%llu, %s) ", h0, (unsigned long long)ht.indexes[h0], ht.keys[h0]);
+
+    size_t h1 = hashtab_bsrch(&ht, hashtab_fnvhash("bbb", 3));
+    printf("%zu (%llu, %s) ", h1, (unsigned long long)ht.indexes[h1], ht.keys[h1]);
+
+    size_t h2 = hashtab_bsrch(&ht, hashtab_fnvhash("ccc", 3));
+    printf("%zu (%llu, %s) ", h2, (unsigned long long)ht.indexes[h2], ht.keys[h2]);
+    putchar('\n');
+
+    hashtab_free(&ht);
+    return 0;
+}
diff --git a/hashtab.h b/hashtab.h
new file mode 100644
index 0000000..1cb5301
--- /dev/null
+++ b/hashtab.h
@@ -0,0 +1,144 @@
+#pragma once
+/// Hash table mapping string keys to indexes into another array (entries kept sorted by 64-bit FNV hash; hash collisions are not handled)
+
+#include "qsort.h" /* macro qsort implementation */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct {
+    size_t size;
+    char ** keys;
+    uint64_t * hashes;
+    uint64_t * indexes;
+} hashtab_t;
+
+#define HASHTAB_INIT {\
+    .size = 0,\
+    .keys = NULL,\
+    .hashes = NULL,\
+    .indexes = NULL\
+}
+
+static inline void hashtab_free(hashtab_t *ht) {
+
+    for (size_t k=0; k < ht->size; k++) {
+        free( ht->keys[k] );
+    }
+
+    free( ht->keys );
+    free( ht->hashes );
+    free( ht->indexes );
+}
+
+static inline char *hashtab_strdup_len(const char *key, size_t key_len) {
+
+    char *key2 = malloc( key_len + 1 );
+    if ( key2 == NULL ) return NULL;
+
+    memcpy(key2, key, key_len);
+    key2[key_len] = 0;
+
+    return key2;
+}
+
+static inline char *hashtab_strdup(const char *key) {
+
+    size_t key_len = 0;
+    for (; key[key_len] != '\0'; key_len++);
+
+    return hashtab_strdup_len(key, key_len);
+}
+
+static inline uint64_t hashtab_fnvhash(const char *s, size_t slen) {
+
+    static const uint64_t prime = 0x100000001B3;
+    uint64_t result = 0xcbf29ce484222325;
+
+    for (size_t k = 0; k < slen; k++)
+        result = (result * prime) ^ (unsigned char)s[k];
+
+    return result;
+}
+
+static inline int hashtab_add(hashtab_t *ht, const char *key, uint64_t idx) {
+
+    char * key2 = hashtab_strdup(key);
+    if ( key2 == NULL ) return 0;
+
+    char ** new_keys = realloc(ht->keys, (1 + ht->size) * sizeof(*ht->keys));
+    if ( new_keys == NULL ) { free( key2 ); return 0; }
+    ht->keys = new_keys;
+    ht->keys[ht->size] = key2;
+
+    uint64_t * new_hashes = realloc(ht->hashes, (1 + ht->size) * sizeof(*ht->hashes));
+    if ( new_hashes == NULL ) { free( key2 ); return 0; }
+    ht->hashes = new_hashes;
+    ht->hashes[ht->size] = hashtab_fnvhash(key, strlen(key));
+
+    uint64_t * new_indexes = realloc(ht->indexes, (1 + ht->size) * sizeof(*ht->indexes));
+    if ( new_indexes == NULL ) { free( key2 ); return 0; }
+    ht->indexes = new_indexes;
+    ht->indexes[ht->size] = idx;
+    ht->size++;
+
+#define HASHTAB_CMP_LT(N, M) (ht->hashes[N] < ht->hashes[M])
+
+    uint64_t tmp_hash;
+    uint64_t tmp_idx;
+    char * tmp_key;
+
+#define HASHTAB_SWAP(N, M) (\
+    tmp_hash = ht->hashes[N],\
+    tmp_idx = ht->indexes[N],\
+    tmp_key = ht->keys[N],\
+    ht->hashes[N] = ht->hashes[M],\
+    ht->indexes[N] = ht->indexes[M],\
+    ht->keys[N] = ht->keys[M],\
+    ht->hashes[M] = tmp_hash,\
+    ht->indexes[M] = tmp_idx,\
+    ht->keys[M] = tmp_key\
+)
+
+    QSORT(ht->size, HASHTAB_CMP_LT, HASHTAB_SWAP);
+
+    return 1;
+}
+
+static inline size_t hashtab_bsrch(const hashtab_t *ht, uint64_t hash) {
+
+    if ( ht->size == 0 ) return SIZE_MAX;
+
+    size_t l = 0, r = ht->size - 1;
+
+    while ( l <= r ) {
+
+        size_t m = (l + r) >> 1;
+
+        if ( ht->hashes[m] < hash ) {
+
+            l = m + 1;
+
+        } else if ( ht->hashes[m] > hash ) {
+
+            if ( m == 0 ) goto NOT_FOUND;
+            r = m - 1;
+
+        } else {
+
+            return m;
+        }
+    }
+
+NOT_FOUND:
+    return SIZE_MAX;
+}
+
+static inline uint64_t hashtab_find(const hashtab_t *ht, const char *key) {
+
+    uint64_t hash = hashtab_fnvhash(key, strlen(key));
+    size_t n = hashtab_bsrch(ht, hash);
+    if ( n == SIZE_MAX ) return UINT64_MAX;
+    return ht->indexes[n];
+}
diff --git a/qsort.h b/qsort.h
new file mode 100644
index 0000000..9ab426b
--- /dev/null
+++ b/qsort.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013, 2017 Alexey Tourbin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This is a traditional Quicksort implementation which mostly follows
+ * [Sedgewick 1978]. Sorting is performed entirely on array indices,
+ * while actual access to the array elements is abstracted out with the
+ * user-defined `LESS` and `SWAP` primitives.
+ *
+ * Synopsis:
+ *     QSORT(N, LESS, SWAP);
+ * where
+ *     N - the number of elements in A[];
+ *     LESS(i, j) - compares A[i] to A[j];
+ *     SWAP(i, j) - exchanges A[i] with A[j].
+ */
+
+#ifndef QSORT_H
+#define QSORT_H
+
+/* Sort 3 elements. */
+#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
+do { \
+    if (Q_LESS(q_a2, q_a1)) { \
+        if (Q_LESS(q_a3, q_a2)) \
+            Q_SWAP(q_a1, q_a3); \
+        else { \
+            Q_SWAP(q_a1, q_a2); \
+            if (Q_LESS(q_a3, q_a2)) \
+                Q_SWAP(q_a2, q_a3); \
+        } \
+    } \
+    else if (Q_LESS(q_a3, q_a2)) { \
+        Q_SWAP(q_a2, q_a3); \
+        if (Q_LESS(q_a2, q_a1)) \
+            Q_SWAP(q_a1, q_a2); \
+    } \
+} while (0)
+
+/* Partition [q_l,q_r] around a pivot. After partitioning,
+ * [q_l,q_j] are the elements that are less than or equal to the pivot,
+ * while [q_i,q_r] are the elements greater than or equal to the pivot. */
+#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \
+do { \
+    /* The middle element, not to be confused with the median. */ \
+    Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \
+    /* Reorder the second, the middle, and the last items. \
+     * As [Edelkamp Weiss 2016] explain, using the second element \
+     * instead of the first one helps avoid bad behaviour for \
+     * decreasingly sorted arrays. This method is used in recent \
+     * versions of gcc's std::sort, see gcc bug 58437#c13, although \
+     * the details are somewhat different (cf. #c14). */ \
+    Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \
+    /* Place the median at the beginning. */ \
+    Q_SWAP(q_l, q_m); \
+    /* Partition [q_l+2, q_r-1] around the median which is in q_l. \
+     * q_i and q_j are initially off by one, they get decremented \
+     * in the do-while loops. */ \
+    q_i = q_l + 1; q_j = q_r; \
+    while (1) { \
+        do q_i++; while (Q_LESS(q_i, q_l)); \
+        do q_j--; while (Q_LESS(q_l, q_j)); \
+        if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \
+        Q_SWAP(q_i, q_j); \
+    } \
+    /* Compensate for the i==j case. */ \
+    q_i = q_j + 1; \
+    /* Put the median to its final place. */ \
+    Q_SWAP(q_l, q_j); \
+    /* The median is not part of the left subfile. */ \
+    q_j--; \
+} while (0)
+
+/* Insertion sort is applied to small subfiles - this is contrary to
+ * Sedgewick's suggestion to run a separate insertion sort pass after
+ * the partitioning is done. The reason I don't like a separate pass
+ * is that it triggers extra comparisons, because it can't see that the
+ * medians are already in their final positions and need not be rechecked.
+ * Since I do not assume that comparisons are cheap, I also do not try
+ * to eliminate the (q_j > q_l) boundary check. */
+#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \
+do { \
+    Q_UINT q_i, q_j; \
+    /* For each item starting with the second... */ \
+    for (q_i = q_l + 1; q_i <= q_r; q_i++) \
+        /* move it down the array so that the first part is sorted. */ \
+        for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \
+            Q_SWAP(q_j, q_j - 1); \
+} while (0)
+
+/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
+ * Q_THRESH, the algorithm performs recursive partitioning. When the size
+ * drops below Q_THRESH, the algorithm switches to insertion sort.
+ * The minimum valid value is probably 5 (with 5 items, the second and
+ * the middle items, the middle itself being rounded down, are distinct). */
+#define Q_THRESH 16
+
+/* The main loop. */
+#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \
+do { \
+    Q_UINT q_l = 0; \
+    Q_UINT q_r = (Q_N) - 1; \
+    Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \
+    struct { Q_UINT q_l, q_r; } \
+        /* On 32-bit platforms, to sort a "char[3GB+]" array, \
+         * it may take full 32 stack frames. On 64-bit CPUs, \
+         * though, the address space is limited to 48 bits. \
+         * The usage is further reduced if Q_N has a 32-bit type. */ \
+        q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \
+    while (1) { \
+        if (q_r - q_l + 1 >= Q_THRESH) { \
+            Q_UINT q_i, q_j; \
+            Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \
+            /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \
+             * Dealing with them depends on which one is bigger. */ \
+            if (q_j - q_l >= q_r - q_i) \
+                Q_SUBFILES(q_l, q_j, q_i, q_r); \
+            else \
+                Q_SUBFILES(q_i, q_r, q_l, q_j); \
+        } \
+        else { \
+            Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \
+            /* Pop subfiles from the stack, until it gets empty. */ \
+            if (q_sp == 0) break; \
+            q_sp--; \
+            q_l = q_st[q_sp].q_l; \
+            q_r = q_st[q_sp].q_r; \
+        } \
+    } \
+} while (0)
+
+/* The missing part: dealing with subfiles.
+ * Assumes that the first subfile is not smaller than the second. */
+#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \
+do { \
+    /* If the second subfile is only a single element, it needs \
+     * no further processing. The first subfile will be processed \
+     * on the next iteration (both subfiles cannot be only a single \
+     * element, due to Q_THRESH). */ \
+    if (q_l2 == q_r2) { \
+        q_l = q_l1; \
+        q_r = q_r1; \
+    } \
+    else { \
+        /* Otherwise, both subfiles need processing. \
+         * Push the larger subfile onto the stack. */ \
+        q_st[q_sp].q_l = q_l1; \
+        q_st[q_sp].q_r = q_r1; \
+        q_sp++; \
+        /* Process the smaller subfile on the next iteration. */ \
+        q_l = q_l2; \
+        q_r = q_r2; \
+    } \
+} while (0)
+
+/* And now, ladies and gentlemen, may I proudly present to you... */
+#define QSORT(Q_N, Q_LESS, Q_SWAP) \
+do { \
+    if ((Q_N) > 1) \
+        /* We could check sizeof(Q_N) and use "unsigned", but at least \
+         * on x86_64, this has the performance penalty of up to 5%. */ \
+        Q_LOOP(unsigned long, Q_N, Q_LESS, Q_SWAP); \
+} while (0)
+
+#endif
+
+/* ex:set ts=8 sts=4 sw=4 noet: */
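
Note (illustrative sketch, not part of the patch): ast.h pairs the dtypes array with the
dtypes_ht hash table, so the uint64_t values stored in the table are meant to be indexes
into dtypes. Below is a minimal example of that wiring, assuming a hypothetical
register_dtype helper and made-up type names ("u32", "f64") that do not appear in this diff:

#include "ast.h"
#include <stdio.h>

/* Hypothetical helper: append a dtype to ast->dtypes and record its index in
 * ast->dtypes_ht under `name`. Returns the new index, or UINT64_MAX on failure. */
static uint64_t register_dtype(ast_t *ast, const char *name, ast_dtype_t dt) {
    ast_dtype_t *grown = realloc(ast->dtypes, (ast->ndtypes + 1) * sizeof(*grown));
    if (grown == NULL) return UINT64_MAX;
    ast->dtypes = grown;
    ast->dtypes[ast->ndtypes] = dt;
    if (!hashtab_add(&ast->dtypes_ht, name, ast->ndtypes)) return UINT64_MAX;
    return ast->ndtypes++;
}

int main(void) {
    ast_t ast = { .ndtypes = 0, .dtypes = NULL, .dtypes_ht = HASHTAB_INIT };

    register_dtype(&ast, "u32", (ast_dtype_t){ .fmt = AST_DTYPE_UNSIGNED, .nbytes = 4 });
    register_dtype(&ast, "f64", (ast_dtype_t){ .fmt = AST_DTYPE_FLOAT, .nbytes = 8 });

    uint64_t i = hashtab_find(&ast.dtypes_ht, "f64"); /* index into ast.dtypes */
    if (i != UINT64_MAX)
        printf("f64 -> dtypes[%llu], nbytes=%zu\n",
               (unsigned long long)i, ast.dtypes[i].nbytes);

    free(ast.dtypes);
    hashtab_free(&ast.dtypes_ht);
    return 0;
}

One design observation: hashtab_add appends and then re-sorts the whole table with QSORT on
every insertion; if tables grow large, finding the insertion position by binary search and
shifting with memmove would bound each insert at O(n) while keeping lookups unchanged.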