hashmap.c (36577B)
1 // Copyright 2020 Joshua J Baker. All rights reserved. 2 // Use of this source code is governed by an MIT-style 3 // license that can be found in the LICENSE file. 4 5 #include <stdio.h> 6 #include <string.h> 7 #include <stdlib.h> 8 #include <stdint.h> 9 #include <stddef.h> 10 #include "hashmap.h" 11 12 #define GROW_AT 0.60 13 #define SHRINK_AT 0.10 14 15 static void *(*__malloc)(size_t) = NULL; 16 static void *(*__realloc)(void *, size_t) = NULL; 17 static void (*__free)(void *) = NULL; 18 19 // hashmap_set_allocator allows for configuring a custom allocator for 20 // all hashmap library operations. This function, if needed, should be called 21 // only once at startup and a prior to calling hashmap_new(). 22 void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*)) { 23 __malloc = malloc; 24 __free = free; 25 } 26 27 struct bucket { 28 uint64_t hash:48; 29 uint64_t dib:16; 30 }; 31 32 // hashmap is an open addressed hash map using robinhood hashing. 33 struct hashmap { 34 void *(*malloc)(size_t); 35 void *(*realloc)(void *, size_t); 36 void (*free)(void *); 37 size_t elsize; 38 size_t cap; 39 uint64_t seed0; 40 uint64_t seed1; 41 uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1); 42 int (*compare)(const void *a, const void *b, void *udata); 43 void (*elfree)(void *item); 44 void *udata; 45 size_t bucketsz; 46 size_t nbuckets; 47 size_t count; 48 size_t mask; 49 size_t growat; 50 size_t shrinkat; 51 uint8_t growpower; 52 bool oom; 53 void *buckets; 54 void *spare; 55 void *edata; 56 }; 57 58 void hashmap_set_grow_by_power(struct hashmap *map, size_t power) { 59 map->growpower = power < 1 ? 1 : power > 16 ? 
16 : power; 60 } 61 62 static struct bucket *bucket_at0(void *buckets, size_t bucketsz, size_t i) { 63 return (struct bucket*)(((char*)buckets)+(bucketsz*i)); 64 } 65 66 static struct bucket *bucket_at(struct hashmap *map, size_t index) { 67 return bucket_at0(map->buckets, map->bucketsz, index); 68 } 69 70 static void *bucket_item(struct bucket *entry) { 71 return ((char*)entry)+sizeof(struct bucket); 72 } 73 74 static uint64_t clip_hash(uint64_t hash) { 75 return hash & 0xFFFFFFFFFFFF; 76 } 77 78 static uint64_t get_hash(struct hashmap *map, const void *key) { 79 return clip_hash(map->hash(key, map->seed0, map->seed1)); 80 } 81 82 // hashmap_new_with_allocator returns a new hash map using a custom allocator. 83 // See hashmap_new for more information information 84 struct hashmap *hashmap_new_with_allocator(void *(*_malloc)(size_t), 85 void *(*_realloc)(void*, size_t), void (*_free)(void*), 86 size_t elsize, size_t cap, uint64_t seed0, uint64_t seed1, 87 uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1), 88 int (*compare)(const void *a, const void *b, void *udata), 89 void (*elfree)(void *item), 90 void *udata) 91 { 92 _malloc = _malloc ? _malloc : __malloc ? __malloc : malloc; 93 _realloc = _realloc ? _realloc : __realloc ? __realloc : realloc; 94 _free = _free ? _free : __free ? 
__free : free; 95 size_t ncap = 16; 96 if (cap < ncap) { 97 cap = ncap; 98 } else { 99 while (ncap < cap) { 100 ncap *= 2; 101 } 102 cap = ncap; 103 } 104 // printf("%d\n", (int)cap); 105 size_t bucketsz = sizeof(struct bucket) + elsize; 106 while (bucketsz & (sizeof(uintptr_t)-1)) { 107 bucketsz++; 108 } 109 // hashmap + spare + edata 110 size_t size = sizeof(struct hashmap)+bucketsz*2; 111 struct hashmap *map = _malloc(size); 112 if (!map) { 113 return NULL; 114 } 115 memset(map, 0, sizeof(struct hashmap)); 116 map->elsize = elsize; 117 map->bucketsz = bucketsz; 118 map->seed0 = seed0; 119 map->seed1 = seed1; 120 map->hash = hash; 121 map->compare = compare; 122 map->elfree = elfree; 123 map->udata = udata; 124 map->spare = ((char*)map)+sizeof(struct hashmap); 125 map->edata = (char*)map->spare+bucketsz; 126 map->cap = cap; 127 map->nbuckets = cap; 128 map->mask = map->nbuckets-1; 129 map->buckets = _malloc(map->bucketsz*map->nbuckets); 130 if (!map->buckets) { 131 _free(map); 132 return NULL; 133 } 134 memset(map->buckets, 0, map->bucketsz*map->nbuckets); 135 map->growpower = 1; 136 map->growat = map->nbuckets*GROW_AT; 137 map->shrinkat = map->nbuckets*SHRINK_AT; 138 map->malloc = _malloc; 139 map->realloc = _realloc; 140 map->free = _free; 141 return map; 142 } 143 144 // hashmap_new returns a new hash map. 145 // Param `elsize` is the size of each element in the tree. Every element that 146 // is inserted, deleted, or retrieved will be this size. 147 // Param `cap` is the default lower capacity of the hashmap. Setting this to 148 // zero will default to 16. 149 // Params `seed0` and `seed1` are optional seed values that are passed to the 150 // following `hash` function. These can be any value you wish but it's often 151 // best to use randomly generated values. 152 // Param `hash` is a function that generates a hash value for an item. 
It's 153 // important that you provide a good hash function, otherwise it will perform 154 // poorly or be vulnerable to Denial-of-service attacks. This implementation 155 // comes with two helper functions `hashmap_sip()` and `hashmap_murmur()`. 156 // Param `compare` is a function that compares items in the tree. See the 157 // qsort stdlib function for an example of how this function works. 158 // The hashmap must be freed with hashmap_free(). 159 // Param `elfree` is a function that frees a specific item. This should be NULL 160 // unless you're storing some kind of reference data in the hash. 161 struct hashmap *hashmap_new(size_t elsize, size_t cap, uint64_t seed0, 162 uint64_t seed1, 163 uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1), 164 int (*compare)(const void *a, const void *b, void *udata), 165 void (*elfree)(void *item), 166 void *udata) 167 { 168 return hashmap_new_with_allocator(NULL, NULL, NULL, elsize, cap, seed0, 169 seed1, hash, compare, elfree, udata); 170 } 171 172 static void free_elements(struct hashmap *map) { 173 if (map->elfree) { 174 for (size_t i = 0; i < map->nbuckets; i++) { 175 struct bucket *bucket = bucket_at(map, i); 176 if (bucket->dib) map->elfree(bucket_item(bucket)); 177 } 178 } 179 } 180 181 // hashmap_clear quickly clears the map. 182 // Every item is called with the element-freeing function given in hashmap_new, 183 // if present, to free any data referenced in the elements of the hashmap. 184 // When the update_cap is provided, the map's capacity will be updated to match 185 // the currently number of allocated buckets. This is an optimization to ensure 186 // that this operation does not perform any allocations. 
187 void hashmap_clear(struct hashmap *map, bool update_cap) { 188 map->count = 0; 189 free_elements(map); 190 if (update_cap) { 191 map->cap = map->nbuckets; 192 } else if (map->nbuckets != map->cap) { 193 void *new_buckets = map->malloc(map->bucketsz*map->cap); 194 if (new_buckets) { 195 map->free(map->buckets); 196 map->buckets = new_buckets; 197 } 198 map->nbuckets = map->cap; 199 } 200 memset(map->buckets, 0, map->bucketsz*map->nbuckets); 201 map->mask = map->nbuckets-1; 202 map->growat = map->nbuckets*0.75; 203 map->shrinkat = map->nbuckets*0.10; 204 } 205 206 static bool resize0(struct hashmap *map, size_t new_cap) { 207 struct hashmap *map2 = hashmap_new_with_allocator(map->malloc, map->realloc, 208 map->free, map->elsize, new_cap, map->seed0, map->seed1, map->hash, 209 map->compare, map->elfree, map->udata); 210 if (!map2) return false; 211 for (size_t i = 0; i < map->nbuckets; i++) { 212 struct bucket *entry = bucket_at(map, i); 213 if (!entry->dib) { 214 continue; 215 } 216 entry->dib = 1; 217 size_t j = entry->hash & map2->mask; 218 while(1) { 219 struct bucket *bucket = bucket_at(map2, j); 220 if (bucket->dib == 0) { 221 memcpy(bucket, entry, map->bucketsz); 222 break; 223 } 224 if (bucket->dib < entry->dib) { 225 memcpy(map2->spare, bucket, map->bucketsz); 226 memcpy(bucket, entry, map->bucketsz); 227 memcpy(entry, map2->spare, map->bucketsz); 228 } 229 j = (j + 1) & map2->mask; 230 entry->dib += 1; 231 } 232 } 233 map->free(map->buckets); 234 map->buckets = map2->buckets; 235 map->nbuckets = map2->nbuckets; 236 map->mask = map2->mask; 237 map->growat = map2->growat; 238 map->shrinkat = map2->shrinkat; 239 map->free(map2); 240 return true; 241 } 242 243 static bool resize(struct hashmap *map, size_t new_cap) { 244 return resize0(map, new_cap); 245 } 246 247 // hashmap_set_with_hash works like hashmap_set but you provide your 248 // own hash. 
// The 'hash' callback provided to the hashmap_new function
// will not be called
const void *hashmap_set_with_hash(struct hashmap *map, const void *item,
    uint64_t hash)
{
    hash = clip_hash(hash);
    map->oom = false;
    if (map->count == map->growat) {
        // At the grow threshold: multiply the bucket count by 1<<growpower.
        if (!resize(map, map->nbuckets*(1<<map->growpower))) {
            map->oom = true;
            return NULL;
        }
    }

    // Stage the incoming entry in the map-owned scratch bucket (edata).
    struct bucket *entry = map->edata;
    entry->hash = hash;
    entry->dib = 1;
    void *eitem = bucket_item(entry);
    memcpy(eitem, item, map->elsize);

    void *bitem;
    size_t i = entry->hash & map->mask;
    while(1) {
        struct bucket *bucket = bucket_at(map, i);
        if (bucket->dib == 0) {
            // Empty slot: claim it. This is a new insert, so return NULL.
            memcpy(bucket, entry, map->bucketsz);
            map->count++;
            return NULL;
        }
        bitem = bucket_item(bucket);
        if (entry->hash == bucket->hash && (!map->compare ||
            map->compare(eitem, bitem, map->udata) == 0))
        {
            // Same key: replace in place and hand the previous element back
            // via the spare scratch bucket (valid until the next operation).
            memcpy(map->spare, bitem, map->elsize);
            memcpy(bitem, eitem, map->elsize);
            return map->spare;
        }
        if (bucket->dib < entry->dib) {
            // Robin-hood: the resident is closer to its home bucket than we
            // are, so swap and keep probing with the displaced entry.
            memcpy(map->spare, bucket, map->bucketsz);
            memcpy(bucket, entry, map->bucketsz);
            memcpy(entry, map->spare, map->bucketsz);
            eitem = bucket_item(entry);
        }
        i = (i + 1) & map->mask;
        entry->dib += 1;
    }
}

// hashmap_set inserts or replaces an item in the hash map. If an item is
// replaced then it is returned otherwise NULL is returned. This operation
// may allocate memory. If the system is unable to allocate additional
// memory then NULL is returned and hashmap_oom() returns true.
const void *hashmap_set(struct hashmap *map, const void *item) {
    return hashmap_set_with_hash(map, item, get_hash(map, item));
}

// hashmap_get_with_hash works like hashmap_get but you provide your
// own hash.
The 'hash' callback provided to the hashmap_new function 306 // will not be called 307 const void *hashmap_get_with_hash(struct hashmap *map, const void *key, 308 uint64_t hash) 309 { 310 hash = clip_hash(hash); 311 size_t i = hash & map->mask; 312 while(1) { 313 struct bucket *bucket = bucket_at(map, i); 314 if (!bucket->dib) return NULL; 315 if (bucket->hash == hash) { 316 void *bitem = bucket_item(bucket); 317 if (!map->compare || map->compare(key, bitem, map->udata) == 0) { 318 return bitem; 319 } 320 } 321 i = (i + 1) & map->mask; 322 } 323 } 324 325 // hashmap_get returns the item based on the provided key. If the item is not 326 // found then NULL is returned. 327 const void *hashmap_get(struct hashmap *map, const void *key) { 328 return hashmap_get_with_hash(map, key, get_hash(map, key)); 329 } 330 331 // hashmap_probe returns the item in the bucket at position or NULL if an item 332 // is not set for that bucket. The position is 'moduloed' by the number of 333 // buckets in the hashmap. 334 const void *hashmap_probe(struct hashmap *map, uint64_t position) { 335 size_t i = position & map->mask; 336 struct bucket *bucket = bucket_at(map, i); 337 if (!bucket->dib) { 338 return NULL; 339 } 340 return bucket_item(bucket); 341 } 342 343 // hashmap_delete_with_hash works like hashmap_delete but you provide your 344 // own hash. 
// The 'hash' callback provided to the hashmap_new function
// will not be called
const void *hashmap_delete_with_hash(struct hashmap *map, const void *key,
    uint64_t hash)
{
    hash = clip_hash(hash);
    map->oom = false;
    size_t i = hash & map->mask;
    while(1) {
        struct bucket *bucket = bucket_at(map, i);
        if (!bucket->dib) {
            // Hit an empty slot: the key is not in the map.
            return NULL;
        }
        void *bitem = bucket_item(bucket);
        if (bucket->hash == hash && (!map->compare ||
            map->compare(key, bitem, map->udata) == 0))
        {
            // Save the deleted element in the spare scratch bucket so it can
            // be returned to the caller (valid until the next operation).
            memcpy(map->spare, bitem, map->elsize);
            bucket->dib = 0;
            // Backward-shift deletion: pull each following entry one slot
            // closer to its home bucket until an entry that is already home
            // (dib <= 1) or an empty slot is reached.
            while(1) {
                struct bucket *prev = bucket;
                i = (i + 1) & map->mask;
                bucket = bucket_at(map, i);
                if (bucket->dib <= 1) {
                    prev->dib = 0;
                    break;
                }
                memcpy(prev, bucket, map->bucketsz);
                prev->dib--;
            }
            map->count--;
            if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
                // Ignore the return value. It's ok for the resize operation to
                // fail to allocate enough memory because a shrink operation
                // does not change the integrity of the data.
                resize(map, map->nbuckets/2);
            }
            return map->spare;
        }
        i = (i + 1) & map->mask;
    }
}

// hashmap_delete removes an item from the hash map and returns it. If the
// item is not found then NULL is returned.
const void *hashmap_delete(struct hashmap *map, const void *key) {
    return hashmap_delete_with_hash(map, key, get_hash(map, key));
}

// hashmap_count returns the number of items in the hash map.
size_t hashmap_count(struct hashmap *map) {
    return map->count;
}

// hashmap_free frees the hash map
// Every item is called with the element-freeing function given in hashmap_new,
// if present, to free any data referenced in the elements of the hashmap.
401 void hashmap_free(struct hashmap *map) { 402 if (!map) return; 403 free_elements(map); 404 map->free(map->buckets); 405 map->free(map); 406 } 407 408 // hashmap_oom returns true if the last hashmap_set() call failed due to the 409 // system being out of memory. 410 bool hashmap_oom(struct hashmap *map) { 411 return map->oom; 412 } 413 414 // hashmap_scan iterates over all items in the hash map 415 // Param `iter` can return false to stop iteration early. 416 // Returns false if the iteration has been stopped early. 417 bool hashmap_scan(struct hashmap *map, 418 bool (*iter)(const void *item, void *udata), void *udata) 419 { 420 for (size_t i = 0; i < map->nbuckets; i++) { 421 struct bucket *bucket = bucket_at(map, i); 422 if (bucket->dib && !iter(bucket_item(bucket), udata)) { 423 return false; 424 } 425 } 426 return true; 427 } 428 429 // hashmap_iter iterates one key at a time yielding a reference to an 430 // entry at each iteration. Useful to write simple loops and avoid writing 431 // dedicated callbacks and udata structures, as in hashmap_scan. 432 // 433 // map is a hash map handle. i is a pointer to a size_t cursor that 434 // should be initialized to 0 at the beginning of the loop. item is a void 435 // pointer pointer that is populated with the retrieved item. Note that this 436 // is NOT a copy of the item stored in the hash map and can be directly 437 // modified. 438 // 439 // Note that if hashmap_delete() is called on the hashmap being iterated, 440 // the buckets are rearranged and the iterator must be reset to 0, otherwise 441 // unexpected results may be returned after deletion. 442 // 443 // This function has not been tested for thread safety. 444 // 445 // The function returns true if an item was retrieved; false if the end of the 446 // iteration has been reached. 
447 bool hashmap_iter(struct hashmap *map, size_t *i, void **item) { 448 struct bucket *bucket; 449 do { 450 if (*i >= map->nbuckets) return false; 451 bucket = bucket_at(map, *i); 452 (*i)++; 453 } while (!bucket->dib); 454 *item = bucket_item(bucket); 455 return true; 456 } 457 458 459 //----------------------------------------------------------------------------- 460 // SipHash reference C implementation 461 // 462 // Copyright (c) 2012-2016 Jean-Philippe Aumasson 463 // <jeanphilippe.aumasson@gmail.com> 464 // Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to> 465 // 466 // To the extent possible under law, the author(s) have dedicated all copyright 467 // and related and neighboring rights to this software to the public domain 468 // worldwide. This software is distributed without any warranty. 469 // 470 // You should have received a copy of the CC0 Public Domain Dedication along 471 // with this software. If not, see 472 // <http://creativecommons.org/publicdomain/zero/1.0/>. 
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
static uint64_t SIP64(const uint8_t *in, const size_t inlen, uint64_t seed0,
    uint64_t seed1)
{
#define U8TO64_LE(p) \
    { (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
        ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
        ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
        ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) }
#define U64TO8_LE(p, v) \
    { U32TO8_LE((p), (uint32_t)((v))); \
        U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); }
#define U32TO8_LE(p, v) \
    { (p)[0] = (uint8_t)((v)); \
        (p)[1] = (uint8_t)((v) >> 8); \
        (p)[2] = (uint8_t)((v) >> 16); \
        (p)[3] = (uint8_t)((v) >> 24); }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND \
    { v0 += v1; v1 = ROTL(v1, 13); \
        v1 ^= v0; v0 = ROTL(v0, 32); \
        v2 += v3; v3 = ROTL(v3, 16); \
        v3 ^= v2; \
        v0 += v3; v3 = ROTL(v3, 21); \
        v3 ^= v0; \
        v2 += v1; v1 = ROTL(v1, 17); \
        v1 ^= v2; v2 = ROTL(v2, 32); }
    // Initialize the four state words from the 128-bit key (seed0/seed1).
    uint64_t k0 = U8TO64_LE((uint8_t*)&seed0);
    uint64_t k1 = U8TO64_LE((uint8_t*)&seed1);
    uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
    uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
    uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
    uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
    // Compress whole 8-byte words with two SipRounds each ("2" in 2-4).
    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
    for (; in != end; in += 8) {
        uint64_t m = U8TO64_LE(in);
        v3 ^= m;
        SIPROUND; SIPROUND;
        v0 ^= m;
    }
    // Fold the 0-7 trailing bytes plus the length into the final word.
    const int left = inlen & 7;
    uint64_t b = ((uint64_t)inlen) << 56;
    switch (left) {
    case 7: b |= ((uint64_t)in[6]) << 48; /* fall through */
    case 6: b |= ((uint64_t)in[5]) << 40; /* fall through */
    case 5: b |= ((uint64_t)in[4]) << 32; /* fall through */
    case 4: b |= ((uint64_t)in[3]) << 24; /* fall through */
    case 3: b |= ((uint64_t)in[2]) << 16; /* fall through */
    case 2: b |= ((uint64_t)in[1]) << 8; /* fall through */
    case 1: b |= ((uint64_t)in[0]); break;
    case 0: break;
    }
    v3 ^= b;
    SIPROUND; SIPROUND;
    v0 ^= b;
    // Finalization: four SipRounds ("4" in 2-4).
    v2 ^= 0xff;
    SIPROUND; SIPROUND; SIPROUND; SIPROUND;
    b = v0 ^ v1 ^ v2 ^ v3;
    uint64_t out = 0;
    U64TO8_LE((uint8_t*)&out, b);
    return out;
}

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
//
// Murmur3_86_128
//-----------------------------------------------------------------------------
static uint64_t MM86128(const void *key, const int len, uint32_t seed) {
#define ROTL32(x, r) ((x << r) | (x >> (32 - r)))
#define FMIX32(h) h^=h>>16; h*=0x85ebca6b; h^=h>>13; h*=0xc2b2ae35; h^=h>>16;
    const uint8_t * data = (const uint8_t*)key;
    const int nblocks = len / 16;
    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;
    uint32_t c1 = 0x239b961b;
    uint32_t c2 = 0xab0e9789;
    uint32_t c3 = 0x38b34ae5;
    uint32_t c4 = 0xa1e38b93;
    // Body: mix the input sixteen bytes at a time across four lanes.
    const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
    for (int i = -nblocks; i; i++) {
        uint32_t k1 = blocks[i*4+0];
        uint32_t k2 = blocks[i*4+1];
        uint32_t k3 = blocks[i*4+2];
        uint32_t k4 = blocks[i*4+3];
        k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
        h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
        k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
        h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
        k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
        h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
        k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
        h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
    }
    // Tail: fold in the remaining 0-15 bytes.
    const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;
    switch(len & 15) {
    case 15: k4 ^= tail[14] << 16; /* fall through */
    case 14: k4 ^= tail[13] << 8; /* fall through */
    case 13: k4 ^= tail[12] << 0;
        k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
        /* fall through */
    case 12: k3 ^= tail[11] << 24; /* fall through */
    case 11: k3 ^= tail[10] << 16; /* fall through */
    case 10: k3 ^= tail[ 9] << 8; /* fall through */
    case 9: k3 ^= tail[ 8] << 0;
        k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
        /* fall through */
    case 8: k2 ^= tail[ 7] << 24; /* fall through */
    case 7: k2 ^= tail[ 6] << 16; /* fall through */
    case 6: k2 ^= tail[ 5] << 8; /* fall through */
    case 5: k2 ^= tail[ 4] << 0;
        k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
        /* fall through */
    case 4: k1 ^= tail[ 3] << 24; /* fall through */
    case 3: k1 ^= tail[ 2] << 16; /* fall through */
    case 2: k1 ^= tail[ 1] << 8; /* fall through */
    case 1: k1 ^= tail[ 0] << 0;
        k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
        /* fall through */
    };
    // Finalization: mix the length in and avalanche all four lanes.
    h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;
    FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4);
    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;
    // The 128-bit result is truncated to 64 bits (h2:h1).
    return (((uint64_t)h2)<<32)|h1;
}

//-----------------------------------------------------------------------------
// xxHash Library
// Copyright (c) 2012-2021 Yann Collet
// All rights reserved.
//
// BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
//
// xxHash3
//-----------------------------------------------------------------------------
#define XXH_PRIME_1 11400714785074694791ULL
#define XXH_PRIME_2 14029467366897019727ULL
#define XXH_PRIME_3 1609587929392839161ULL
#define XXH_PRIME_4 9650029242287828579ULL
#define XXH_PRIME_5 2870177450012600261ULL

// Unaligned little-endian loads via memcpy (avoids strict-aliasing UB).
static uint64_t XXH_read64(const void* memptr) {
    uint64_t val;
    memcpy(&val, memptr, sizeof(val));
    return val;
}

static uint32_t XXH_read32(const void* memptr) {
    uint32_t val;
    memcpy(&val, memptr, sizeof(val));
    return val;
}

static uint64_t XXH_rotl64(uint64_t x, int r) {
    return (x << r) | (x >> (64 - r));
}

static uint64_t xxh3(const void* data, size_t len, uint64_t seed) {
    const uint8_t* p = (const uint8_t*)data;
    const uint8_t* const end = p + len;
    uint64_t h64;

    if (len >= 32) {
        // Four parallel accumulators over 32-byte stripes.
        const uint8_t* const limit = end - 32;
        uint64_t acc1 = seed + XXH_PRIME_1 + XXH_PRIME_2;
        uint64_t acc2 = seed + XXH_PRIME_2;
        uint64_t acc3 = seed + 0;
        uint64_t acc4 = seed - XXH_PRIME_1;

        do {
            acc1 += XXH_read64(p) * XXH_PRIME_2;
            acc1 = XXH_rotl64(acc1, 31);
            acc1 *= XXH_PRIME_1;

            acc2 += XXH_read64(p + 8) * XXH_PRIME_2;
            acc2 = XXH_rotl64(acc2, 31);
            acc2 *= XXH_PRIME_1;

            acc3 += XXH_read64(p + 16) * XXH_PRIME_2;
            acc3 = XXH_rotl64(acc3, 31);
            acc3 *= XXH_PRIME_1;

            acc4 += XXH_read64(p + 24) * XXH_PRIME_2;
            acc4 = XXH_rotl64(acc4, 31);
            acc4 *= XXH_PRIME_1;

            p += 32;
        } while (p <= limit);

        // Merge the accumulators into a single 64-bit state.
        h64 = XXH_rotl64(acc1, 1) + XXH_rotl64(acc2, 7) + XXH_rotl64(acc3, 12) +
            XXH_rotl64(acc4, 18);

        acc1 *= XXH_PRIME_2;
        acc1 = XXH_rotl64(acc1, 31);
        acc1 *= XXH_PRIME_1;
        h64 ^= acc1;
        h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;

        acc2 *= XXH_PRIME_2;
        acc2 = XXH_rotl64(acc2, 31);
        acc2 *= XXH_PRIME_1;
        h64 ^= acc2;
        h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;

        acc3 *= XXH_PRIME_2;
        acc3 = XXH_rotl64(acc3, 31);
        acc3 *= XXH_PRIME_1;
        h64 ^= acc3;
        h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;

        acc4 *= XXH_PRIME_2;
        acc4 = XXH_rotl64(acc4, 31);
        acc4 *= XXH_PRIME_1;
        h64 ^= acc4;
        h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
    }
    else {
        h64 = seed + XXH_PRIME_5;
    }

    h64 += (uint64_t)len;

    // Consume any remaining 8-byte words...
    while (p + 8 <= end) {
        uint64_t lane = XXH_read64(p);
        lane *= XXH_PRIME_2;
        lane = XXH_rotl64(lane, 31);
        lane *= XXH_PRIME_1;
        h64 ^= lane;
        h64 = XXH_rotl64(h64, 27) * XXH_PRIME_1 + XXH_PRIME_4;
        p += 8;
    }

    // ...then one 4-byte word...
    if (p + 4 <= end) {
        h64 ^= (uint64_t)(XXH_read32(p)) * XXH_PRIME_1;
        h64 = XXH_rotl64(h64, 23) * XXH_PRIME_2 + XXH_PRIME_3;
        p += 4;
    }

    // ...then the final 0-3 bytes.
    while (p < end) {
        h64 ^= (*p) * XXH_PRIME_5;
        h64 = XXH_rotl64(h64, 11) * XXH_PRIME_1;
        p++;
    }

    // Final avalanche.
    h64 ^= h64 >> 33;
    h64 *= XXH_PRIME_2;
    h64 ^= h64 >> 29;
    h64 *= XXH_PRIME_3;
    h64 ^= h64 >> 32;

    return h64;
}

// hashmap_sip returns a hash value for `data` using SipHash-2-4.
uint64_t hashmap_sip(const void *data, size_t len, uint64_t seed0,
    uint64_t seed1)
{
    return SIP64((uint8_t*)data, len, seed0, seed1);
}

// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
746 uint64_t hashmap_murmur(const void *data, size_t len, uint64_t seed0, 747 uint64_t seed1) 748 { 749 (void)seed1; 750 return MM86128(data, len, seed0); 751 } 752 753 uint64_t hashmap_xxhash3(const void *data, size_t len, uint64_t seed0, 754 uint64_t seed1) 755 { 756 (void)seed1; 757 return xxh3(data, len ,seed0); 758 } 759 760 //============================================================================== 761 // TESTS AND BENCHMARKS 762 // $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests 763 // $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out # run benchmarks 764 //============================================================================== 765 #ifdef HASHMAP_TEST 766 767 static size_t deepcount(struct hashmap *map) { 768 size_t count = 0; 769 for (size_t i = 0; i < map->nbuckets; i++) { 770 if (bucket_at(map, i)->dib) { 771 count++; 772 } 773 } 774 return count; 775 } 776 777 #ifdef __GNUC__ 778 #pragma GCC diagnostic ignored "-Wpedantic" 779 #endif 780 #ifdef __clang__ 781 #pragma GCC diagnostic ignored "-Wunknown-warning-option" 782 #pragma GCC diagnostic ignored "-Wcompound-token-split-by-macro" 783 #pragma GCC diagnostic ignored "-Wgnu-statement-expression-from-macro-expansion" 784 #endif 785 #ifdef __GNUC__ 786 #pragma GCC diagnostic ignored "-Wunused-parameter" 787 #endif 788 789 #include <stdlib.h> 790 #include <string.h> 791 #include <time.h> 792 #include <assert.h> 793 #include <stdio.h> 794 #include "hashmap.h" 795 796 static bool rand_alloc_fail = false; 797 static int rand_alloc_fail_odds = 3; // 1 in 3 chance malloc will fail. 
static uintptr_t total_allocs = 0; // outstanding allocations (leak check)
static uintptr_t total_mem = 0;    // outstanding bytes (leak check)

// xmalloc is a size-tracking allocator that, when rand_alloc_fail is set,
// randomly returns NULL to exercise the map's out-of-memory paths. The
// requested size is stashed in a uintptr_t header before the user block.
static void *xmalloc(size_t size) {
    if (rand_alloc_fail && rand()%rand_alloc_fail_odds == 0) {
        return NULL;
    }
    void *mem = malloc(sizeof(uintptr_t)+size);
    assert(mem);
    *(uintptr_t*)mem = size;
    total_allocs++;
    total_mem += size;
    return (char*)mem+sizeof(uintptr_t);
}

// xfree releases a block obtained from xmalloc and updates the counters.
static void xfree(void *ptr) {
    if (ptr) {
        total_mem -= *(uintptr_t*)((char*)ptr-sizeof(uintptr_t));
        free((char*)ptr-sizeof(uintptr_t));
        total_allocs--;
    }
}

// shuffle randomly permutes an array of numels elements of elsize bytes
// (Fisher-Yates using rand()).
static void shuffle(void *array, size_t numels, size_t elsize) {
    char tmp[elsize];
    char *arr = array;
    for (size_t i = 0; i < numels - 1; i++) {
        int j = i + rand() / (RAND_MAX / (numels - i) + 1);
        memcpy(tmp, arr + j * elsize, elsize);
        memcpy(arr + j * elsize, arr + i * elsize, elsize);
        memcpy(arr + i * elsize, tmp, elsize);
    }
}

// iter_ints marks the visited int in the flag array passed through udata.
static bool iter_ints(const void *item, void *udata) {
    int *vals = *(int**)udata;
    vals[*(int*)item] = 1;
    return true;
}

static int compare_ints_udata(const void *a, const void *b, void *udata) {
    return *(int*)a - *(int*)b;
}

// Items are char* pointers; compare the pointed-to strings.
static int compare_strs(const void *a, const void *b, void *udata) {
    return strcmp(*(char**)a, *(char**)b);
}

static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) {
    return hashmap_xxhash3(item, sizeof(int), seed0, seed1);
    // return hashmap_sip(item, sizeof(int), seed0, seed1);
    // return hashmap_murmur(item, sizeof(int), seed0, seed1);
}

static uint64_t hash_str(const void *item, uint64_t seed0, uint64_t seed1) {
    return hashmap_xxhash3(*(char**)item, strlen(*(char**)item), seed0, seed1);
    // return hashmap_sip(*(char**)item, strlen(*(char**)item), seed0, seed1);
    // return hashmap_murmur(*(char**)item, strlen(*(char**)item), seed0, seed1);
}

// free_str is the elfree callback for maps storing owned char* items.
static void free_str(void *item) {
    xfree(*(char**)item);
}

// all runs the full correctness suite, including randomized allocation
// failures (see xmalloc), verifying counts, lookups, deletes, iteration,
// clearing, and leak-freedom.
static void all(void) {
    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
    int N = getenv("N")?atoi(getenv("N")):2000;
    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
    srand(seed);

    rand_alloc_fail = true;

    // test sip and murmur hashes
    assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622);
    assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884);
    assert(hashmap_xxhash3("hello", 5, 1, 2) == 2584346877953614258);

    int *vals;
    while (!(vals = xmalloc(N * sizeof(int)))) {}
    for (int i = 0; i < N; i++) {
        vals[i] = i;
    }

    struct hashmap *map;

    while (!(map = hashmap_new(sizeof(int), 0, seed, seed,
        hash_int, compare_ints_udata, NULL, NULL))) {}
    shuffle(vals, N, sizeof(int));
    for (int i = 0; i < N; i++) {
        // // printf("== %d ==\n", vals[i]);
        assert(map->count == (size_t)i);
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
        const int *v;
        assert(!hashmap_get(map, &vals[i]));
        assert(!hashmap_delete(map, &vals[i]));
        // Retry inserts until one survives the random allocation failures.
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }

        for (int j = 0; j < i; j++) {
            v = hashmap_get(map, &vals[j]);
            assert(v && *v == vals[j]);
        }
        while (true) {
            v = hashmap_set(map, &vals[i]);
            if (!v) {
                assert(hashmap_oom(map));
                continue;
            } else {
                assert(!hashmap_oom(map));
                assert(v && *v == vals[i]);
                break;
            }
        }
        v = hashmap_get(map, &vals[i]);
        assert(v && *v == vals[i]);
        v = hashmap_delete(map, &vals[i]);
        assert(v && *v == vals[i]);
        assert(!hashmap_get(map, &vals[i]));
        assert(!hashmap_delete(map, &vals[i]));
        assert(!hashmap_set(map, &vals[i]));
        assert(map->count == (size_t)(i+1));
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
    }

    int *vals2;
    while (!(vals2 = xmalloc(N * sizeof(int)))) {}
    memset(vals2, 0, N * sizeof(int));
    assert(hashmap_scan(map, iter_ints, &vals2));

    // Test hashmap_iter. This does the same as hashmap_scan above.
    size_t iter = 0;
    void *iter_val;
    while (hashmap_iter (map, &iter, &iter_val)) {
        assert (iter_ints(iter_val, &vals2));
    }
    for (int i = 0; i < N; i++) {
        assert(vals2[i] == 1);
    }
    xfree(vals2);

    shuffle(vals, N, sizeof(int));
    for (int i = 0; i < N; i++) {
        const int *v;
        v = hashmap_delete(map, &vals[i]);
        assert(v && *v == vals[i]);
        assert(!hashmap_get(map, &vals[i]));
        assert(map->count == (size_t)(N-i-1));
        assert(map->count == hashmap_count(map));
        assert(map->count == deepcount(map));
        for (int j = N-1; j > i; j--) {
            v = hashmap_get(map, &vals[j]);
            assert(v && *v == vals[j]);
        }
    }

    for (int i = 0; i < N; i++) {
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }
    }

    assert(map->count != 0);
    size_t prev_cap = map->cap;
    hashmap_clear(map, true);
    assert(prev_cap < map->cap);
    assert(map->count == 0);


    for (int i = 0; i < N; i++) {
        while (true) {
            assert(!hashmap_set(map, &vals[i]));
            if (!hashmap_oom(map)) {
                break;
            }
        }
    }

    prev_cap = map->cap;
    hashmap_clear(map, false);
    assert(prev_cap == map->cap);

    hashmap_free(map);

    xfree(vals);


    // Repeat with heap-owned string items and an elfree destructor.
    while (!(map = hashmap_new(sizeof(char*), 0, seed, seed,
        hash_str, compare_strs, free_str, NULL)));

    for (int i = 0; i < N; i++) {
        char *str;
        while (!(str = xmalloc(16)));
        snprintf(str, 16, "s%i", i);
        while(!hashmap_set(map, &str));
    }

    hashmap_clear(map, false);
    assert(hashmap_count(map) == 0);

    for (int i = 0; i < N; i++) {
        char *str;
        while (!(str = xmalloc(16)));
        snprintf(str, 16, "s%i", i);
        while(!hashmap_set(map, &str));
    }

    hashmap_free(map);

    if (total_allocs != 0) {
        fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
        exit(1);
    }
}

// bench runs `code` N times and prints throughput plus memory stats.
#define bench(name, N, code) {{ \
    if (strlen(name) > 0) { \
        printf("%-14s ", name); \
    } \
    size_t tmem = total_mem; \
    size_t tallocs = total_allocs; \
    uint64_t bytes = 0; \
    clock_t begin = clock(); \
    for (int i = 0; i < N; i++) { \
        (code); \
    } \
    clock_t end = clock(); \
    double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
    double bytes_sec = (double)bytes/elapsed_secs; \
    printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \
        N, elapsed_secs, \
        elapsed_secs/(double)N*1e9, \
        (double)N/elapsed_secs \
    ); \
    if (bytes > 0) { \
        printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \
    } \
    if (total_mem > tmem) { \
        size_t used_mem = total_mem-tmem; \
        printf(", %.2f bytes/op", (double)used_mem/N); \
    } \
    if (total_allocs > tallocs) { \
        size_t used_allocs = total_allocs-tallocs; \
        printf(", %.2f allocs/op", (double)used_allocs/N); \
    } \
    printf("\n"); \
}}

// NOTE(review): benchmarks() is truncated at the end of the visible source;
// the remainder is preserved exactly as-is.
static void benchmarks(void) {
    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
    int N = getenv("N")?atoi(getenv("N")):5000000;
    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
    srand(seed);


    int *vals = xmalloc(N * sizeof(int));
    for (int i = 0; i < N; i++) {
        vals[i] = i;
    }

    shuffle(vals, N, sizeof(int));

    struct hashmap *map;
    shuffle(vals, N, sizeof(int));

    map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata,
        NULL, NULL);
    bench("set", N, {
        const int *v = hashmap_set(map, &vals[i]);
        assert(!v);
    })
    shuffle(vals, N, sizeof(int));
    bench("get", N, {
        const int *v = hashmap_get(map,
&vals[i]); 1080 assert(v && *v == vals[i]); 1081 }) 1082 shuffle(vals, N, sizeof(int)); 1083 bench("delete", N, { 1084 const int *v = hashmap_delete(map, &vals[i]); 1085 assert(v && *v == vals[i]); 1086 }) 1087 hashmap_free(map); 1088 1089 map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints_udata, 1090 NULL, NULL); 1091 bench("set (cap)", N, { 1092 const int *v = hashmap_set(map, &vals[i]); 1093 assert(!v); 1094 }) 1095 shuffle(vals, N, sizeof(int)); 1096 bench("get (cap)", N, { 1097 const int *v = hashmap_get(map, &vals[i]); 1098 assert(v && *v == vals[i]); 1099 }) 1100 shuffle(vals, N, sizeof(int)); 1101 bench("delete (cap)" , N, { 1102 const int *v = hashmap_delete(map, &vals[i]); 1103 assert(v && *v == vals[i]); 1104 }) 1105 1106 hashmap_free(map); 1107 1108 1109 xfree(vals); 1110 1111 if (total_allocs != 0) { 1112 fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs); 1113 exit(1); 1114 } 1115 } 1116 1117 int main(void) { 1118 hashmap_set_allocator(xmalloc, xfree); 1119 1120 if (getenv("BENCH")) { 1121 printf("Running hashmap.c benchmarks...\n"); 1122 benchmarks(); 1123 } else { 1124 printf("Running hashmap.c tests...\n"); 1125 all(); 1126 printf("PASSED\n"); 1127 } 1128 } 1129 1130 1131 #endif 1132 1133 1134