/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 */

#include "db_config.h"

#ifndef lint
static const char sccsid[] = "@(#)lock_deadlock.c	11.7 (Sleepycat) 10/19/99";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "db_shash.h"
#include "lock.h"
#include "txn.h"

/*
 * The waits-for graph is a square bit matrix: one row per locker, each row
 * packed into 32-bit words.  Bit j set in row i means locker i waits for
 * locker j.
 */
#define	ISSET_MAP(M, N)	((M)[(N) / 32] & (1 << (N) % 32))

#define	CLEAR_MAP(M, N) {						\
	u_int32_t __i;							\
	for (__i = 0; __i < (N); __i++)					\
		(M)[__i] = 0;						\
}

#define	SET_MAP(M, B)	((M)[(B) / 32] |= (1 << ((B) % 32)))
#define	CLR_MAP(M, B)	((M)[(B) / 32] &= ~(1 << ((B) % 32)))

#define	OR_MAP(D, S, N)	{						\
	u_int32_t __i;							\
	for (__i = 0; __i < (N); __i++)					\
		D[__i] |= S[__i];					\
}
#define	BAD_KILLID	0xffffffff

/* Per-locker bookkeeping gathered while building the waits-for matrix. */
typedef struct {
	int		valid;		/* Set if this entry is in use. */
	u_int32_t	id;		/* Locker (transaction) id. */
	u_int32_t	last_lock;	/* Region offset of last waiting lock. */
	u_int32_t	last_locker_id;	/* Locker owning last_lock. */
	db_pgno_t	pgno;		/* Page of last lock (debugging aid). */
} locker_info;

static int CDB___dd_abort __P((DB_ENV *, locker_info *));
static int CDB___dd_build
    __P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **));
static int CDB___dd_find
    __P((u_int32_t *, locker_info *, u_int32_t, u_int32_t ***));

#ifdef DIAGNOSTIC
static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t));
#endif

/*
 * CDB_lock_detect --
 *	Run one pass of the deadlock detector: build the waits-for matrix,
 *	find cycles, and abort one locker per cycle according to "atype"
 *	(DB_LOCK_OLDEST, DB_LOCK_YOUNGEST, or DB_LOCK_DEFAULT/RANDOM).
 *	If "abortp" is non-NULL it is set to the number of deadlocks found.
 *	With DB_LOCK_CONFLICT set, the pass is skipped unless a lock has
 *	waited since the last pass (region->need_dd).
 */
int
CDB_lock_detect(dbenv, flags, atype, abortp)
	DB_ENV *dbenv;
	u_int32_t flags, atype;
	int *abortp;
{
	DB_LOCKREGION *region;
	DB_LOCKTAB *lt;
	locker_info *idmap;
	u_int32_t *bitmap, **deadp, **free_me, i, killid, nentries, nlockers;
	int do_pass, ret;

	PANIC_CHECK(dbenv);
	ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK);

	lt = dbenv->lk_handle;
	if (abortp != NULL)
		*abortp = 0;

	/* Validate arguments. */
	if ((ret = CDB___db_fchk(dbenv,
	    "CDB_lock_detect", flags, DB_LOCK_CONFLICT)) != 0)
		return (ret);

	/* Check if a detector run is necessary. */
	LOCKREGION(dbenv, lt);
	if (LF_ISSET(DB_LOCK_CONFLICT)) {
		/* Make a pass only if a lock has waited since the last one. */
		MEMORY_LOCK(lt);
		region = lt->reginfo.primary;
		do_pass = region->need_dd != 0;
		MEMORY_UNLOCK(lt);

		if (!do_pass) {
			UNLOCKREGION(dbenv, lt);
			return (0);
		}
	}

	/* Build the waits-for bitmap. */
	ret = CDB___dd_build(dbenv, &bitmap, &nlockers, &idmap);
	UNLOCKREGION(dbenv, lt);
	if (ret != 0)
		return (ret);

	/* CDB___dd_build allocates nothing when there are no lockers. */
	if (nlockers == 0)
		return (0);
#ifdef DIAGNOSTIC
	if (FLD_ISSET(dbenv->verbose, DB_VERB_WAITSFOR))
		__dd_debug(dbenv, idmap, bitmap, nlockers);
#endif
	/* Find a deadlock. */
	if ((ret = CDB___dd_find(bitmap, idmap, nlockers, &deadp)) != 0) {
		/* Don't leak the tables on allocation failure. */
		CDB___os_free(bitmap, 0);
		CDB___os_free(idmap, 0);
		return (ret);
	}

	/*
	 * nlockers is the allocated row count of the matrix (see the end of
	 * CDB___dd_build), so this stride matches the one used to build it.
	 */
	nentries = ALIGN(nlockers, 32) / 32;
	killid = BAD_KILLID;
	free_me = deadp;
	for (; *deadp != NULL; deadp++) {
		if (abortp != NULL)
			++*abortp;
		switch (atype) {			/* Kill someone. */
		case DB_LOCK_OLDEST:
			/*
			 * Find the first bit set in the current array and
			 * then look for a lower tid in the array.
			 */
			for (i = 0; i < nlockers; i++)
				if (ISSET_MAP(*deadp, i)) {
					killid = i;
					break;
				}

			/*
			 * It's conceivable that under XA, the locker could
			 * have gone away.
			 */
			if (killid == BAD_KILLID)
				break;

			/*
			 * The oldest transaction has the lowest
			 * transaction id.
			 */
			for (i = killid + 1; i < nlockers; i++)
				if (ISSET_MAP(*deadp, i) &&
				    idmap[i].id < idmap[killid].id)
					killid = i;
			break;
		case DB_LOCK_DEFAULT:
		case DB_LOCK_RANDOM:
			/*
			 * We are trying to calculate the id of the
			 * locker whose entry is indicated by deadlock.
			 */
			killid = (*deadp - bitmap) / nentries;
			break;
		case DB_LOCK_YOUNGEST:
			/*
			 * Find the first bit set in the current array and
			 * then look for a higher tid in the array.
			 */
			for (i = 0; i < nlockers; i++)
				if (ISSET_MAP(*deadp, i)) {
					killid = i;
					break;
				}

			/*
			 * It's conceivable that under XA, the locker could
			 * have gone away.
			 */
			if (killid == BAD_KILLID)
				break;

			/*
			 * The youngest transaction has the highest
			 * transaction id.
			 */
			for (i = killid + 1; i < nlockers; i++)
				if (ISSET_MAP(*deadp, i) &&
				    idmap[i].id > idmap[killid].id)
					killid = i;
			break;
		default:
			killid = BAD_KILLID;
			ret = EINVAL;
		}

		if (killid == BAD_KILLID)
			continue;

		/* Kill the locker with lockid idmap[killid]. */
		if ((ret = CDB___dd_abort(dbenv, &idmap[killid])) != 0) {
			/*
			 * It's possible that the lock was already aborted;
			 * this isn't necessarily a problem, so do not treat
			 * it as an error.
			 */
			if (ret == EINVAL)
				ret = 0;
			else
				CDB___db_err(dbenv,
				    "warning: unable to abort locker %lx",
				    (u_long)idmap[killid].id);
		} else if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
			CDB___db_err(dbenv,
			    "Aborting locker %lx", (u_long)idmap[killid].id);
	}
	CDB___os_free(free_me, 0);
	CDB___os_free(bitmap, 0);
	CDB___os_free(idmap, 0);
	return (ret);
}

/*
 * ========================================================================
 * Utilities
 */

#define	DD_INVALID_ID	((u_int32_t) -1)

/*
 * CDB___dd_build --
 *	Snapshot the lock table into a waits-for bit matrix.  On success
 *	*bmp is the matrix, *idmap the parallel locker-info array, and
 *	*nlockers the allocated row count (also the row stride in 32-bit
 *	words after ALIGN(.., 32) / 32).  When there are no lockers,
 *	*nlockers is 0 and nothing is allocated.  The caller frees *bmp
 *	and *idmap.
 */
static int
CDB___dd_build(dbenv, bmp, nlockers, idmap)
	DB_ENV *dbenv;
	u_int32_t **bmp, *nlockers;
	locker_info **idmap;
{
	struct __db_lock *lp;
	DB_LOCKER *lip, *lockerp, *child;
	DB_LOCKOBJ *op, *lo;
	DB_LOCKREGION *region;
	DB_LOCKTAB *lt;
	locker_info *id_array;
	u_int32_t *bitmap, count, dd, *entryp, i, id, ndx, nentries, *tmpmap;
	u_int8_t *pptr;
	int is_first, ret;

	lt = dbenv->lk_handle;
	region = lt->reginfo.primary;

	/*
	 * We'll check how many lockers there are, add a few more in for
	 * good measure and then allocate all the structures.  Then we'll
	 * verify that we have enough room when we go back in and get the
	 * mutex the second time.
	 */
	MEMORY_LOCK(lt);
retry:	count = region->nlockers;
	region->need_dd = 0;
	MEMORY_UNLOCK(lt);

	if (count == 0) {
		*nlockers = 0;
		return (0);
	}

	if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK))
		CDB___db_err(dbenv, "%lu lockers", (u_long)count);

	/* Pad so lockers created during the unlocked window still fit. */
	count += 40;
	nentries = ALIGN(count, 32) / 32;

	/*
	 * Allocate enough space for a count by count bitmap matrix.
	 *
	 * XXX
	 * We can probably save the malloc's between iterations just
	 * reallocing if necessary because count grew by too much.
	 */
	if ((ret = CDB___os_calloc((size_t)count,
	    sizeof(u_int32_t) * nentries, &bitmap)) != 0)
		return (ret);

	if ((ret =
	    CDB___os_calloc(sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
		CDB___os_free(bitmap, count * sizeof(u_int32_t) * nentries);
		return (ret);
	}

	if ((ret = CDB___os_calloc((size_t)count,
	    sizeof(locker_info), &id_array)) != 0) {
		CDB___os_free(bitmap, count * sizeof(u_int32_t) * nentries);
		CDB___os_free(tmpmap, sizeof(u_int32_t) * nentries);
		return (ret);
	}

	/*
	 * Now go back in and actually fill in the matrix.  If more lockers
	 * appeared than we allowed for, start over with the larger count.
	 */
	MEMORY_LOCK(lt);
	if (region->nlockers > count) {
		CDB___os_free(bitmap, count * sizeof(u_int32_t) * nentries);
		CDB___os_free(tmpmap, sizeof(u_int32_t) * nentries);
		CDB___os_free(id_array, count * sizeof(locker_info));
		goto retry;
	}
	MEMORY_UNLOCK(lt);

	/*
	 * First we go through and assign each locker a deadlock detector
	 * id.  Only master lockers get an id; children are folded into
	 * their master below.
	 */
	for (id = 0, i = 0; i < region->table_size; i++) {
		LOCKER_LOCK_NDX(lt, i);
		for (lip = SH_TAILQ_FIRST(&lt->locker_tab[i], __db_locker);
		    lip != NULL;
		    lip = SH_TAILQ_NEXT(lip, links, __db_locker))
			if (lip->master_locker == INVALID_ROFF) {
				lip->dd_id = id++;
				id_array[lip->dd_id].id = lip->id;
			} else
				lip->dd_id = DD_INVALID_ID;
		LOCKER_UNLOCK(lt, i);
	}

	/*
	 * We go through the hash table and find each object.  For each
	 * object, we traverse the waiters list and add an entry in the
	 * waitsfor matrix for each waiter/holder combination.  We acquire
	 * the hash bucket locks as we go and then release them all at the
	 * end.
	 */
	for (i = 0; i < region->table_size; i++) {
		OBJECT_LOCK_NDX(lt, i);
		for (op = SH_TAILQ_FIRST(&lt->obj_tab[i], __db_lockobj);
		    op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
			CLEAR_MAP(tmpmap, nentries);

			/*
			 * First we go through and create a bit map that
			 * represents all the holders of this object.
			 */
			for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
			    lp != NULL;
			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
				LOCKER_LOCK(lt, region, lp->holder, ndx);
				if ((ret = CDB___lock_getlocker(lt,
				    lp->holder, ndx, 0, &lockerp)) != 0) {
					LOCKER_UNLOCK(lt, ndx);
					continue;
				}
				/* Child lockers map to their master's row. */
				if (lockerp->dd_id == DD_INVALID_ID)
					dd = ((DB_LOCKER *)
					    R_ADDR(&lt->reginfo,
					    lockerp->master_locker))->dd_id;
				else
					dd = lockerp->dd_id;
				id_array[dd].valid = 1;

				/*
				 * If the holder has already been aborted,
				 * then we should ignore it for now.
				 */
				if (lp->status == DB_LSTAT_HELD)
					SET_MAP(tmpmap, dd);
				LOCKER_UNLOCK(lt, ndx);
			}

			/*
			 * Next, for each waiter, we set its row in the
			 * matrix equal to the map of holders we set up
			 * above.
			 */
			for (is_first = 1,
			    lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
			    lp != NULL;
			    is_first = 0,
			    lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
				LOCKER_LOCK(lt, region, lp->holder, ndx);
				if ((ret = CDB___lock_getlocker(lt,
				    lp->holder, ndx, 0, &lockerp)) != 0) {
					LOCKER_UNLOCK(lt, ndx);
					continue;
				}
				if (lockerp->dd_id == DD_INVALID_ID)
					dd = ((DB_LOCKER *)
					    R_ADDR(&lt->reginfo,
					    lockerp->master_locker))->dd_id;
				else
					dd = lockerp->dd_id;
				id_array[dd].valid = 1;
				LOCKER_UNLOCK(lt, ndx);

				/*
				 * If the transaction is pending abortion,
				 * then ignore it on this iteration.
				 */
				if (lp->status != DB_LSTAT_WAITING)
					continue;

				entryp = bitmap + (nentries * dd);
				OR_MAP(entryp, tmpmap, nentries);
				/*
				 * If this is the first waiter on the queue,
				 * then we remove the waitsfor relationship
				 * with oneself.  However, if it's anywhere
				 * else on the queue, then we have to keep
				 * it and we have an automatic deadlock.
				 */
				if (is_first)
					CLR_MAP(entryp, dd);
			}
		}
	}

	/*
	 * We now have a snapshot of the entire lock table.  Release the
	 * object mutexes.
	 */
	for (i = 0; i < region->table_size; i++)
		OBJECT_UNLOCK(lt, i);

	/* Now for each locker; record its last lock. */
	for (id = 0; id < count; id++) {
		if (!id_array[id].valid)
			continue;
		LOCKER_LOCK(lt, region, id_array[id].id, ndx);
		if ((ret = CDB___lock_getlocker(lt,
		    id_array[id].id, ndx, 0, &lockerp)) != 0) {
			CDB___db_err(dbenv, "No locks for locker %lu",
			    (u_long)id_array[id].id);
			LOCKER_UNLOCK(lt, ndx);
			continue;
		}

		/*
		 * If this is a master transaction, try to find one of its
		 * children's locks first, as they are probably more recent.
		 */
		child = SH_LIST_FIRST(&lockerp->child_locker, __db_locker);
		if (child != NULL) {
			do {
				lp = SH_LIST_FIRST(&child->heldby, __db_lock);
				if (lp != NULL &&
				    lp->status == DB_LSTAT_WAITING) {
					id_array[id].last_locker_id =
					    child->id;
					goto get_lock;
				}
				child = SH_LIST_NEXT(
				    child, child_link, __db_locker);
			} while (child != NULL);
		}
		lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
		if (lp != NULL) {
			id_array[id].last_locker_id = lockerp->id;
get_lock:		id_array[id].last_lock =
			    R_OFFSET(&lt->reginfo, lp);
			lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
			pptr = SH_DBT_PTR(&lo->lockobj);
			if (lo->lockobj.size >= sizeof(db_pgno_t))
				memcpy(&id_array[id].pgno,
				    pptr, sizeof(db_pgno_t));
			else
				id_array[id].pgno = 0;
		}
		LOCKER_UNLOCK(lt, ndx);
	}

	/* Pass complete, reset the deadlock detector bit. */
	MEMORY_LOCK(lt);
	region->need_dd = 0;
	MEMORY_UNLOCK(lt);

	/*
	 * Now we can release everything except the bitmap matrix that we
	 * created.  Note the loop above left "id" equal to "count", the
	 * allocated row count of the matrix -- callers rely on that value
	 * to compute the matrix stride.
	 */
	*nlockers = id;
	*idmap = id_array;
	*bmp = bitmap;
	CDB___os_free(tmpmap, sizeof(u_int32_t) * nentries);
	return (0);
}

/*
 * CDB___dd_find --
 *	Walk the waits-for matrix computing the transitive closure of each
 *	row; a locker whose closure includes itself is deadlocked.  Returns
 *	a NULL-terminated array of pointers to the deadlocked rows through
 *	*deadp; the caller frees it.
 */
static int
CDB___dd_find(bmp, idmap, nlockers, deadp)
	u_int32_t *bmp, nlockers;
	locker_info *idmap;
	u_int32_t ***deadp;
{
	u_int32_t i, j, k, nentries, *mymap, *tmpmap;
	u_int32_t **retp;
	int ndead, ndeadalloc, ret;

#undef	INITIAL_DEAD_ALLOC
#define	INITIAL_DEAD_ALLOC	8

	ndeadalloc = INITIAL_DEAD_ALLOC;
	ndead = 0;
	if ((ret = CDB___os_malloc(
	    ndeadalloc * sizeof(u_int32_t *), NULL, &retp)) != 0)
		return (ret);

	/*
	 * For each locker, OR in the bits from the lockers on which that
	 * locker is waiting.
	 */
	nentries = ALIGN(nlockers, 32) / 32;
	for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) {
		if (!idmap[i].valid)
			continue;
		for (j = 0; j < nlockers; j++) {
			if (!ISSET_MAP(mymap, j))
				continue;

			/* Find the map for this bit. */
			tmpmap = bmp + (nentries * j);
			OR_MAP(mymap, tmpmap, nentries);
			if (!ISSET_MAP(mymap, i))
				continue;

			/* Make sure we leave room for NULL. */
			if (ndead + 2 >= ndeadalloc) {
				ndeadalloc <<= 1;
				/*
				 * If the alloc fails, then simply return
				 * the deadlocks that we already have.
				 *
				 * The element size is sizeof(u_int32_t *)
				 * to match the malloc above; sizing by
				 * sizeof(u_int32_t) would undersize the
				 * array on LP64 platforms.
				 */
				if (CDB___os_realloc(
				    ndeadalloc * sizeof(u_int32_t *),
				    NULL, &retp) != 0) {
					retp[ndead] = NULL;
					*deadp = retp;
					return (0);
				}
			}
			retp[ndead++] = mymap;

			/* Mark all participants in this deadlock invalid. */
			for (k = 0; k < nlockers; k++)
				if (ISSET_MAP(mymap, k))
					idmap[k].valid = 0;
			break;
		}
	}
	retp[ndead] = NULL;
	*deadp = retp;
	return (0);
}

/*
 * CDB___dd_abort --
 *	Abort the waiting lock recorded in "info": mark it DB_LSTAT_ABORTED,
 *	remove it from its object's waiters queue, promote the queue, and
 *	wake the waiter.  Returns EINVAL if the lock is gone or no longer
 *	waiting (the locker may have resolved on its own), which callers
 *	treat as benign.
 */
static int
CDB___dd_abort(dbenv, info)
	DB_ENV *dbenv;
	locker_info *info;
{
	struct __db_lock *lockp;
	DB_LOCKER *lockerp;
	DB_LOCKOBJ *sh_obj;
	DB_LOCKREGION *region;
	DB_LOCKTAB *lt;
	u_int32_t ndx;
	int ret;

	lt = dbenv->lk_handle;
	region = lt->reginfo.primary;

	LOCKREGION(dbenv, lt);
	/* Find the locker's last lock. */
	LOCKER_LOCK(lt, region, info->last_locker_id, ndx);
	if ((ret = CDB___lock_getlocker(lt,
	    info->last_locker_id, ndx, 0, &lockerp)) != 0 ||
	    lockerp == NULL) {
		if (ret == 0)
			ret = EINVAL;
		goto out;
	}

	lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);

	/*
	 * It's possible that this locker was already aborted.  If that's
	 * the case, make sure that we remove its locker from the hash
	 * table.
	 */
	if (lockp == NULL) {
		if (LOCKER_FREEABLE(lockerp)) {
			CDB___lock_freelocker(lt, region, lockerp, ndx);
			goto out;
		}
		/*
		 * Nothing to abort and nothing to free: report EINVAL
		 * instead of falling through and dereferencing the NULL
		 * lock pointer.
		 */
		ret = EINVAL;
		goto out;
	} else if (R_OFFSET(&lt->reginfo, lockp) != info->last_lock ||
	    lockp->status != DB_LSTAT_WAITING) {
		ret = EINVAL;
		goto out;
	}

	sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
	SH_LIST_REMOVE(lockp, locker_links, __db_lock);
	LOCKER_UNLOCK(lt, ndx);

	/* Abort lock, take it off list, and wake up this lock. */
	SHOBJECT_LOCK(lt, region, sh_obj, ndx);
	lockp->status = DB_LSTAT_ABORTED;
	SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
	ret = CDB___lock_promote(lt, sh_obj);
	OBJECT_UNLOCK(lt, ndx);
	/* Wake the aborted waiter. */
	MUTEX_UNLOCK(&lockp->mutex);

	MEMORY_LOCK(lt);
	region->ndeadlocks++;
	MEMORY_UNLOCK(lt);
	UNLOCKREGION(dbenv, lt);

	return (0);

out:	UNLOCKREGION(dbenv, lt);
	LOCKER_UNLOCK(lt, ndx);
	return (ret);
}

#ifdef DIAGNOSTIC
/*
 * __dd_debug --
 *	Dump the waits-for matrix, one line per valid waiter, via
 *	CDB___db_err.
 */
static void
__dd_debug(dbenv, idmap, bitmap, nlockers)
	DB_ENV *dbenv;
	locker_info *idmap;
	u_int32_t *bitmap, nlockers;
{
	u_int32_t i, j, *mymap, nentries;
	int ret;
	char *msgbuf, *p;

	CDB___db_err(dbenv, "Waitsfor array\nWaiter:\tWaiting on:");

	/* Allocate space to print 10 bytes per item waited on. */
#undef	MSGBUF_LEN
#define	MSGBUF_LEN ((nlockers + 1) * 10 + 64)
	if ((ret = CDB___os_malloc(MSGBUF_LEN, NULL, &msgbuf)) != 0)
		return;

	nentries = ALIGN(nlockers, 32) / 32;
	for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
		if (!idmap[i].valid)
			continue;
		/*
		 * Build the line with a cursor: passing msgbuf to sprintf
		 * as both source and destination is undefined behavior.
		 */
		p = msgbuf;
		p += sprintf(p,			/* Waiter. */
		    "%lx/%lu:\t", (u_long)idmap[i].id, (u_long)idmap[i].pgno);
		for (j = 0; j < nlockers; j++)
			if (ISSET_MAP(mymap, j))
				p += sprintf(p,
				    " %lx", (u_long)idmap[j].id);
		(void)sprintf(p, " %lu", (u_long)idmap[i].last_lock);
		/* Never pass the built line as a format string. */
		CDB___db_err(dbenv, "%s", msgbuf);
	}

	CDB___os_free(msgbuf, MSGBUF_LEN);
}
#endif