1 files changed, 0 insertions, 1153 deletions
diff --git a/externals/mysql/mysys/waiting_threads.c b/externals/mysql/mysys/waiting_threads.c
deleted file mode 100644
index 46b3c1d4910..00000000000
--- a/externals/mysql/mysys/waiting_threads.c
+++ /dev/null
@@ -1,1153 +0,0 @@
-/* Copyright (C) 2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
-
-/**
-  @file
-
-  "waiting threads" subsystem - a unified interface for threads to wait
-  on each other, with built-in deadlock detection.
-
-  Main concepts
-  ^^^^^^^^^^^^^
-  a thread - is represented by a WT_THD structure. One physical thread
-    can have only one WT_THD descriptor at any given moment.
-
-  a resource - a thread does not wait for other threads directly,
-    instead it waits for a "resource", which is "owned" by other threads.
-    It waits, exactly, for all "owners" to "release" a resource.
-    It does not have to correspond to a physical resource. For example, it
-    may be convenient in certain cases to force resource == thread.
-    A resource is represented by a WT_RESOURCE structure. 
-
-  a resource identifier - a pair of {resource type, value}. A value is
-    an ulonglong number. Represented by a WT_RESOURCE_ID structure.
-
-  a resource type - a pointer to a statically defined instance of
-    WT_RESOURCE_TYPE structure. This structure contains a pointer to
-    a function that knows how to compare values of this resource type.
-    In the simple case it could be wt_resource_id_memcmp().
-
-  a wait-for graph - a graph, that represenst "wait-for" relationships.
-    It has two types of nodes - threads and resources. There are directed
-    edges from a thread to a resource it is waiting for (WT_THD::waiting_for),
-    from a thread to resources that it "owns" (WT_THD::my_resources),
-    and from a resource to threads that "own" it (WT_RESOURCE::owners)
-
-  Graph completeness
-  ^^^^^^^^^^^^^^^^^^
-
-  For flawless deadlock detection wait-for graph must be complete.
-  It means that when a thread starts waiting it needs to know *all* its
-  blockers, and call wt_thd_will_wait_for() for every one of them.
-  Otherwise two phenomena should be expected:
-
-  1. Fuzzy timeouts:
-
-    thread A needs to get a lock, and is blocked by a thread B.
-    it waits.
-    Just before the timeout thread B releases the lock.
-    thread A is ready to grab the lock but discovers that it is also
-    blocked by a thread C.
-    It waits and times out.
-
-    As a result thread A has waited two timeout intervals, instead of one.
-
-  2. Unreliable cycle detection:
-
-     Thread A waits for threads B and C
-     Thread C waits for D
-     Thread D wants to start waiting for A
-
-     one can see immediately that thread D creates a cycle, and thus
-     a deadlock is detected.
-
-     But if thread A would only wait for B, and start waiting for C
-     when B would unlock, thread D would be allowed to wait, a deadlock
-     would be only detected when B unlocks or somebody times out.
-
-  These two phenomena don't affect a correctness, and strictly speaking,
-  the caller is not required to call wt_thd_will_wait_for() for *all*
-  blockers - it may optimize wt_thd_will_wait_for() calls. But they
-  may be perceived as bugs by users, it must be understood that such
-  an optimization comes with its price.
-
-  Usage
-  ^^^^^
-
-  First, the wt* subsystem must be initialized by calling
-  wt_init(). In the server you don't need to do it, it's done
-  in mysqld.cc.
-
-  Similarly, wt_end() frees wt* structures, should be called
-  at the end, but in the server mysqld.cc takes care of that.
-
-  Every WT_THD should be initialized with wt_thd_lazy_init().
-  After that they can be used in other wt_thd_* calls.
-  Before discarding, WT_THD should be free'd with
-  wt_thd_destroy(). In the server both are handled in sql_class.cc,
-  it's an error to try to do it manually.
-
-  To use the deadlock detection one needs to use this thread's WT_THD,
-  call wt_thd_will_wait_for() for every thread it needs to wait on,
-  then call wt_thd_cond_timedwait(). When thread releases a resource
-  it should call wt_thd_release() (or wt_thd_release_all()) - it will
-  notify (send a signal) threads waiting in wt_thd_cond_timedwait(),
-  if appropriate.
-
-  Just like with pthread's cond_wait, there could be spurious
-  wake-ups from wt_thd_cond_timedwait(). A caller is expected to
-  handle that (that is, to re-check the blocking criteria).
-
-  wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either
-  WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return
-  WT_TIMEOUT. Out of memory and other fatal errors are reported as
-  WT_DEADLOCK - and a transaction must be aborted just the same.
-
-  Configuration
-  ^^^^^^^^^^^^^
-  There are four config variables. Two deadlock search depths - short and
-  long - and two timeouts. Deadlock search is performed with the short
-  depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait()
-  waits with a short timeout, performs a deadlock search with the long
-  depth, and waits with a long timeout. As most deadlock cycles are supposed
-  to be short, most deadlocks will be detected at once, and waits will
-  rarely be necessary.
-
-  These config variables are thread-local. Different threads may have
-  different search depth and timeout values.
-
-  Also, deadlock detector supports different killing strategies, the victim
-  in a deadlock cycle is selected based on the "weight". See "weight"
-  description in waiting_threads.h for details. It's up to the caller to
-  set weights accordingly.
-
-  Status
-  ^^^^^^
-  We calculate the number of successfull waits (WT_OK returned from
-  wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle
-  length distribution - number of deadlocks with every length from
-  1 to WT_CYCLE_STATS, and a wait time distribution - number
-  of waits with a time from 1 us to 1 min in WT_WAIT_STATS
-  intervals on a log e scale.
-*/
-
-/*
-  Note that if your lock system satisfy the following condition:
-
-    there exist four lock levels A, B, C, D, such as
-      A is compatible with B
-      A is not compatible with C
-      D is not compatible with B
-
-      (example A=IX, B=IS, C=S, D=X)
-
-   you need to include lock level in the resource identifier - a
-   thread waiting for lock of the type A on resource R and another
-   thread waiting for lock of the type B on resource R should wait on
-   different WT_RESOURCE structures, on different {lock, resource}
-   pairs.  Otherwise the following is possible:
-
-      thread1> take S-lock on R
-      thread2> take IS-lock on R
-      thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R.
-      thread3 is killed (or timeout or whatever)
-      WT_RESOURCE structure for R is still in the hash, as it has two owners
-      thread4> wants an IX-lock on R
-      WT_RESOURCE for R is found in the hash, thread4 starts waiting on it.
-      !! now thread4 is waiting for both thread1 and thread2
-      !! while, in fact, IX-lock and IS-lock are compatible and
-      !! thread4 should not wait for thread2.
-*/
-
-#include <waiting_threads.h>
-#include <m_string.h>
-
-/* status variables */
-
-/**
-  preset table of wait intervals
-*/
-ulonglong wt_wait_table[WT_WAIT_STATS];
-/**
-  wait time distribution (log e scale)
-*/
-uint32 wt_wait_stats[WT_WAIT_STATS+1];
-/**
-  distribution of cycle lengths
-  first column tells whether this was during short or long detection
-*/
-uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1];
-uint32 wt_success_stats;
-
-static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;
-
-#ifdef SAFE_STATISTICS
-#define incr(VAR, LOCK)                           \
-  do {                                            \
-    my_atomic_rwlock_wrlock(&(LOCK));             \
-    my_atomic_add32(&(VAR), 1);                   \
-    my_atomic_rwlock_wrunlock(&(LOCK));           \
-  } while(0)
-#else
-#define incr(VAR,LOCK)  do { (VAR)++; } while(0)
-#endif
-
-static void increment_success_stats()
-{
-  incr(wt_success_stats, success_stats_lock);
-}
-
-static void increment_cycle_stats(uint depth, uint slot)
-{
-  if (depth >= WT_CYCLE_STATS)
-    depth= WT_CYCLE_STATS;
-  incr(wt_cycle_stats[slot][depth], cycle_stats_lock);
-}
-
-static void increment_wait_stats(ulonglong waited,int ret)
-{
-  uint i;
-  if ((ret) == ETIMEDOUT)
-    i= WT_WAIT_STATS;
-  else
-    for (i= 0; i < WT_WAIT_STATS && waited/10 > wt_wait_table[i]; i++) ;
-  incr(wt_wait_stats[i], wait_stats_lock);
-}
-
-/*
-  'lock' protects 'owners', 'state', and 'waiter_count'
-  'id' is read-only
-
-  a resource is picked up from a hash in a lock-free manner
-  it's returned pinned, so it cannot be freed at once
-  but it may be freed right after the pin is removed
-  to free a resource it should
-    1. have no owners
-    2. have no waiters
-
-  two ways to access a resource:
-    1. find it in a hash
-       - it's returned pinned.
-        a) take a lock in exclusive mode
-        b) check the state, it should be ACTIVE to be usable
-        c) unpin
-    2. by a direct reference
-       - could only used if a resource cannot be freed
-       e.g. accessing a resource by thd->waiting_for is safe,
-       a resource cannot be freed as there's a thread waiting for it
-*/
-struct st_wt_resource {
-  WT_RESOURCE_ID  id;
-  uint            waiter_count;
-  enum { ACTIVE, FREE } state;
-#ifndef DBUG_OFF
-  pthread_mutex_t  *cond_mutex; /* a mutex for the 'cond' below */
-#endif
-  /*
-    before the 'lock' all elements are mutable, after (and including) -
-    immutable in the sense that lf_hash_insert() won't memcpy() over them.
-    See wt_init().
-  */
-#ifdef WT_RWLOCKS_USE_MUTEXES
-  /*
-    we need a special rwlock-like 'lock' to allow readers bypass
-    waiting writers, otherwise readers can deadlock. For example:
-
-      A waits on resource x, owned by B, B waits on resource y, owned
-      by A, we have a cycle (A->x->B->y->A)
-      Both A and B start deadlock detection:
-
-        A locks x                          B locks y
-        A goes deeper                      B goes deeper
-        A locks y                          B locks x
-
-      with mutexes it would deadlock. With rwlocks it won't, as long
-      as both A and B are taking read locks (and they do).
-      But other threads may take write locks. Assume there's
-      C who wants to start waiting on x, and D who wants to start
-      waiting on y.
-
-        A read-locks x                       B read-locks y
-        A goes deeper                        B goes deeper
-     => C write-locks x (to add a new edge)  D write-locks y
-     .. C is blocked                         D is blocked
-        A read-locks y                       B read-locks x
-
-      Now, if a read lock can bypass a pending wrote lock request, we're fine.
-      If it can not, we have a deadlock.
-
-    writer starvation is technically possible, but unlikely, because
-    the contention is expected to be low.
-  */
-  struct {
-    pthread_cond_t   cond;
-    pthread_mutex_t  mutex;
-    uint readers: 16;
-    uint pending_writers: 15;
-    uint write_locked: 1;
-  } lock;
-#else
-  rw_lock_t lock;
-#endif
-  pthread_cond_t   cond; /* the corresponding mutex is provided by the caller */
-  DYNAMIC_ARRAY    owners;
-};
-
-#ifdef  WT_RWLOCKS_USE_MUTEXES
-static void rc_rwlock_init(WT_RESOURCE *rc)
-{
-  pthread_cond_init(&rc->lock.cond, 0);
-  pthread_mutex_init(&rc->lock.mutex, MY_MUTEX_INIT_FAST);
-}
-static void rc_rwlock_destroy(WT_RESOURCE *rc)
-{
-  DBUG_ASSERT(rc->lock.write_locked == 0);
-  DBUG_ASSERT(rc->lock.readers == 0);
-  pthread_cond_destroy(&rc->lock.cond);
-  pthread_mutex_destroy(&rc->lock.mutex);
-}
-static void rc_rdlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value));
-  pthread_mutex_lock(&rc->lock.mutex);
-  while (rc->lock.write_locked)
-    pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex);
-  rc->lock.readers++;
-  pthread_mutex_unlock(&rc->lock.mutex);
-  DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value));
-}
-static void rc_wrlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value));
-  pthread_mutex_lock(&rc->lock.mutex);
-  while (rc->lock.write_locked || rc->lock.readers)
-    pthread_cond_wait(&rc->lock.cond, &rc->lock.mutex);
-  rc->lock.write_locked= 1;
-  pthread_mutex_unlock(&rc->lock.mutex);
-  DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value));
-}
-static void rc_unlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value));
-  pthread_mutex_lock(&rc->lock.mutex);
-  if (rc->lock.write_locked)
-  {
-    rc->lock.write_locked= 0;
-    pthread_cond_broadcast(&rc->lock.cond);
-  }
-  else if (--rc->lock.readers == 0)
-    pthread_cond_broadcast(&rc->lock.cond);
-  pthread_mutex_unlock(&rc->lock.mutex);
-}
-#else
-static void rc_rwlock_init(WT_RESOURCE *rc)
-{
-  my_rwlock_init(&rc->lock, 0);
-}
-static void rc_rwlock_destroy(WT_RESOURCE *rc)
-{
-  rwlock_destroy(&rc->lock);
-}
-static void rc_rdlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for READ", (ulong)rc->id.value));
-  rw_rdlock(&rc->lock);
-  DBUG_PRINT("wt", ("LOCK resid=%ld for READ", (ulong)rc->id.value));
-}
-static void rc_wrlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("TRYLOCK resid=%ld for WRITE", (ulong)rc->id.value));
-  rw_wrlock(&rc->lock);
-  DBUG_PRINT("wt", ("LOCK resid=%ld for WRITE", (ulong)rc->id.value));
-}
-static void rc_unlock(WT_RESOURCE *rc)
-{
-  DBUG_PRINT("wt", ("UNLOCK resid=%ld", (ulong)rc->id.value));
-  rw_unlock(&rc->lock);
-}
-#endif
-
-/*
-  All resources are stored in a lock-free hash. Different threads
-  may add new resources and perform deadlock detection concurrently.
-*/
-static LF_HASH      reshash;
-
-/**
-  WT_RESOURCE constructor
-
-  It's called from lf_hash and takes a pointer to an LF_SLIST instance.
-  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
-*/
-static void wt_resource_init(uchar *arg)
-{
-  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
-  DBUG_ENTER("wt_resource_init");
-
-  bzero(rc, sizeof(*rc));
-  rc_rwlock_init(rc);
-  pthread_cond_init(&rc->cond, 0);
-  my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5);
-  DBUG_VOID_RETURN;
-}
-
-/**
-  WT_RESOURCE destructor
-
-  It's called from lf_hash and takes a pointer to an LF_SLIST instance.
-  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
-*/
-static void wt_resource_destroy(uchar *arg)
-{
-  WT_RESOURCE *rc= (WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
-  DBUG_ENTER("wt_resource_destroy");
-
-  DBUG_ASSERT(rc->owners.elements == 0);
-  rc_rwlock_destroy(rc);
-  pthread_cond_destroy(&rc->cond);
-  delete_dynamic(&rc->owners);
-  DBUG_VOID_RETURN;
-}
-
-void wt_init()
-{
-  DBUG_ENTER("wt_init");
-  DBUG_ASSERT(reshash.alloc.constructor != wt_resource_init);
-
-  lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0,
-               sizeof_WT_RESOURCE_ID, 0, 0);
-  reshash.alloc.constructor= wt_resource_init;
-  reshash.alloc.destructor= wt_resource_destroy;
-  /*
-    Note a trick: we initialize the hash with the real element size,
-    but fix it later to a shortened element size. This way
-    the allocator will allocate elements correctly, but
-    lf_hash_insert() will only overwrite part of the element with memcpy().
-    lock, condition, and dynamic array will be intact.
-  */
-  reshash.element_size= offsetof(WT_RESOURCE, lock);
-  bzero(wt_wait_stats, sizeof(wt_wait_stats));
-  bzero(wt_cycle_stats, sizeof(wt_cycle_stats));
-  wt_success_stats= 0;
-  { /* initialize wt_wait_table[]. from 1 us to 1 min, log e scale */
-    int i;
-    double from= log(1);   /* 1 us */
-    double to= log(60e6);  /* 1 min */
-    for (i= 0; i < WT_WAIT_STATS; i++)
-    {
-      wt_wait_table[i]= (ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from);
-      DBUG_ASSERT(i == 0 || wt_wait_table[i-1] != wt_wait_table[i]);
-    }
-  }
-  my_atomic_rwlock_init(&cycle_stats_lock);
-  my_atomic_rwlock_init(&success_stats_lock);
-  my_atomic_rwlock_init(&wait_stats_lock);
-  DBUG_VOID_RETURN;
-}
-
-void wt_end()
-{
-  DBUG_ENTER("wt_end");
-
-  DBUG_ASSERT(reshash.count == 0);
-  lf_hash_destroy(&reshash);
-  my_atomic_rwlock_destroy(&cycle_stats_lock);
-  my_atomic_rwlock_destroy(&success_stats_lock);
-  my_atomic_rwlock_destroy(&wait_stats_lock);
-  DBUG_VOID_RETURN;
-}
-
-/**
-  Lazy WT_THD initialization
-
-  Cheap initialization of WT_THD. Only initialize fields that don't require
-  memory allocations - basically, it only does assignments. The rest of the
-  WT_THD structure will be initialized on demand, on the first use.
-  This allows one to initialize lazily all WT_THD structures, even if some
-  (or even most) of them will never be used for deadlock detection.
-
-  @param ds     a pointer to deadlock search depth short value
-  @param ts     a pointer to deadlock timeout short value
-  @param dl     a pointer to deadlock search depth long value
-  @param tl     a pointer to deadlock timeout long value
-
-  @note these are pointers to values, and WT_THD stores them as pointers.
-  It allows one later to change search depths and timeouts for existing
-  threads. It also means that the pointers must stay valid for the lifetime
-  of WT_THD.
-*/
-void wt_thd_lazy_init(WT_THD *thd, const ulong *ds, const ulong *ts,
-                                   const ulong *dl, const ulong *tl)
-{
-  DBUG_ENTER("wt_thd_lazy_init");
-  thd->waiting_for= 0;
-  thd->weight= 0;
-  thd->deadlock_search_depth_short= ds;
-  thd->timeout_short= ts;
-  thd->deadlock_search_depth_long= dl;
-  thd->timeout_long= tl;
-  /* dynamic array is also initialized lazily - without memory allocations */
-  my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
-#ifndef DBUG_OFF
-  thd->name= my_thread_name();
-#endif
-  DBUG_VOID_RETURN;
-}
-
-/**
-  Finalize WT_THD initialization
-
-  After lazy WT_THD initialization, parts of the structure are still
-  uninitialized. This function completes the initialization, allocating
-  memory, if necessary. It's called automatically on demand, when WT_THD
-  is about to be used.
-*/
-static int fix_thd_pins(WT_THD *thd)
-{
-  if (unlikely(thd->pins == 0))
-  {
-    thd->pins= lf_hash_get_pins(&reshash);
-#ifndef DBUG_OFF
-    thd->name= my_thread_name();
-#endif
-  }
-  return thd->pins == 0;
-}
-
-void wt_thd_destroy(WT_THD *thd)
-{
-  DBUG_ENTER("wt_thd_destroy");
-
-  DBUG_ASSERT(thd->my_resources.elements == 0);
-  DBUG_ASSERT(thd->waiting_for == 0);
-
-  if (thd->pins != 0)
-    lf_hash_put_pins(thd->pins);
-
-  delete_dynamic(&thd->my_resources);
-  DBUG_VOID_RETURN;
-}
-/**
-  Trivial resource id comparison function - bytewise memcmp.
-
-  It can be used in WT_RESOURCE_TYPE structures where bytewise
-  comparison of values is sufficient.
-*/
-my_bool wt_resource_id_memcmp(const void *a, const void *b)
-{
-  /* we use the fact that there's no padding in the middle of WT_RESOURCE_ID */
-  compile_time_assert(offsetof(WT_RESOURCE_ID, type) == sizeof(ulonglong));
-  return memcmp(a, b, sizeof_WT_RESOURCE_ID);
-}
-
-/**
-  arguments for the recursive deadlock_search function
-*/
-struct deadlock_arg {
-  WT_THD * const thd;          /**< starting point of a search */
-  uint const max_depth;        /**< search depth limit */
-  WT_THD *victim;              /**< a thread to be killed to resolve a deadlock */
-  WT_RESOURCE *last_locked_rc; /**< see comment at the end of deadlock_search() */
-};
-
-/**
-  helper function to change the victim, according to the weight
-*/
-static void change_victim(WT_THD* found, struct deadlock_arg *arg)
-{
-  if (found->weight < arg->victim->weight)
-  {
-    if (arg->victim != arg->thd)
-    {
-      rc_unlock(arg->victim->waiting_for); /* release the previous victim */
-      DBUG_ASSERT(arg->last_locked_rc == found->waiting_for);
-    }
-    arg->victim= found;
-    arg->last_locked_rc= 0;
-  }
-}
-
-/**
-  recursive loop detection in a wait-for graph with a limited search depth
-*/
-static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
-                           uint depth)
-{
-  WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for;
-  WT_THD *cursor;
-  uint i;
-  int ret= WT_OK;
-  DBUG_ENTER("deadlock_search");
-  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u",
-                    arg->thd->name, blocker->name, depth));
-
-  LF_REQUIRE_PINS(1);
-
-  arg->last_locked_rc= 0;
-
-  if (depth > arg->max_depth)
-  {
-    DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)"));
-    DBUG_RETURN(WT_DEPTH_EXCEEDED);
-  }
-
-retry:
-  /*
-    safe dereference as explained in lf_alloc-pin.c
-    (in short: protects against lf_alloc_free() in lf_hash_delete())
-  */
-  do
-  {
-    rc= *shared_ptr;
-    lf_pin(arg->thd->pins, 0, rc);
-  } while (rc != *shared_ptr && LF_BACKOFF);
-
-  if (rc == 0)
-  {
-    DBUG_PRINT("wt", ("exit: OK (early)"));
-    DBUG_RETURN(0);
-  }
-
-  rc_rdlock(rc);
-  if (rc->state != ACTIVE || *shared_ptr != rc)
-  {
-    /* blocker is not waiting on this resource anymore */
-    rc_unlock(rc);
-    lf_unpin(arg->thd->pins, 0);
-    goto retry;
-  }
-  /* as the state is locked, we can unpin now */
-  lf_unpin(arg->thd->pins, 0);
-
-  /*
-    Below is not a pure depth-first search. It's a depth-first with a
-    slightest hint of breadth-first. Depth-first is:
-
-      check(element, X):
-        foreach current in element->nodes[] do:
-          if current == X return error;
-          check(current, X);
-
-    while we do
-
-      check(element, X):
-        foreach current in element->nodes[] do:
-          if current == X return error;
-        foreach current in element->nodes[] do:
-          check(current, X);
-
-    preferring shorter deadlocks over longer ones.
-  */
-  for (i= 0; i < rc->owners.elements; i++)
-  {
-    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
-    /*
-      We're only looking for (and detecting) cycles that include 'arg->thd'.
-      That is, only deadlocks that *we* have created. For example,
-        thd->A->B->thd
-      (thd waits for A, A waits for B, while B is waiting for thd).
-      While walking the graph we can encounter other cicles, e.g.
-        thd->A->B->C->A
-      This will not be detected. Instead we will walk it in circles until
-      the search depth limit is reached (the latter guarantees that an
-      infinite loop is impossible). We expect the thread that has created
-      the cycle (one of A, B, and C) to detect its deadlock.
-    */
-    if (cursor == arg->thd)
-    {
-      ret= WT_DEADLOCK;
-      increment_cycle_stats(depth, arg->max_depth ==
-                                   *arg->thd->deadlock_search_depth_long);
-      arg->victim= cursor;
-      goto end;
-    }
-  }
-  for (i= 0; i < rc->owners.elements; i++)
-  {
-    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
-    switch (deadlock_search(arg, cursor, depth+1)) {
-    case WT_OK:
-      break;
-    case WT_DEPTH_EXCEEDED:
-      ret= WT_DEPTH_EXCEEDED;
-      break;
-    case WT_DEADLOCK:
-      ret= WT_DEADLOCK;
-      change_victim(cursor, arg);       /* also sets arg->last_locked_rc to 0 */
-      i= rc->owners.elements;           /* jump out of the loop */
-      break;
-    default:
-      DBUG_ASSERT(0);
-    }
-    if (arg->last_locked_rc)
-      rc_unlock(arg->last_locked_rc);
-  }
-end:
-  /*
-    Note that 'rc' is locked in this function, but it's never unlocked here.
-    Instead it's saved in arg->last_locked_rc and the *caller* is
-    expected to unlock it.  It's done to support different killing
-    strategies. This is how it works:
-    Assuming a graph
-
-      thd->A->B->C->thd
-
-    deadlock_search() function starts from thd, locks it (in fact it locks not
-    a thd, but a resource it is waiting on, but below, for simplicity, I'll
-    talk about "locking a thd"). Then it goes down recursively, locks A, and so
-    on. Goes down recursively, locks B. Goes down recursively, locks C.
-    Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
-    Returns from the last deadlock_search() call. C stays locked!
-    Now it checks whether C is a more appropriate victim than 'thd'.
-    If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
-    Now it checks whether B is a more appropriate victim than arg->victim.
-    If yes - old arg->victim is unlocked and arg->victim=B,
-    otherwise B is unlocked. Return.
-    And so on.
-
-    In short, a resource is locked in a frame. But it's not unlocked in the
-    same frame, it's unlocked by the caller, and only after the caller checks
-    that it doesn't need to use current WT_THD as a victim. If it does - the
-    lock is kept and the old victim's resource is unlocked. When the recursion
-    is unrolled and we are back to deadlock() function, there are only two
-    locks left - on thd and on the victim.
-  */
-  arg->last_locked_rc= rc;
-  DBUG_PRINT("wt", ("exit: %s",
-                    ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
-                    ret ? "WT_DEADLOCK" : "OK"));
-  DBUG_RETURN(ret);
-}
-
-/**
-  Deadlock detection in a wait-for graph
-
-  A wrapper for recursive deadlock_search() - prepares deadlock_arg structure,
-  invokes deadlock_search(), increments statistics, notifies the victim.
-
-  @param thd            thread that is going to wait. Deadlock is detected
-                        if, while walking the graph, we reach a thread that
-                        is waiting on thd
-  @param blocker        starting point of a search. In wt_thd_cond_timedwait()
-                        it's thd, in wt_thd_will_wait_for() it's a thread that
-                        thd is going to wait for
-  @param depth          starting search depth. In general it's the number of
-                        edges in the wait-for graph between thd and the
-                        blocker. Practically only two values are used (and
-                        supported) - when thd == blocker it's 0, when thd
-                        waits directly for blocker, it's 1
-  @param max_depth      search depth limit
-*/
-static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
-                            uint max_depth)
-{
-  struct deadlock_arg arg= {thd, max_depth, 0, 0};
-  int ret;
-  DBUG_ENTER("deadlock");
-  DBUG_ASSERT(depth < 2);
-  ret= deadlock_search(&arg, blocker, depth);
-  if (ret == WT_DEPTH_EXCEEDED)
-  {
-    increment_cycle_stats(WT_CYCLE_STATS, max_depth ==
-                                          *thd->deadlock_search_depth_long);
-    ret= WT_OK;
-  }
-  /*
-    if we started with depth==1, blocker was never considered for a victim
-    in deadlock_search(). Do it here.
-  */
-  if (ret == WT_DEADLOCK && depth)
-    change_victim(blocker, &arg);
-  if (arg.last_locked_rc)
-  {
-    /*
-      Special return code if there's nobody to wait for.
-
-      depth == 0 means that we start the search from thd (thd == blocker).
-      ret == WT_OK means that no cycle was found and
-        arg.last_locked_rc == thd->waiting_for.
-      and arg.last_locked_rc->owners.elements == 0 means that
-        (applying the rule above) thd->waiting_for->owners.elements == 0,
-        and thd doesn't have anybody to wait for.
-    */
-    if (depth == 0 && ret == WT_OK && arg.last_locked_rc->owners.elements == 0)
-    {
-      DBUG_ASSERT(thd == blocker);
-      DBUG_ASSERT(arg.last_locked_rc == thd->waiting_for);
-      ret= WT_FREE_TO_GO;
-    }
-    rc_unlock(arg.last_locked_rc);
-  }
-  /* notify the victim, if appropriate */
-  if (ret == WT_DEADLOCK && arg.victim != thd)
-  {
-    DBUG_PRINT("wt", ("killing %s", arg.victim->name));
-    arg.victim->killed= 1;
-    pthread_cond_broadcast(&arg.victim->waiting_for->cond);
-    rc_unlock(arg.victim->waiting_for);
-    ret= WT_OK;
-  }
-  DBUG_RETURN(ret);
-}
-
-
-/**
-  Delete an element from reshash if it has no waiters or owners
-
-  rc->lock must be locked by the caller and it's unlocked on return.
-*/
-static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
-{
-  uint keylen;
-  const void *key;
-  DBUG_ENTER("unlock_lock_and_free_resource");
-
-  DBUG_ASSERT(rc->state == ACTIVE);
-
-  if (rc->owners.elements || rc->waiter_count)
-  {
-    DBUG_PRINT("wt", ("nothing to do, %u owners, %u waiters",
-                      rc->owners.elements, rc->waiter_count));
-    rc_unlock(rc);
-    DBUG_RETURN(0);
-  }
-
-  if (fix_thd_pins(thd))
-  {
-    rc_unlock(rc);
-    DBUG_RETURN(1);
-  }
-
-  /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */
-  {
-    key= &rc->id;
-    keylen= sizeof_WT_RESOURCE_ID;
-  }
-
-  /*
-    To free the element correctly we need to:
-     1. take its lock (already done).
-     2. set the state to FREE
-     3. release the lock
-     4. remove from the hash
-  */
-  rc->state= FREE;
-  rc_unlock(rc);
-  DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1);
-}
-
-
-/**
-  register the fact that thd is not waiting anymore
-
-  decrease waiter_count, clear waiting_for, free the resource if appropriate.
-  thd->waiting_for must be locked!
-*/
-static int stop_waiting_locked(WT_THD *thd)
-{
-  int ret;
-  WT_RESOURCE *rc= thd->waiting_for;
-  DBUG_ENTER("stop_waiting_locked");
-
-  DBUG_ASSERT(rc->waiter_count);
-  DBUG_ASSERT(rc->state == ACTIVE);
-  rc->waiter_count--;
-  thd->waiting_for= 0;
-  ret= unlock_lock_and_free_resource(thd, rc);
-  DBUG_RETURN((thd->killed || ret) ? WT_DEADLOCK : WT_OK);
-}
-
-/**
-  register the fact that thd is not waiting anymore
-
-  locks thd->waiting_for and calls stop_waiting_locked().
-*/
-static int stop_waiting(WT_THD *thd)
-{
-  int ret;
-  WT_RESOURCE *rc= thd->waiting_for;
-  DBUG_ENTER("stop_waiting");
-
-  if (!rc)
-    DBUG_RETURN(WT_OK);
-  /*
-    nobody's trying to free the resource now,
-    as its waiter_count is guaranteed to be non-zero
-  */
-  rc_wrlock(rc);
-  ret= stop_waiting_locked(thd);
-  DBUG_RETURN(ret);
-}
-
-/**
-  notify the system that a thread needs to wait for another thread
-
-  called by a *waiter* to declare that it (thd) will wait for another
-  thread (blocker) on a specific resource (resid).
-  can be called many times, if many blockers own a blocking resource.
-  but must always be called with the same resource id - a thread cannot
-  wait for more than one resource at a time.
-
-  @return WT_OK or WT_DEADLOCK
-
-  As a new edge is added to the wait-for graph, a deadlock detection is
-  performed for this new edge.
-*/
-int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker,
-                         const WT_RESOURCE_ID *resid)
-{
-  uint i;
-  WT_RESOURCE *rc;
-  DBUG_ENTER("wt_thd_will_wait_for");
-
-  LF_REQUIRE_PINS(3);
-
-  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%lu",
-                    thd->name, blocker->name, (ulong)resid->value));
-
-  if (fix_thd_pins(thd))
-    DBUG_RETURN(WT_DEADLOCK);
-
-  if (thd->waiting_for == 0)
-  {
-    uint keylen;
-    const void *key;
-    /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */
-    {
-      key= resid;
-      keylen= sizeof_WT_RESOURCE_ID;
-    }
-
-    DBUG_PRINT("wt", ("first blocker"));
-
-retry:
-    while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0)
-    {
-      WT_RESOURCE tmp;
-
-      DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
-      bzero(&tmp, sizeof(tmp));
-      tmp.id= *resid;
-      tmp.state= ACTIVE;
-
-      if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
-        DBUG_RETURN(WT_DEADLOCK);
-      /*
-        Two cases: either lf_hash_insert() failed - because another thread
-        has just inserted a resource with the same id - and we need to retry.
-        Or lf_hash_insert() succeeded, and then we need to repeat
-        lf_hash_search() to find a real address of the newly inserted element.
-        That is, we don't care what lf_hash_insert() has returned.
-        And we need to repeat the loop anyway.
-      */
-    }
-    if (rc == MY_ERRPTR)
-      DBUG_RETURN(WT_DEADLOCK);
-
-    DBUG_PRINT("wt", ("found in hash rc=%p", rc));
-
-    rc_wrlock(rc);
-    if (rc->state != ACTIVE)
-    {
-      DBUG_PRINT("wt", ("but it's not active, retrying"));
-      /* Somebody has freed the element while we weren't looking */
-      rc_unlock(rc);
-      lf_hash_search_unpin(thd->pins);
-      goto retry;
-    }
-
-    lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */
-    thd->waiting_for= rc;
-    rc->waiter_count++;
-    thd->killed= 0;
-  }
-  else
-  {
-    DBUG_ASSERT(thd->waiting_for->id.type == resid->type);
-    DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0);
-    DBUG_PRINT("wt", ("adding another blocker"));
-
-    /*
-      we can safely access the resource here, it's in the hash as it has
-      non-zero waiter_count
-    */
-    rc= thd->waiting_for;
-    rc_wrlock(rc);
-    DBUG_ASSERT(rc->waiter_count);
-    DBUG_ASSERT(rc->state == ACTIVE);
-
-    if (thd->killed)
-    {
-      stop_waiting_locked(thd);
-      DBUG_RETURN(WT_DEADLOCK);
-    }
-  }
-  /*
-    Another thread could be waiting on this resource for this very 'blocker'.
-    In this case we should not add it to the list for the second time.
-  */
-  for (i= 0; i < rc->owners.elements; i++)
-    if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker)
-      break;
-  if (i >= rc->owners.elements)
-  {
-    if (push_dynamic(&blocker->my_resources, (void*)&rc))
-    {
-      stop_waiting_locked(thd);
-      DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
-    }
-    if (push_dynamic(&rc->owners, (void*)&blocker))
-    {
-      pop_dynamic(&blocker->my_resources);
-      stop_waiting_locked(thd);
-      DBUG_RETURN(WT_DEADLOCK);
-    }
-  }
-  rc_unlock(rc);
-
-  if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short) != WT_OK)
-  {
-    stop_waiting(thd);
-    DBUG_RETURN(WT_DEADLOCK);
-  }
-  DBUG_RETURN(WT_OK);
-}
-
-/**
-  called by a *waiter* (thd) to start waiting
-
-  It's supposed to be a drop-in replacement for
-  pthread_cond_timedwait(), and it takes mutex as an argument.
-
-  @return one of WT_TIMEOUT, WT_DEADLOCK, WT_OK
-*/
-int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
-{
-  int ret= WT_TIMEOUT;
-  struct timespec timeout;
-  ulonglong before, after, starttime;
-  WT_RESOURCE *rc= thd->waiting_for;
-  DBUG_ENTER("wt_thd_cond_timedwait");
-  DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc));
-
-#ifndef DBUG_OFF
-  if (rc->cond_mutex)
-    DBUG_ASSERT(rc->cond_mutex == mutex);
-  else
-    rc->cond_mutex= mutex;
-  safe_mutex_assert_owner(mutex);
-#endif
-
-  before= starttime= my_getsystime();
-
-#ifdef __WIN__
-  /*
-    only for the sake of Windows we distinguish between
-    'before' and 'starttime':
-
-    my_getsystime() returns high-resolution value, that cannot be used for
-    waiting (it doesn't follow system clock changes), but is good for time
-    intervals.
-
-    GetSystemTimeAsFileTime() follows system clock, but is low-resolution
-    and will result in lousy intervals.
-  */
-  GetSystemTimeAsFileTime((PFILETIME)&starttime);
-#endif
-
-  rc_wrlock(rc);
-  if (rc->owners.elements == 0)
-    ret= WT_OK;
-  rc_unlock(rc);
-
-  set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*1000ULL);
-  if (ret == WT_TIMEOUT && !thd->killed)
-    ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
-  if (ret == WT_TIMEOUT && !thd->killed)
-  {
-    int r= deadlock(thd, thd, 0, *thd->deadlock_search_depth_long);
-    if (r == WT_FREE_TO_GO)
-      ret= WT_OK;
-    else if (r != WT_OK)
-      ret= WT_DEADLOCK;
-    else if (*thd->timeout_long > *thd->timeout_short)
-    {
-      set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*1000ULL);
-      if (!thd->killed)
-        ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
-    }
-  }
-  after= my_getsystime();
-  if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
-    ret= WT_DEADLOCK;
-  increment_wait_stats(after-before, ret);
-  if (ret == WT_OK)
-    increment_success_stats();
-  DBUG_RETURN(ret);
-}
-
-/**
-  called by a *blocker* when it releases a resource
-
-  it's conceptually similar to pthread_cond_broadcast, and must be done
-  under the same mutex as wt_thd_cond_timedwait().
-
-  @param resid   a resource to release. 0 to release all resources
-*/
-
-void wt_thd_release(WT_THD *thd, const WT_RESOURCE_ID *resid)
-{
-  uint i;
-  DBUG_ENTER("wt_thd_release");
-
-  for (i= 0; i < thd->my_resources.elements; i++)
-  {
-    WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
-    if (!resid || (resid->type->compare(&rc->id, resid) == 0))
-    {
-      uint j;
-
-      rc_wrlock(rc);
-      /*
-        nobody's trying to free the resource now,
-        as its owners[] array is not empty (at least thd must be there)
-      */
-      DBUG_ASSERT(rc->state == ACTIVE);
-      for (j= 0; j < rc->owners.elements; j++)
-        if (*dynamic_element(&rc->owners, j, WT_THD**) == thd)
-          break;
-      DBUG_ASSERT(j < rc->owners.elements);
-      delete_dynamic_element(&rc->owners, j);
-      if (rc->owners.elements == 0)
-      {
-        pthread_cond_broadcast(&rc->cond);
-#ifndef DBUG_OFF
-        if (rc->cond_mutex)
-          safe_mutex_assert_owner(rc->cond_mutex);
-#endif
-      }
-      unlock_lock_and_free_resource(thd, rc);
-      if (resid)
-      {
-        delete_dynamic_element(&thd->my_resources, i);
-        DBUG_VOID_RETURN;
-      }
-    }
-  }
-  if (!resid)
-    reset_dynamic(&thd->my_resources);
-  DBUG_VOID_RETURN;
-}
-