c++c windows synchronization kernel-mode

Is there a way to synchronize this without locks?

Say I have 3 functions that can be called by an upper layer:

Start - Will only be called if we haven't been started yet, or Stop was previously called
Stop - Will only be called after a successful call to Start
Process - Can be called at any time (simultaneously on different threads); if started, will call into lower layer

In Stop, it must wait for all Process calls to finish calling into the lower layer, and prevent any further calls. With a locking mechanism, I can come up with the following pseudo code:

Start() {
  ResetEvent(&StopCompleteEvent);
  IsStarted = true;
  RefCount = 0;
}

Stop() {
   AcquireLock();
   IsStarted = false;
   WaitForCompletionEvent = (RefCount != 0);
   ReleaseLock();
   if (WaitForCompletionEvent)
     WaitForEvent(&StopCompleteEvent);
   ASSERT(RefCount == 0);
}

Process() {
  AcquireLock();
  AddedRef = IsStarted;
  if (AddedRef)
    RefCount++;
  ReleaseLock();

  if (!AddedRef) return;

  ProcessLowerLayer();

  AcquireLock();
  FireCompletionEvent = (--RefCount == 0);
  ReleaseLock();
  if (FilreCompletionEvent)
    SetEvent(&StopCompleteEvent);
}

Is there a way to achieve the same behavior without a locking mechanism? Perhaps with some fancy usage of InterlockedCompareExchange and InterlockedIncremenet/InterlockedDecrement?

The reason I ask is that this is in the data path of a network driver and I would really prefer not to have any locks.

Solution

I believe it is possible to avoid the use of explicit locks and any unnecessary blocking or kernel calls.

Note that this is pseudo-code only, for illustrative purposes; it hasn't seen a compiler. And while I believe the threading logic is sound, please verify its correctness for yourself, or get an expert to validate it; lock-free programming is hard.

#define STOPPING 0x20000000;
#define STOPPED 0x40000000;
volatile LONG s = STOPPED;
  // state and count
  // bit 30 set -> stopped
  // bit 29 set -> stopping
  // bits 0 through 28 -> thread count

Start() 
{
   KeClearEvent(&StopCompleteEvent);
   LONG n = InterlockedExchange(&s, 0);  // sets s to 0
   if ((n & STOPPED) == 0) 
       bluescreen("Invalid call to Start()");
}

Stop()
{
   LONG n = InterlockedCompareExchange(&s, STOPPED, 0);
   if (n == 0)
   {
       // No calls to Process() were running so we could jump directly to stopped.
       // Mission accomplished!
       return;
   }

   LONG n = InterlockedOr(&s, STOPPING);
   if ((n & STOPPED) != 0)
       bluescreen("Stop called when already stopped");
   if ((n & STOPPING) != 0)
       bluescreen("Stop called when already stopping");

   n = InterlockedCompareExchange(&s, STOPPED, STOPPING);
   if (n == STOPPING)
   {
       // The last call to Process() exited before we set the STOPPING flag.
       // Mission accomplished!
       return;
   }

   // Now that STOPPING mode is set, and we know at least one call to Process 
   // is running, all we need do is wait for the event to be signaled.

   KeWaitForSingleObject(...);

   // The event is only ever signaled after a thread has successfully
   // changed the state to STOPPED.  Mission accomplished!

   return;
}

Process()
{
    LONG n = InterlockedCompareExchange(&s, STOPPED, STOPPING);
    if (n == STOPPING)
    {
         // We've just stopped; let the call to Stop() complete.
         KeSetEvent(&StopCompleteEvent);
         return;
    }
    if ((n & STOPPED) != 0 || (n & STOPPING) != 0)
    {
         // Checking here avoids changing the state unnecessarily when
         // we already know we can't enter the lower layer.

         // It also ensures that the transition from STOPPING to STOPPED can't
         // be delayed even if there are lots of threads making new calls to Process().

         return;
    }

    n = InterlockedIncrement(&s);
    if ((n & STOPPED) != 0)
    {
        // Turns out we've just stopped, so the call to Process() must be aborted.

        // Explicitly set the state back to STOPPED, rather than decrementing it,
        // in case Start() has been called.  At least one thread will succeed.
        InterlockedCompareExchange(&s, STOPPED, n);
        return;
    }

    if ((n & STOPPING) == 0)
    {
        ProcessLowerLayer();
    }

    n = InterlockedDecrement(&s);
    if ((n & STOPPED) != 0 || n == (STOPPED - 1))
        bluescreen("Stopped during call to Process, shouldn't be possible!");

    if (n != STOPPING)
        return;

    // Stop() has been called, and it looks like we're the last 
    // running call to Process() in which case we need to change the 
    // status to STOPPED and signal the call to Stop() to exit.

    // However, another thread might have beaten us to it, so we must 
    // check again.  The event MUST only be set once per call to Stop().

    n = InterlockedCompareExchange(&s, STOPPED, STOPPING);
    if (n == STOPPING)
    {
         // We've just stopped; let the call to Stop() complete.
         KeSetEvent(&StopCompleteEvent);
    }
    return;
}