Dan Dan - 3 years ago 170
C++ Question

boost::asio io_service::run_one leads to a Segmentation fault

I am implementing a class that uses

boost::asio
to implement a library for TLS connections.

I am implementing only synchronous operations and some of them accept a timeout. I implement the timeout methods using a deadline_timer and io_service.run_one, as explained in this example: http://www.boost.org/doc/libs/1_45_0/doc/html/boost_asio/example/timeouts/async_tcp_client.cpp

My problem is with a method that reads exactly 'n' bytes from the socket and accepts a timeout as a parameter. The problem is that the
io_service.run_one()
is raising a
SIGSEV
and I do no know why. Below is the code (it is so long, but I do not know any other better way to explain this):

The code



Below are the methods involved in the test I am executing:

void CMDRboostConnection::check_deadline()
{
// Check whether the deadline has passed. We compare the deadline against
// the current time since a new asynchronous operation may have moved the
// deadline before this actor had a chance to run.
if (m_timeoutOpsTimer->expires_at() <= boost::asio::deadline_timer::traits_type::now())
{
// TODO do I need to cancel async operations?
m_timeoutOpsErrorCode = boost::asio::error::timed_out;

// There is no longer an active deadline. The expiry is set to positive
// infinity so that the actor takes no action until a new deadline is set.
m_timeoutOpsTimer->expires_at(boost::posix_time::pos_infin);
}

// Put the actor back to sleep.
m_timeoutOpsTimer->async_wait(
boost::bind(&CMDRboostConnection::check_deadline, this));
}

bool CMDRboostConnection::connect()
{
// TODO: This method already throws an exception, it should be void.
DEBUG("Connecting to " + m_url + " : " + m_port);
try
{
// If the socket is already connected, disconnect it before
// opening a new conneciont.
if (isConnected())
{
disconnect();
}

m_socket = new SSLSocket(m_ioService, m_context);

tcp::resolver resolver(m_ioService);
tcp::resolver::query query(m_url, m_port);

tcp::resolver::iterator end;
tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);

boost::asio::connect(m_socket->lowest_layer(), resolver.resolve(query));

if (endpoint_iterator == end)
{
DEBUG("Endpoint cannot be resolved, disconnecting...");
disconnect();
}
else
{
m_timeoutOpsTimer = new boost::asio::deadline_timer(m_ioService);
m_timeoutOpsTimer->expires_at(boost::posix_time::pos_infin);
// Start the persistent actor that checks for deadline expiry.
check_deadline();

DEBUG("Endpoint resolved, performing handshake");
m_socket->set_verify_mode(boost::asio::ssl::verify_none);
m_socket->handshake(SSLSocket::client);

DEBUG("Handshake done, connected to " + m_url + " : " + m_port);
m_isConnected = true;
}
}
catch (boost::system::system_error &err)
{
disconnect();
throw;
}

return m_isConnected;
}

std::streambuf& CMDRboostConnection::readNBytes(int n, unsigned int timeout)
{
try
{
if(!isConnected())
{
std::string err = "Cannot read, not connected";
ERROR(err);
throw std::logic_error(err);
}

if(n == 0)
{
return m_buffer;
}

m_timeoutOpsTimer->expires_from_now(
boost::posix_time::milliseconds(timeout));

m_timeoutOpsErrorCode = boost::asio::error::would_block;

boost::asio::async_read(
*m_socket,
m_buffer,
boost::asio::transfer_exactly(n),
boost::bind(
&CMDRboostConnection::timoutOpsCallback,
this,
boost::asio::placeholders::error,
boost::asio::placeholders::bytes_transferred)
);

do
{
m_ioService.run_one();
} while (m_timeoutOpsErrorCode == boost::asio::error::would_block);

if(m_timeoutOpsErrorCode)
{
throw boost::system::system_error(m_timeoutOpsErrorCode);
}

return m_buffer;
}
catch(boost::system::system_error &err)
{
ERROR("Timeout reached trying to read a message");
disconnect();
throw;
}
}

void CMDRboostConnection::disconnect()
{
try
{
DEBUG("Disconnecting...");
if(isConnected())
{
m_socket->shutdown();

DEBUG("Closing socket...");
m_socket->lowest_layer().close();

if(m_socket != NULL)
{
delete m_socket;
m_socket = NULL;
}
}

if(m_timeoutOpsTimer != NULL)
{
delete m_timeoutOpsTimer;
m_timeoutOpsTimer = NULL;
}

DEBUG("Disconnection performed properly");
m_isConnected = false;
}
catch (boost::system::system_error &err)
{
ERROR("Exception thrown, error = " << err.code() <<
", category: " << err.code().category().name() << std::endl);

m_isConnected = false;

throw;
}
}


The test



Below it is the test I am running to test the method:

TEST(CMDRboostConnection, readNbytesTimeoutDoesNotMakeTheProgramCrashWhenTmeout)
{
std::auto_ptr<CMDR::SSL::ICMDRsslConnection> m_connection =
std::auto_ptr<CMDR::SSL::ICMDRsslConnection>(
new CMDR::SSL::CMDRboostConnection("localhost", "9999"));

unsigned int sleepInterval = 0; // seconds
unsigned int timeout = 10; // milliseconds
unsigned int numIterations = 10;
std::string msg("delay 500000"); // microseconds

if(!m_connection->isConnected())
{
m_connection->connect();
}


for(unsigned int i = 0; i < numIterations; i++)
{

if(!m_connection->isConnected())
{
m_connection->connect();
}

ASSERT_NO_THROW( m_connection->write(msg) );

ASSERT_THROW (
m_connection->readNBytes(msg.size(), timeout),
boost::system::system_error);

ASSERT_FALSE(m_connection->isConnected());

ASSERT_NO_THROW( m_connection->connect() );

sleep(sleepInterval);
}
}


The problem



In the above test, the first loop iteration goes ok, that is, the first time the method
readNBytes
is called, it works (throws an exception as expected). The second time it is executed, it raises the
SIGSEV
.

EDIT



I am executing the above test among others that test other functionalities. I have realized that if I execute the above test only, it works. But, If I execute it in addition other, then the program crashes with the mentioned
SIGSEV
.

This is one of the tests that causes the problem:

TEST(CMDRboostConnection, canConnectDisconnect)
{
std::auto_ptr<CMDR::SSL::ICMDRsslConnection> m_connection =
std::auto_ptr<CMDR::SSL::ICMDRsslConnection>(
new CMDR::SSL::CMDRboostConnection("localhost", "9999"));

unsigned int sleepInterval = 0; // seconds
unsigned int timeout = 1000; // milliseconds
unsigned int numIterations = 10;
std::string msg("normally");


if(!m_connection->isConnected())
{
ASSERT_NO_THROW (m_connection->connect() );
}

for(unsigned int i = 0; i < numIterations; i++)
{
ASSERT_NO_THROW( m_connection->disconnect() );
sleep(sleepInterval);
ASSERT_NO_THROW( m_connection->connect() );
}
}


In conclusion, If I execute both the above tests, the first one crashes. But if I execute only the first one, it works.

EDIT 2
Fixed the bug mentioned in the comments.

Dan Dan
Answer Source

I have replaced all the member attributes by pointers, and now it works (that is, I can pass all tests I have write). The methods disconnect / connect are now as following:

bool CMDRboostConnection::connect()
{
  // TODO: This method already throws an exception, it should be void.
  DEBUG("Connecting to " + m_url + " : " + m_port);
  try
  {
    // If the socket is already connected, disconnect it before
    // opening a new conneciont.
    if (isConnected())
    {
      disconnect();
    }

    m_ioService = new boost::asio::io_service();
    m_timeoutOpsTimer = new boost::asio::deadline_timer(*m_ioService);
    m_context = new boost::asio::ssl::context(boost::asio::ssl::context::sslv23);
    m_socket = new SSLSocket(*m_ioService, *m_context);

    tcp::resolver resolver(*m_ioService);
    tcp::resolver::query query(m_url, m_port);

    tcp::resolver::iterator end;
    tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);

    boost::asio::connect(m_socket->lowest_layer(), resolver.resolve(query));

    if (endpoint_iterator == end)
    {
      DEBUG("Endpoint cannot be resolved, disconnecting...");
      disconnect();
    }
    else
    {
      m_timeoutOpsTimer->expires_at(boost::posix_time::pos_infin);
      // Start the persistent actor that checks for deadline expiry.
      check_deadline();

      DEBUG("Endpoint resolved, performing handshake");
      m_socket->set_verify_mode(boost::asio::ssl::verify_none);
      m_socket->handshake(SSLSocket::client);

      DEBUG("Handshake done, connected to " + m_url + " : " + m_port);
      m_isConnected = true;
    }
  }
  catch (boost::system::system_error &err)
  {
    disconnect();
    throw;
  }

  return m_isConnected;
}

void CMDRboostConnection::disconnect()
{
  try
  {
    DEBUG("Disconnecting...");
    if(isConnected())
    {
      m_socket->shutdown();

      DEBUG("Closing socket...");
      m_socket->lowest_layer().close();

      if(m_socket != NULL)
      {
        delete m_socket;
        m_socket = NULL;
      }
    }

    if(m_timeoutOpsTimer != NULL)
    {
      delete m_timeoutOpsTimer;
      m_timeoutOpsTimer = NULL;
    }

    if(m_context != NULL)
    {
      delete m_context;
      m_context = NULL;
    }

    if(m_ioService != NULL)
    {
      delete m_ioService;
      m_ioService = NULL;
    }

    DEBUG("Disconnection performed properly");
    m_isConnected = false;
  }
  catch (boost::system::system_error &err)
  {
    ERROR("Exception thrown, error = " << err.code() <<
        ", category: " << err.code().category().name() << std::endl);

    if(m_timeoutOpsTimer != NULL)
    {
      delete m_timeoutOpsTimer;
      m_timeoutOpsTimer = NULL;
    }

    if(m_context != NULL)
    {
      delete m_context;
      m_context = NULL;
    }

    if(m_ioService != NULL)
    {
      delete m_ioService;
      m_ioService = NULL;
    }

    m_isConnected = false;

    throw;
  }
}

As you can see, now the socket, the io_service, the deadline_timer and the context are created on connecting and released on disconnecting. I still do not understand what is going on, let me explain:

I have tried to reimplement the above variables one next one, that is, first the socket, then the timer, then the context and finally the io_service.

The tests have passed only when the io_service is a ptr, but I can't understand why. If io_service is a class-scoped variable, it should be deleted every time the class instance goes out of scope, that is, every time one of my TESTs finishes.

It seems that, before implementing it as a ptr, that was not happening. I suspect that maybe, when the readNBytes throws an exception due to a timeout, the read_async call remains in the io_service action queue, and maybe that caused the problem.

Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download