Initial commit
This commit is contained in:
484
common/rfb/Congestion.cxx
Normal file
484
common/rfb/Congestion.cxx
Normal file
@@ -0,0 +1,484 @@
|
||||
/* Copyright 2009-2018 Pierre Ossman for Cendio AB
|
||||
*
|
||||
* This is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This software is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this software; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||
* USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This code implements congestion control in the same way as TCP in
|
||||
* order to avoid excessive latency in the transport. This is needed
|
||||
* because "buffer bloat" is unfortunately still a very real problem.
|
||||
*
|
||||
* The basic principle is TCP Congestion Control (RFC 5618), with the
|
||||
* addition of using the TCP Vegas algorithm. The reason we use Vegas
|
||||
* is that we run on top of a reliable transport so we need a latency
|
||||
* based algorithm rather than a loss based one. There is also a lot of
|
||||
* interpolation of values. This is because we have rather horrible
|
||||
* granularity in our measurements.
|
||||
*
|
||||
* We use a simplistic form of slow start in order to ramp up quickly
|
||||
* from an idle state. We do not have any persistent threshold though
|
||||
* as we have too much noise for it to be reliable.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <linux/sockios.h>
|
||||
#endif
|
||||
|
||||
#include <rfb/Congestion.h>
|
||||
#include <rfb/LogWriter.h>
|
||||
#include <rfb/util.h>
|
||||
|
||||
// Debug output on what the congestion control is up to
|
||||
#undef CONGESTION_DEBUG
|
||||
|
||||
// Dump socket congestion window debug trace to disk
|
||||
#undef CONGESTION_TRACE
|
||||
|
||||
using namespace rfb;
|
||||
|
||||
// This window should get us going fairly fast on a decent bandwidth network.
|
||||
// If it's too high, it will rapidly be reduced and stay low.
|
||||
static const unsigned INITIAL_WINDOW = 16384;
|
||||
|
||||
// TCP's minimal window is 3*MSS. But since we don't know the MSS, we
|
||||
// make a guess at 4 KiB (it's probably a bit higher).
|
||||
static const unsigned MINIMUM_WINDOW = 4096;
|
||||
|
||||
// The current default maximum window for Linux (4 MiB). Should be a good
|
||||
// limit for now...
|
||||
static const unsigned MAXIMUM_WINDOW = 4194304;
|
||||
|
||||
// Compare position even when wrapped around
|
||||
static inline bool isAfter(unsigned a, unsigned b) {
|
||||
return a != b && a - b <= UINT_MAX / 2;
|
||||
}
|
||||
|
||||
static LogWriter vlog("Congestion");
|
||||
|
||||
Congestion::Congestion() :
|
||||
lastPosition(0), extraBuffer(0),
|
||||
baseRTT(-1), congWindow(INITIAL_WINDOW), inSlowStart(true),
|
||||
safeBaseRTT(-1), measurements(0), minRTT(-1), minCongestedRTT(-1)
|
||||
{
|
||||
gettimeofday(&lastUpdate, NULL);
|
||||
gettimeofday(&lastSent, NULL);
|
||||
memset(&lastPong, 0, sizeof(lastPong));
|
||||
gettimeofday(&lastPongArrival, NULL);
|
||||
gettimeofday(&lastAdjustment, NULL);
|
||||
}
|
||||
|
||||
Congestion::~Congestion()
|
||||
{
|
||||
}
|
||||
|
||||
void Congestion::updatePosition(unsigned pos)
|
||||
{
|
||||
struct timeval now;
|
||||
unsigned delta, consumed;
|
||||
|
||||
gettimeofday(&now, NULL);
|
||||
|
||||
delta = pos - lastPosition;
|
||||
if ((delta > 0) || (extraBuffer > 0))
|
||||
lastSent = now;
|
||||
|
||||
// Idle for too long?
|
||||
// We use a very crude RTO calculation in order to keep things simple
|
||||
// FIXME: should implement RFC 2861
|
||||
if (msBetween(&lastSent, &now) > __rfbmax(baseRTT*2, 100)) {
|
||||
|
||||
#ifdef CONGESTION_DEBUG
|
||||
vlog.debug("Connection idle for %d ms, resetting congestion control",
|
||||
msBetween(&lastSent, &now));
|
||||
#endif
|
||||
|
||||
// Close congestion window and redo wire latency measurement
|
||||
congWindow = __rfbmin(INITIAL_WINDOW, congWindow);
|
||||
baseRTT = -1;
|
||||
measurements = 0;
|
||||
gettimeofday(&lastAdjustment, NULL);
|
||||
minRTT = minCongestedRTT = -1;
|
||||
inSlowStart = true;
|
||||
}
|
||||
|
||||
// Commonly we will be in a state of overbuffering. We need to
|
||||
// estimate the extra delay that causes so we can separate it from
|
||||
// the delay caused by an incorrect congestion window.
|
||||
// (we cannot do this until we have a RTT measurement though)
|
||||
if (baseRTT != (unsigned)-1) {
|
||||
extraBuffer += delta;
|
||||
consumed = msBetween(&lastUpdate, &now) * congWindow / baseRTT;
|
||||
if (extraBuffer < consumed)
|
||||
extraBuffer = 0;
|
||||
else
|
||||
extraBuffer -= consumed;
|
||||
}
|
||||
|
||||
lastPosition = pos;
|
||||
lastUpdate = now;
|
||||
}
|
||||
|
||||
void Congestion::sentPing()
|
||||
{
|
||||
struct RTTInfo rttInfo;
|
||||
|
||||
memset(&rttInfo, 0, sizeof(struct RTTInfo));
|
||||
|
||||
gettimeofday(&rttInfo.tv, NULL);
|
||||
rttInfo.pos = lastPosition;
|
||||
rttInfo.extra = getExtraBuffer();
|
||||
rttInfo.congested = isCongested();
|
||||
|
||||
pings.push_back(rttInfo);
|
||||
}
|
||||
|
||||
void Congestion::gotPong()
|
||||
{
|
||||
struct timeval now;
|
||||
struct RTTInfo rttInfo;
|
||||
unsigned rtt, delay;
|
||||
|
||||
if (pings.empty())
|
||||
return;
|
||||
|
||||
gettimeofday(&now, NULL);
|
||||
|
||||
rttInfo = pings.front();
|
||||
pings.pop_front();
|
||||
|
||||
lastPong = rttInfo;
|
||||
lastPongArrival = now;
|
||||
|
||||
rtt = msBetween(&rttInfo.tv, &now);
|
||||
if (rtt < 1)
|
||||
rtt = 1;
|
||||
|
||||
// Try to estimate wire latency by tracking lowest seen latency
|
||||
if (rtt < baseRTT)
|
||||
safeBaseRTT = baseRTT = rtt;
|
||||
|
||||
// Pings sent before the last adjustment aren't interesting as they
|
||||
// aren't a measurement of the current congestion window
|
||||
if (isBefore(&rttInfo.tv, &lastAdjustment))
|
||||
return;
|
||||
|
||||
// Estimate added delay because of overtaxed buffers (see above)
|
||||
delay = rttInfo.extra * baseRTT / congWindow;
|
||||
if (delay < rtt)
|
||||
rtt -= delay;
|
||||
else
|
||||
rtt = 1;
|
||||
|
||||
// A latency less than the wire latency means that we've
|
||||
// understimated the congestion window. We can't really determine
|
||||
// how much, so pretend that we got no buffer latency at all.
|
||||
if (rtt < baseRTT)
|
||||
rtt = baseRTT;
|
||||
|
||||
// Record the minimum seen delay (hopefully ignores jitter) and let
|
||||
// the congestion control do its thing.
|
||||
//
|
||||
// Note: We are delay based rather than loss based, which means we
|
||||
// need to look at pongs even if they weren't limited by the
|
||||
// current window ("congested"). Otherwise we will fail to
|
||||
// detect increasing congestion until the application exceeds
|
||||
// the congestion window.
|
||||
if (rtt < minRTT)
|
||||
minRTT = rtt;
|
||||
if (rttInfo.congested) {
|
||||
if (rtt < minCongestedRTT)
|
||||
minCongestedRTT = rtt;
|
||||
}
|
||||
|
||||
measurements++;
|
||||
updateCongestion();
|
||||
}
|
||||
|
||||
bool Congestion::isCongested()
|
||||
{
|
||||
if (getInFlight() < congWindow)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int Congestion::getUncongestedETA()
|
||||
{
|
||||
unsigned targetAcked;
|
||||
|
||||
const struct RTTInfo* prevPing;
|
||||
unsigned eta, elapsed;
|
||||
unsigned etaNext, delay;
|
||||
|
||||
std::list<struct RTTInfo>::const_iterator iter;
|
||||
|
||||
targetAcked = lastPosition - congWindow;
|
||||
|
||||
// Simple case?
|
||||
if (isAfter(lastPong.pos, targetAcked))
|
||||
return 0;
|
||||
|
||||
// No measurements yet?
|
||||
if (baseRTT == (unsigned)-1)
|
||||
return -1;
|
||||
|
||||
prevPing = &lastPong;
|
||||
eta = 0;
|
||||
elapsed = msSince(&lastPongArrival);
|
||||
|
||||
// Walk the ping queue and figure out which one we are waiting for to
|
||||
// get to an uncongested state
|
||||
|
||||
for (iter = pings.begin(); ;++iter) {
|
||||
struct RTTInfo curPing;
|
||||
|
||||
// If we aren't waiting for a pong that will clear the congested
|
||||
// state then we have to estimate the final bit by pretending that
|
||||
// we had a ping just after the last position update.
|
||||
if (iter == pings.end()) {
|
||||
curPing.tv = lastUpdate;
|
||||
curPing.pos = lastPosition;
|
||||
curPing.extra = extraBuffer;
|
||||
} else {
|
||||
curPing = *iter;
|
||||
}
|
||||
|
||||
etaNext = msBetween(&prevPing->tv, &curPing.tv);
|
||||
// Compensate for buffering delays
|
||||
delay = curPing.extra * baseRTT / congWindow;
|
||||
etaNext += delay;
|
||||
delay = prevPing->extra * baseRTT / congWindow;
|
||||
if (delay >= etaNext)
|
||||
etaNext = 0;
|
||||
else
|
||||
etaNext -= delay;
|
||||
|
||||
// Found it?
|
||||
if (isAfter(curPing.pos, targetAcked)) {
|
||||
eta += etaNext * (curPing.pos - targetAcked) / (curPing.pos - prevPing->pos);
|
||||
if (elapsed > eta)
|
||||
return 0;
|
||||
else
|
||||
return eta - elapsed;
|
||||
}
|
||||
|
||||
assert(iter != pings.end());
|
||||
|
||||
eta += etaNext;
|
||||
prevPing = &*iter;
|
||||
}
|
||||
}
|
||||
|
||||
size_t Congestion::getBandwidth()
|
||||
{
|
||||
// No measurements yet? Guess RTT of 60 ms
|
||||
if (safeBaseRTT == (unsigned)-1)
|
||||
return congWindow * 1000 / 60;
|
||||
|
||||
return congWindow * 1000 / safeBaseRTT;
|
||||
}
|
||||
|
||||
void Congestion::debugTrace(const char* filename, int fd)
|
||||
{
|
||||
#ifdef CONGESTION_TRACE
|
||||
#ifdef __linux__
|
||||
FILE *f;
|
||||
f = fopen(filename, "ab");
|
||||
if (f != NULL) {
|
||||
struct tcp_info info;
|
||||
int buffered;
|
||||
socklen_t len;
|
||||
len = sizeof(info);
|
||||
if ((getsockopt(fd, IPPROTO_TCP,
|
||||
TCP_INFO, &info, &len) == 0) &&
|
||||
(ioctl(fd, SIOCOUTQ, &buffered) == 0)) {
|
||||
struct timeval now;
|
||||
gettimeofday(&now, NULL);
|
||||
fprintf(f, "%u.%06u,%u,%u,%u,%u\n",
|
||||
(unsigned)now.tv_sec, (unsigned)now.tv_usec,
|
||||
congWindow, info.tcpi_snd_cwnd * info.tcpi_snd_mss,
|
||||
getInFlight(), buffered);
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned Congestion::getExtraBuffer()
|
||||
{
|
||||
unsigned elapsed;
|
||||
unsigned consumed;
|
||||
|
||||
if (baseRTT == (unsigned)-1)
|
||||
return 0;
|
||||
|
||||
elapsed = msSince(&lastUpdate);
|
||||
consumed = elapsed * congWindow / baseRTT;
|
||||
|
||||
if (consumed >= extraBuffer)
|
||||
return 0;
|
||||
else
|
||||
return extraBuffer - consumed;
|
||||
}
|
||||
|
||||
unsigned Congestion::getInFlight()
|
||||
{
|
||||
struct RTTInfo nextPong;
|
||||
unsigned etaNext, delay, elapsed, acked;
|
||||
|
||||
// Simple case?
|
||||
if (lastPosition == lastPong.pos)
|
||||
return 0;
|
||||
|
||||
// No measurements yet?
|
||||
if (baseRTT == (unsigned)-1) {
|
||||
if (!pings.empty())
|
||||
return lastPosition - pings.front().pos;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// If we aren't waiting for any pong then we have to estimate things
|
||||
// by pretending that we had a ping just after the last position
|
||||
// update.
|
||||
if (pings.empty()) {
|
||||
nextPong.tv = lastUpdate;
|
||||
nextPong.pos = lastPosition;
|
||||
nextPong.extra = extraBuffer;
|
||||
} else {
|
||||
nextPong = pings.front();
|
||||
}
|
||||
|
||||
// First we need to estimate how many bytes have made it through
|
||||
// completely. Look at the next ping that should arrive and figure
|
||||
// out how far behind it should be and interpolate the positions.
|
||||
|
||||
etaNext = msBetween(&lastPong.tv, &nextPong.tv);
|
||||
// Compensate for buffering delays
|
||||
delay = nextPong.extra * baseRTT / congWindow;
|
||||
etaNext += delay;
|
||||
delay = lastPong.extra * baseRTT / congWindow;
|
||||
if (delay >= etaNext)
|
||||
etaNext = 0;
|
||||
else
|
||||
etaNext -= delay;
|
||||
|
||||
elapsed = msSince(&lastPongArrival);
|
||||
|
||||
// The pong should be here any second. Be optimistic and assume
|
||||
// we can already use its value.
|
||||
if (etaNext <= elapsed)
|
||||
acked = nextPong.pos;
|
||||
else {
|
||||
acked = lastPong.pos;
|
||||
acked += (nextPong.pos - lastPong.pos) * elapsed / etaNext;
|
||||
}
|
||||
|
||||
return lastPosition - acked;
|
||||
}
|
||||
|
||||
void Congestion::updateCongestion()
|
||||
{
|
||||
unsigned diff;
|
||||
|
||||
// We want at least three measurements to avoid noise
|
||||
if (measurements < 3)
|
||||
return;
|
||||
|
||||
assert(minRTT >= baseRTT);
|
||||
assert(minCongestedRTT >= baseRTT);
|
||||
|
||||
// The goal is to have a slightly too large congestion window since
|
||||
// a "perfect" one cannot be distinguished from a too small one. This
|
||||
// translates to a goal of a few extra milliseconds of delay.
|
||||
|
||||
diff = minRTT - baseRTT;
|
||||
|
||||
if (diff > __rfbmax(100, baseRTT/2)) {
|
||||
// We have no way of detecting loss, so assume massive latency
|
||||
// spike means packet loss. Adjust the window and go directly
|
||||
// to congestion avoidance.
|
||||
#ifdef CONGESTION_DEBUG
|
||||
vlog.debug("Latency spike! Backing off...");
|
||||
#endif
|
||||
congWindow = congWindow * baseRTT / minRTT;
|
||||
inSlowStart = false;
|
||||
}
|
||||
|
||||
if (inSlowStart) {
|
||||
// Slow start. Aggressive growth until we see congestion.
|
||||
|
||||
if (diff > 25) {
|
||||
// If we see an increased latency then we assume we've hit the
|
||||
// limit and it's time to leave slow start and switch to
|
||||
// congestion avoidance
|
||||
congWindow = congWindow * baseRTT / minRTT;
|
||||
inSlowStart = false;
|
||||
} else {
|
||||
// It's not safe to increase unless we actually used the entire
|
||||
// congestion window, hence we look at minCongestedRTT and not
|
||||
// minRTT
|
||||
|
||||
diff = minCongestedRTT - baseRTT;
|
||||
if (diff < 25)
|
||||
congWindow *= 2;
|
||||
}
|
||||
} else {
|
||||
// Congestion avoidance (VEGAS)
|
||||
|
||||
if (diff > 50) {
|
||||
// Slightly too fast
|
||||
congWindow -= 4096;
|
||||
} else {
|
||||
// Only the "congested" pongs are checked to see if the
|
||||
// window is too small.
|
||||
|
||||
diff = minCongestedRTT - baseRTT;
|
||||
|
||||
if (diff < 5) {
|
||||
// Way too slow
|
||||
congWindow += 8192;
|
||||
} else if (diff < 25) {
|
||||
// Too slow
|
||||
congWindow += 4096;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (congWindow < MINIMUM_WINDOW)
|
||||
congWindow = MINIMUM_WINDOW;
|
||||
if (congWindow > MAXIMUM_WINDOW)
|
||||
congWindow = MAXIMUM_WINDOW;
|
||||
|
||||
#ifdef CONGESTION_DEBUG
|
||||
vlog.debug("RTT: %d/%d ms (%d ms), Window: %d KiB, Bandwidth: %g Mbps%s",
|
||||
minRTT, minCongestedRTT, baseRTT, congWindow / 1024,
|
||||
congWindow * 8.0 / baseRTT / 1000.0,
|
||||
inSlowStart ? " (slow start)" : "");
|
||||
#endif
|
||||
|
||||
measurements = 0;
|
||||
gettimeofday(&lastAdjustment, NULL);
|
||||
minRTT = minCongestedRTT = -1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user