summaryrefslogtreecommitdiff
path: root/tcp-proxy/tcp-proxy.cpp
blob: 9e3f5d0798dfd493cec419d39180b563a13a4531 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
/*
 * ZeroTier One - Network Virtualization Everywhere
 * Copyright (C) 2011-2015  ZeroTier, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * --
 *
 * ZeroTier may be used and distributed under the terms of the GPLv3, which
 * are available at: http://www.gnu.org/licenses/gpl-3.0.html
 *
 * If you would like to embed ZeroTier into a commercial application or
 * redistribute it in a modified binary form, please contact ZeroTier Networks
 * LLC. Start here: http://www.zerotier.com/
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>

#include <map>
#include <set>
#include <string>
#include <algorithm>
#include <vector>

#include "../osdep/Phy.hpp"

// Number of UDP sockets main() must bind at startup; each accepted TCP client
// is assigned one of them at random.
#define ZT_TCP_PROXY_UDP_POOL_SIZE 1024
// First UDP port main() tries when filling the pool; it walks upward from here
// (up to 65535) until the pool is full.
#define ZT_TCP_PROXY_UDP_POOL_START_PORT 10000
// Seconds without TCP data from a client after which housekeeping closes its
// connection.
#define ZT_TCP_PROXY_CONNECTION_TIMEOUT_SECONDS 300

using namespace ZeroTier;

/*
 * ZeroTier TCP Proxy Server
 *
 * This implements a simple packet encapsulation that is designed to look like
 * a TLS connection. It's not a TLS connection, but it sends TLS format record
 * headers. It could be extended in the future to implement a fake TLS
 * handshake.
 *
 * At the moment, each packet is just made to look like TLS application data:
 *   <[1] TLS content type> - currently 0x17 for "application data"
 *   <[1] TLS major version> - currently 0x03 for TLS 1.2
 *   <[1] TLS minor version> - currently 0x03 for TLS 1.2
 *   <[2] payload length> - 16-bit length of payload in bytes
 *   <[...] payload> - Message payload
 *
 * TCP is inherently inefficient for encapsulating Ethernet, since running TCP
 * or TCP-like protocols over TCP leads to double-ACKs. So this transport is only
 * used to enable access when UDP or other datagram protocols are not available.
 *
 * Clients send a greeting, which is a four-byte message that contains:
 *   <[1] ZeroTier major version>
 *   <[1] minor version>
 *   <[2] revision>
 *
 * If a client has sent a greeting, it uses the new version of this protocol
 * in which every encapsulated ZT packet is prepended by an IP address where
 * it should be forwarded (or where it came from for replies). This causes
 * this proxy to act as a remote UDP socket similar to a socks proxy, which
 * will allow us to move this function off the supernodes and onto dedicated
 * proxy nodes.
 *
 * Older ZT clients that do not send this message get their packets relayed
 * to/from 127.0.0.1:9993, which will allow them to talk to and relay via
 * the ZT node on the same machine as the proxy. We'll only support this for
 * as long as such nodes appear to be in the wild.
 */

struct TcpProxyService;
struct TcpProxyService
{
	Phy<TcpProxyService *> *phy;
	PhySocket *udpPool[ZT_TCP_PROXY_UDP_POOL_SIZE];

	struct Client
	{
		char tcpReadBuf[131072];
		char tcpWriteBuf[131072];
		unsigned long tcpWritePtr;
		unsigned long tcpReadPtr;
		PhySocket *tcp;
		PhySocket *assignedUdp;
		time_t lastActivity;
		bool newVersion;
	};

	std::map< PhySocket *,Client > clients;

	struct ReverseMappingKey
	{
		uint64_t sourceZTAddress;
		PhySocket *sendingUdpSocket;
		uint32_t destIp;
		unsigned int destPort;

		ReverseMappingKey() {}
		ReverseMappingKey(uint64_t zt,PhySocket *s,uint32_t ip,unsigned int port) : sourceZTAddress(zt),sendingUdpSocket(s),destIp(ip),destPort(port) {}
		inline bool operator<(const ReverseMappingKey &k) const throw() { return (memcmp((const void *)this,(const void *)&k,sizeof(ReverseMappingKey)) < 0); }
		inline bool operator==(const ReverseMappingKey &k) const throw() { return (memcmp((const void *)this,(const void *)&k,sizeof(ReverseMappingKey)) == 0); }
	};

	std::map< ReverseMappingKey,Client * > reverseMappings;

	void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *from,void *data,unsigned long len)
	{
		if ((from->sa_family == AF_INET)&&(len > 16)&&(len < 2048)) {
			const uint64_t destZt = (
				(((uint64_t)(((const unsigned char *)data)[8])) << 32) |
				(((uint64_t)(((const unsigned char *)data)[9])) << 24) |
				(((uint64_t)(((const unsigned char *)data)[10])) << 16) |
				(((uint64_t)(((const unsigned char *)data)[11])) << 8) |
				((uint64_t)(((const unsigned char *)data)[12])) );
			const uint32_t fromIp = ((const struct sockaddr_in *)from)->sin_addr.s_addr;
			const unsigned int fromPort = ntohs(((const struct sockaddr_in *)from)->sin_port);

			std::map< ReverseMappingKey,Client * >::iterator rm(reverseMappings.find(ReverseMappingKey(destZt,sock,fromIp,fromPort)));
			if (rm != reverseMappings.end()) {
				Client &c = *(rm->second);

				unsigned long mlen = len;
				if (c.newVersion)
					mlen += 7; // new clients get IP info

				if ((c.tcpWritePtr + 5 + mlen) <= sizeof(c.tcpWriteBuf)) {
					if (!c.tcpWritePtr)
						phy->tcpSetNotifyWritable(c.tcp,true);

					c.tcpWriteBuf[c.tcpWritePtr++] = 0x17; // look like TLS data
					c.tcpWriteBuf[c.tcpWritePtr++] = 0x03; // look like TLS 1.2
					c.tcpWriteBuf[c.tcpWritePtr++] = 0x03; // look like TLS 1.2

					c.tcpWriteBuf[c.tcpWritePtr++] = (char)((mlen >> 8) & 0xff);
					c.tcpWriteBuf[c.tcpWritePtr++] = (char)(mlen & 0xff);

					if (c.newVersion) {
						c.tcpWriteBuf[c.tcpWritePtr++] = (char)4; // IPv4
						*((uint32_t *)(c.tcpWriteBuf + c.tcpWritePtr)) = fromIp;
						c.tcpWritePtr += 4;
						c.tcpWriteBuf[c.tcpWritePtr++] = (char)((fromPort >> 8) & 0xff);
						c.tcpWriteBuf[c.tcpWritePtr++] = (char)(fromPort & 0xff);
					}

					for(unsigned long i=0;i<len;++i)
						c.tcpWriteBuf[c.tcpWritePtr++] = ((const char *)data)[i];
				}
			}
		}
	}

	void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success)
	{
		// unused, we don't initiate
	}

	void phyOnTcpAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN,const struct sockaddr *from)
	{
		Client &c = clients[sockN];
		c.tcpWritePtr = 0;
		c.tcpReadPtr = 0;
		c.tcp = sockN;
		c.assignedUdp = udpPool[rand() % ZT_TCP_PROXY_UDP_POOL_SIZE];
		c.lastActivity = time((time_t *)0);
		c.newVersion = false;
		*uptrN = (void *)&c;
	}

	void phyOnTcpClose(PhySocket *sock,void **uptr)
	{
		for(std::map< ReverseMappingKey,Client * >::iterator rm(reverseMappings.begin());rm!=reverseMappings.end();) {
			if (rm->second == (Client *)*uptr)
				reverseMappings.erase(rm++);
			else ++rm;
		}
		clients.erase(sock);
	}

	void phyOnTcpData(PhySocket *sock,void **uptr,void *data,unsigned long len)
	{
		Client &c = *((Client *)*uptr);
		c.lastActivity = time((time_t *)0);

		for(unsigned long i=0;i<len;++i) {
			if (c.tcpReadPtr >= sizeof(c.tcpReadBuf)) {
				phy->close(sock);
				return;
			}
			c.tcpReadBuf[c.tcpReadPtr++] = ((const char *)data)[i];

			if (c.tcpReadPtr >= 5) {
				unsigned long mlen = ( ((((unsigned long)c.tcpReadBuf[3]) & 0xff) << 8) | (((unsigned long)c.tcpReadBuf[4]) & 0xff) );
				if (c.tcpReadPtr >= (mlen + 5)) {
					if (mlen == 4) {
						// Right now just sending this means the client is 'new enough' for the IP header
						c.newVersion = true;
					} else if (mlen >= 7) {
						char *payload = c.tcpReadBuf + 5;
						unsigned long payloadLen = mlen;

						struct sockaddr_in dest;
						memset(&dest,0,sizeof(dest));
						if (c.newVersion) {
							if (*payload == (char)4) {
								// New clients tell us where their packets go.
								++payload;
								dest.sin_family = AF_INET;
								dest.sin_addr.s_addr = *((uint32_t *)payload);
								payload += 4;
								dest.sin_port = *((uint16_t *)payload); // will be in network byte order already
								payload += 2;
								payloadLen -= 7;
							}
						} else {
							// For old clients we will just proxy everything to a local ZT instance. The
							// fact that this will come from 127.0.0.1 will in turn prevent that instance
							// from doing unite() with us. It'll just forward. There will not be many of
							// these.
							dest.sin_family = AF_INET;
							dest.sin_addr.s_addr = htonl(0x7f000001); // 127.0.0.1
							dest.sin_port = htons(9993);
						}

						// Note: we do not relay to privileged ports... just an abuse prevention rule.
						if ((ntohs(dest.sin_port) > 1024)&&(payloadLen >= 16)) {
							if ((payloadLen >= 28)&&(payload[13] != (char)0xff)) {
								// Learn reverse mappings -- we will route replies to these packets
								// back to their sending TCP socket. They're on a first come first
								// served basis.
								const uint64_t sourceZt = (
									(((uint64_t)(((const unsigned char *)payload)[13])) << 32) |
									(((uint64_t)(((const unsigned char *)payload)[14])) << 24) |
									(((uint64_t)(((const unsigned char *)payload)[15])) << 16) |
									(((uint64_t)(((const unsigned char *)payload)[16])) << 8) |
									((uint64_t)(((const unsigned char *)payload)[17])) );
								ReverseMappingKey k(sourceZt,c.assignedUdp,dest.sin_addr.s_addr,ntohl(dest.sin_port));
								if (reverseMappings.count(k) == 0)
									reverseMappings[k] = &c;
							}

							phy->udpSend(c.assignedUdp,(const struct sockaddr *)&dest,payload,payloadLen);
						}
					}

					memmove(c.tcpReadBuf,c.tcpReadBuf + (mlen + 5),c.tcpReadPtr -= (mlen + 5));
				}
			}
		}
	}

	void phyOnTcpWritable(PhySocket *sock,void **uptr)
	{
		Client &c = *((Client *)*uptr);
		if (c.tcpWritePtr) {
			long n = phy->tcpSend(sock,c.tcpWriteBuf,c.tcpWritePtr);
			if (n > 0) {
				memmove(c.tcpWriteBuf,c.tcpWriteBuf + n,c.tcpWritePtr -= (unsigned long)n);
				if (!c.tcpWritePtr)
					phy->tcpSetNotifyWritable(sock,false);
			}
		} else phy->tcpSetNotifyWritable(sock,false);
	}

	void doHousekeeping()
	{
		std::vector<PhySocket *> toClose;
		time_t now = time((time_t *)0);
		for(std::map< PhySocket *,Client >::iterator c(clients.begin());c!=clients.end();++c) {
			if ((now - c->second.lastActivity) >= ZT_TCP_PROXY_CONNECTION_TIMEOUT_SECONDS)
				toClose.push_back(c->first);
		}
		for(std::vector<PhySocket *>::iterator s(toClose.begin());s!=toClose.end();++s)
			phy->close(*s); // will call phyOnTcpClose() which does cleanup
	}
};

int main(int argc,char **argv)
{
	signal(SIGPIPE,SIG_IGN);
	signal(SIGHUP,SIG_IGN);
	srand(time((time_t *)0));

	TcpProxyService svc;
	Phy<TcpProxyService *> phy(&svc,true);
	svc.phy = &phy;

	{
		int poolSize = 0;
		for(unsigned int p=ZT_TCP_PROXY_UDP_POOL_START_PORT;((poolSize<ZT_TCP_PROXY_UDP_POOL_SIZE)&&(p<=65535));++p) {
			struct sockaddr_in laddr;
			memset(&laddr,0,sizeof(laddr));
			laddr.sin_family = AF_INET;
			laddr.sin_port = htons((uint16_t)p);
			PhySocket *s = phy.udpBind((const struct sockaddr *)&laddr);
			if (s)
				svc.udpPool[poolSize++] = s;
		}
		if (poolSize < ZT_TCP_PROXY_UDP_POOL_SIZE) {
			fprintf(stderr,"%s: fatal error: cannot bind %d UDP ports\n",argv[0],ZT_TCP_PROXY_UDP_POOL_SIZE);
			return 1;
		}
	}

	time_t lastDidHousekeeping = time((time_t *)0);
	for(;;) {
		phy.poll(120000);
		time_t now = time((time_t *)0);
		if ((now - lastDidHousekeeping) > 120) {
			lastDidHousekeeping = now;
			svc.doHousekeeping();
		}
	}
}