summaryrefslogtreecommitdiff
path: root/tcp-proxy/tcp-proxy.cpp
blob: 2fe500d18211ae0f43673d6b556cb0a9353715ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
/*
 * ZeroTier One - Network Virtualization Everywhere
 * Copyright (C) 2011-2016  ZeroTier, Inc.  https://www.zerotier.com/
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

// HACK! Will eventually use epoll() or something in Phy<> instead of select().
// Also be sure to change ulimit -n and fs.file-max in /etc/sysctl.conf on relays.
#if defined(__linux__) || defined(__LINUX__) || defined(__LINUX) || defined(LINUX)
#include <linux/posix_types.h>
#include <bits/types.h>
#undef __FD_SETSIZE
#define __FD_SETSIZE 1048576
#undef FD_SETSIZE
#define FD_SETSIZE 1048576
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>

#include <map>
#include <set>
#include <string>
#include <algorithm>
#include <vector>

#include "../osdep/Phy.hpp"

#define ZT_TCP_PROXY_CONNECTION_TIMEOUT_SECONDS 300
#define ZT_TCP_PROXY_TCP_PORT 443

using namespace ZeroTier;

/*
 * ZeroTier TCP Proxy Server
 *
 * This implements a simple packet encapsulation that is designed to look like
 * a TLS connection. It's not a TLS connection, but it sends TLS format record
 * headers. It could be extended in the future to implement a fake TLS
 * handshake.
 *
 * At the moment, each packet is just made to look like TLS application data:
 *   <[1] TLS content type> - currently 0x17 for "application data"
 *   <[1] TLS major version> - currently 0x03 for TLS 1.2
 *   <[1] TLS minor version> - currently 0x03 for TLS 1.2
 *   <[2] payload length> - 16-bit length of payload in bytes
 *   <[...] payload> - Message payload
 *
 * TCP is inherently inefficient for encapsulating Ethernet, since TCP and
 * TCP-like protocols over TCP lead to double-ACKs. So this transport is only used
 * to enable access when UDP or other datagram protocols are not available.
 *
 * Clients send a greeting, which is a four-byte message that contains:
 *   <[1] ZeroTier major version>
 *   <[1] minor version>
 *   <[2] revision>
 *
 * If a client has sent a greeting, it uses the new version of this protocol
 * in which every encapsulated ZT packet is prepended by an IP address where
 * it should be forwarded (or where it came from for replies). This causes
 * this proxy to act as a remote UDP socket similar to a socks proxy, which
 * will allow us to move this function off the rootservers and onto dedicated
 * proxy nodes.
 *
 * Older ZT clients that do not send this message get their packets relayed
 * to/from 127.0.0.1:9993, which will allow them to talk to and relay via
 * the ZT node on the same machine as the proxy. We'll only support this for
 * as long as such nodes appear to be in the wild.
 */

struct TcpProxyService;
struct TcpProxyService
{
	Phy<TcpProxyService *> *phy;
	int udpPortCounter;
	struct Client
	{
		char tcpReadBuf[131072];
		char tcpWriteBuf[131072];
		unsigned long tcpWritePtr;
		unsigned long tcpReadPtr;
		PhySocket *tcp;
		PhySocket *udp;
		time_t lastActivity;
		bool newVersion;
	};
	std::map< PhySocket *,Client > clients;

	PhySocket *getUnusedUdp(void *uptr)
	{
		for(int i=0;i<65535;++i) {
			++udpPortCounter;
			if (udpPortCounter > 0xfffe)
				udpPortCounter = 1024;
			struct sockaddr_in laddr;
			memset(&laddr,0,sizeof(struct sockaddr_in));
			laddr.sin_family = AF_INET;
			laddr.sin_port = htons((uint16_t)udpPortCounter);
			PhySocket *udp = phy->udpBind(reinterpret_cast<struct sockaddr *>(&laddr),uptr);
			if (udp)
				return udp;
		}
		return (PhySocket *)0;
	}

	void phyOnDatagram(PhySocket *sock,void **uptr,const struct sockaddr *from,void *data,unsigned long len)
	{
		if (!*uptr)
			return;
		if ((from->sa_family == AF_INET)&&(len >= 16)&&(len < 2048)) {
			Client &c = *((Client *)*uptr);
			c.lastActivity = time((time_t *)0);

			unsigned long mlen = len;
			if (c.newVersion)
				mlen += 7; // new clients get IP info

			if ((c.tcpWritePtr + 5 + mlen) <= sizeof(c.tcpWriteBuf)) {
				if (!c.tcpWritePtr)
					phy->tcpSetNotifyWritable(c.tcp,true);

				c.tcpWriteBuf[c.tcpWritePtr++] = 0x17; // look like TLS data
				c.tcpWriteBuf[c.tcpWritePtr++] = 0x03; // look like TLS 1.2
				c.tcpWriteBuf[c.tcpWritePtr++] = 0x03; // look like TLS 1.2

				c.tcpWriteBuf[c.tcpWritePtr++] = (char)((mlen >> 8) & 0xff);
				c.tcpWriteBuf[c.tcpWritePtr++] = (char)(mlen & 0xff);

				if (c.newVersion) {
					c.tcpWriteBuf[c.tcpWritePtr++] = (char)4; // IPv4
					*((uint32_t *)(c.tcpWriteBuf + c.tcpWritePtr)) = ((const struct sockaddr_in *)from)->sin_addr.s_addr;
					c.tcpWritePtr += 4;
					*((uint16_t *)(c.tcpWriteBuf + c.tcpWritePtr)) = ((const struct sockaddr_in *)from)->sin_port;
					c.tcpWritePtr += 2;
				}

				for(unsigned long i=0;i<len;++i)
					c.tcpWriteBuf[c.tcpWritePtr++] = ((const char *)data)[i];
			}

			//printf("<< UDP %s:%d -> %.16llx\n",inet_ntoa(reinterpret_cast<const struct sockaddr_in *>(from)->sin_addr),(int)ntohs(reinterpret_cast<const struct sockaddr_in *>(from)->sin_port),(unsigned long long)&c);
		}
	}

	void phyOnTcpConnect(PhySocket *sock,void **uptr,bool success)
	{
		// unused, we don't initiate outbound connections
	}

	void phyOnTcpAccept(PhySocket *sockL,PhySocket *sockN,void **uptrL,void **uptrN,const struct sockaddr *from)
	{
		Client &c = clients[sockN];
		PhySocket *udp = getUnusedUdp((void *)&c);
		if (!udp) {
			phy->close(sockN);
			clients.erase(sockN);
			//printf("** TCP rejected, no more UDP ports to assign\n");
			return;
		}
		c.tcpWritePtr = 0;
		c.tcpReadPtr = 0;
		c.tcp = sockN;
		c.udp = udp;
		c.lastActivity = time((time_t *)0);
		c.newVersion = false;
		*uptrN = (void *)&c;
		//printf("<< TCP from %s -> %.16llx\n",inet_ntoa(reinterpret_cast<const struct sockaddr_in *>(from)->sin_addr),(unsigned long long)&c);
	}

	void phyOnTcpClose(PhySocket *sock,void **uptr)
	{
		if (!*uptr)
			return;
		Client &c = *((Client *)*uptr);
		phy->close(c.udp);
		clients.erase(sock);
		//printf("** TCP %.16llx closed\n",(unsigned long long)*uptr);
	}

	void phyOnTcpData(PhySocket *sock,void **uptr,void *data,unsigned long len)
	{
		Client &c = *((Client *)*uptr);
		c.lastActivity = time((time_t *)0);

		for(unsigned long i=0;i<len;++i) {
			if (c.tcpReadPtr >= sizeof(c.tcpReadBuf)) {
				phy->close(sock);
				return;
			}
			c.tcpReadBuf[c.tcpReadPtr++] = ((const char *)data)[i];

			if (c.tcpReadPtr >= 5) {
				unsigned long mlen = ( ((((unsigned long)c.tcpReadBuf[3]) & 0xff) << 8) | (((unsigned long)c.tcpReadBuf[4]) & 0xff) );
				if (c.tcpReadPtr >= (mlen + 5)) {
					if (mlen == 4) {
						// Right now just sending this means the client is 'new enough' for the IP header
						c.newVersion = true;
						//printf("<< TCP %.16llx HELLO\n",(unsigned long long)*uptr);
					} else if (mlen >= 7) {
						char *payload = c.tcpReadBuf + 5;
						unsigned long payloadLen = mlen;

						struct sockaddr_in dest;
						memset(&dest,0,sizeof(dest));
						if (c.newVersion) {
							if (*payload == (char)4) {
								// New clients tell us where their packets go.
								++payload;
								dest.sin_family = AF_INET;
								dest.sin_addr.s_addr = *((uint32_t *)payload);
								payload += 4;
								dest.sin_port = *((uint16_t *)payload); // will be in network byte order already
								payload += 2;
								payloadLen -= 7;
							}
						} else {
							// For old clients we will just proxy everything to a local ZT instance. The
							// fact that this will come from 127.0.0.1 will in turn prevent that instance
							// from doing unite() with us. It'll just forward. There will not be many of
							// these.
							dest.sin_family = AF_INET;
							dest.sin_addr.s_addr = htonl(0x7f000001); // 127.0.0.1
							dest.sin_port = htons(9993);
						}

						// Note: we do not relay to privileged ports... just an abuse prevention rule.
						if ((ntohs(dest.sin_port) > 1024)&&(payloadLen >= 16)) {
							phy->udpSend(c.udp,(const struct sockaddr *)&dest,payload,payloadLen);
							//printf(">> TCP %.16llx to %s:%d\n",(unsigned long long)*uptr,inet_ntoa(dest.sin_addr),(int)ntohs(dest.sin_port));
						}
					}

					memmove(c.tcpReadBuf,c.tcpReadBuf + (mlen + 5),c.tcpReadPtr -= (mlen + 5));
				}
			}
		}
	}

	void phyOnTcpWritable(PhySocket *sock,void **uptr)
	{
		Client &c = *((Client *)*uptr);
		if (c.tcpWritePtr) {
			long n = phy->tcpSend(sock,c.tcpWriteBuf,c.tcpWritePtr);
			if (n > 0) {
				memmove(c.tcpWriteBuf,c.tcpWriteBuf + n,c.tcpWritePtr -= (unsigned long)n);
				if (!c.tcpWritePtr)
					phy->tcpSetNotifyWritable(sock,false);
			}
		} else phy->tcpSetNotifyWritable(sock,false);
	}

	void doHousekeeping()
	{
		std::vector<PhySocket *> toClose;
		time_t now = time((time_t *)0);
		for(std::map< PhySocket *,Client >::iterator c(clients.begin());c!=clients.end();++c) {
			if ((now - c->second.lastActivity) >= ZT_TCP_PROXY_CONNECTION_TIMEOUT_SECONDS) {
				toClose.push_back(c->first);
				toClose.push_back(c->second.udp);
			}
		}
		for(std::vector<PhySocket *>::iterator s(toClose.begin());s!=toClose.end();++s)
			phy->close(*s);
	}
};

int main(int argc,char **argv)
{
	signal(SIGPIPE,SIG_IGN);
	signal(SIGHUP,SIG_IGN);
	srand(time((time_t *)0));

	TcpProxyService svc;
	Phy<TcpProxyService *> phy(&svc,false,true);
	svc.phy = &phy;
	svc.udpPortCounter = 1023;

	{
		struct sockaddr_in laddr;
		memset(&laddr,0,sizeof(laddr));
		laddr.sin_family = AF_INET;
		laddr.sin_port = htons(ZT_TCP_PROXY_TCP_PORT);
		if (!phy.tcpListen((const struct sockaddr *)&laddr)) {
			fprintf(stderr,"%s: fatal error: unable to bind TCP port %d\n",argv[0],ZT_TCP_PROXY_TCP_PORT);
			return 1;
		}
	}

	time_t lastDidHousekeeping = time((time_t *)0);
	for(;;) {
		phy.poll(120000);
		time_t now = time((time_t *)0);
		if ((now - lastDidHousekeeping) > 120) {
			lastDidHousekeeping = now;
			svc.doHousekeeping();
		}
	}

	return 0;
}