diff options
author | Adam Ierymenko <adam.ierymenko@gmail.com> | 2017-05-31 08:36:09 -0700 |
---|---|---|
committer | Adam Ierymenko <adam.ierymenko@gmail.com> | 2017-05-31 08:36:09 -0700 |
commit | 2a4a50b1daaec74d7a4d08869ead31ff1f966fa1 (patch) | |
tree | 8ddccedd8bd4ee309fffdc409ef89476111f6b0d | |
parent | 2ec88e800877cfbc7f007d21f10429bc1b493006 (diff) | |
download | infinitytier-2a4a50b1daaec74d7a4d08869ead31ff1f966fa1.tar.gz infinitytier-2a4a50b1daaec74d7a4d08869ead31ff1f966fa1.zip |
Add some also-ZeroTier-written ext/ code for use in new clustering, delete some old code, and change Mac to use -Os which is just as fast as -Ofast and may be faster due to cache effects.
-rw-r--r-- | ext/kissdb/Makefile | 7 | ||||
-rw-r--r-- | ext/kissdb/README.md | 69 | ||||
-rw-r--r-- | ext/kissdb/SPEC.txt | 62 | ||||
-rw-r--r-- | ext/kissdb/kissdb.c | 452 | ||||
-rw-r--r-- | ext/kissdb/kissdb.h | 173 | ||||
-rw-r--r-- | ext/vsdm/LICENSE.txt | 22 | ||||
-rw-r--r-- | ext/vsdm/Makefile | 5 | ||||
-rw-r--r-- | ext/vsdm/README.md | 40 | ||||
-rw-r--r-- | ext/vsdm/vsdm-test.cpp | 72 | ||||
-rw-r--r-- | ext/vsdm/vsdm.hpp | 1491 | ||||
-rw-r--r-- | include/ZeroTierOne.h | 197 | ||||
-rw-r--r-- | make-mac.mk | 2 | ||||
-rw-r--r-- | node/Node.cpp | 52 | ||||
-rw-r--r-- | node/Node.hpp | 15 |
14 files changed, 2396 insertions, 263 deletions
diff --git a/ext/kissdb/Makefile b/ext/kissdb/Makefile new file mode 100644 index 00000000..f47372c6 --- /dev/null +++ b/ext/kissdb/Makefile @@ -0,0 +1,7 @@ +# http://creativecommons.org/publicdomain/zero/1.0/ + +all: + gcc -Wall -O2 -DKISSDB_TEST -o kissdb-test kissdb.c + +clean: + rm -f kissdb-test *.o test.db diff --git a/ext/kissdb/README.md b/ext/kissdb/README.md new file mode 100644 index 00000000..ab8a6cff --- /dev/null +++ b/ext/kissdb/README.md @@ -0,0 +1,69 @@ +kissdb +====== + +(Keep It) Simple Stupid Database + +KISSDB is about the simplest key/value store you'll ever see, anywhere. +It's written in plain vanilla C using only the standard string and FILE +I/O functions, and should port to just about anything with a disk or +something that acts like one. + +It stores keys and values of fixed length in a stupid-simple file format +based on fixed-size hash tables. If a hash collision occurrs, a new "page" +of hash table is appended to the database. The format is append-only. +There is no delete. Puts that replace an existing value, however, will not +grow the file as they will overwrite the existing entry. + +Hash table size is a space/speed trade-off parameter. Larger hash tables +will reduce collisions and speed things up a bit, at the expense of memory +and disk space. A good size is usually about 1/2 the average number of +entries you expect. + +Features: + + * Tiny, compiles to ~4k on an x86_64 Linux system + * Small memory footprint (only caches hash tables) + * Very space-efficient (on disk) if small hash tables are used + * Makes a decent effort to be robust on power loss + * Pretty respectably fast, especially given its simplicity + * 64-bit, file size limit is 2^64 bytes + * Ports to anything with a C compiler and stdlib/stdio + * Public domain + +Limitations: + + * Fixed-size keys and values, must recreate and copy to change any init size parameter + * Add/update only, no delete + * Iteration is supported but key order is undefined + * No search for subsets of keys/values + * No indexes + * No transactions + * No special recovery features if a database gets corrupted + * No built-in thread-safety (guard it with a mutex in MT code) + * No built-in caching of data (only hash tables are cached for lookup speed) + * No endian-awareness (currently), so big-endian DBs won't read on little-endian machines + +Alternative key/value stores and embedded databases: + + * [MDB](http://symas.com/mdb/) uses mmap() and is very fast (not quite as tiny/simple/portable) + * [CDB](http://cr.yp.to/cdb.html) is also minimal and fast, probably the closest thing to this (but has a 4gb size limit) + * [Kyoto Cabinet](http://fallabs.com/kyotocabinet/) is very fast, full-featured, and modern (license required for commercial use) + * [SQLite](http://www.sqlite.org/) gives you a complete embedded SQL server (public domain, very mature, much larger) + * Others include GDBM, NDBM, Berkeley DB, etc. Use your Googles. :) + +KISSDB is good if you want space-efficient relatively fast write-once/read-many storage +of keys mapped to values. It's not a good choice if you need searches, indexes, delete, +structured storage, or widely varying key/value sizes. It's also probably not a good +choice if you need a long-lived database for critical data, since it lacks recovery +features and is brittle if its internals are modified. It would be better for a cache +of data that can be restored or "re-learned," such as keys, Bitcoin transactions, nodes +on a peer-to-peer network, log analysis results, rendered web pages, session cookies, +auth tokens, etc. + +KISSDB is in the public domain as according to the [Creative Commons Public Domain Dedication](http://creativecommons.org/publicdomain/zero/1.0/). +One reason it was written was the poverty of simple key/value databases with wide open licensing. Even old ones like GDBM have GPL, not LGPL, licenses. + +See comments in kissdb.h for documentation. Makefile can be used to build +a test program on systems with gcc. + +Author: Adam Ierymenko / ZeroTier Networks LLC diff --git a/ext/kissdb/SPEC.txt b/ext/kissdb/SPEC.txt new file mode 100644 index 00000000..732c4df5 --- /dev/null +++ b/ext/kissdb/SPEC.txt @@ -0,0 +1,62 @@ +----- + +KISSDB file format (version 2) +Author: Adam Ierymenko <adam.ierymenko@zerotier.com> + +http://creativecommons.org/publicdomain/zero/1.0/ + +----- + +In keeping with the goal of minimalism the file format is very simple, the +sort of thing that would be given as an example in an introductory course in +data structures. It's a basic hash table that adds additional pages of hash +table entries on collision. + +It consists of a 28 byte header followed by a series of hash tables and data. +All integer values are stored in the native word order of the target +architecture (in the future the code might be fixed to make everything +little-endian if anyone cares about that). + +The header consists of the following fields: + +[0-3] magic numbers: (ASCII) 'K', 'd', 'B', KISSDB_VERSION (currently 2) +[4-11] 64-bit hash table size in entries +[12-19] 64-bit key size in bytes +[20-27] 64-bit value size in bytes + +Hash tables are arrays of [hash table size + 1] 64-bit integers. The extra +entry, if nonzero, is the offset in the file of the next hash table, forming +a linked list of hash tables across the file. + +Immediately following the header, the first hash table will be written when +the first key/value is added. The algorithm for adding new entries is as +follows: + +(1) The key is hashed using a 64-bit variant of the DJB2 hash function, and + this is taken modulo hash table size to get a bucket number. +(2) Hash tables are checked in order, starting with the first hash table, + until a zero (empty) bucket is found. If one is found, skip to step (4). +(3) If no empty buckets are found in any hash table, a new table is appended + to the file and the final pointer in the previous hash table is set to + its offset. (In the code the update of the next hash table pointer in + the previous hash table happens last, after the whole write is complete, + to avoid corruption on power loss.) +(4) The key and value are appended, in order with no additional meta-data, + to the database file. Before appending the offset in the file stream + where they will be stored is saved. After appending, this offset is + written to the empty hash table bucket we chose in steps 2/3. Hash table + updates happen last to avoid corruption if the write does not complete. + +Lookup of a key/value pair occurs as follows: + +(1) The key is hashed and taken modulo hash table size to get a bucket + number. +(2) If this bucket's entry in the hash table is nonzero, the key at the + offset specified by this bucket is compared to the key being looked up. + If they are equal, the value is read and returned. +(3) If the keys are not equal, the next hash table is checked and step (2) + is repeated. If an empty bucket is encountered or if we run out of hash + tables, the key was not found. + +To update an existing value, its location is looked up and the value portion +of the entry is rewritten. diff --git a/ext/kissdb/kissdb.c b/ext/kissdb/kissdb.c new file mode 100644 index 00000000..6b275686 --- /dev/null +++ b/ext/kissdb/kissdb.c @@ -0,0 +1,452 @@ +/* (Keep It) Simple Stupid Database + * + * Written by Adam Ierymenko <adam.ierymenko@zerotier.com> + * KISSDB is in the public domain and is distributed with NO WARRANTY. + * + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +/* Compile with KISSDB_TEST to build as a test program. */ + +/* Note: big-endian systems will need changes to implement byte swapping + * on hash table file I/O. Or you could just use it as-is if you don't care + * that your database files will be unreadable on little-endian systems. */ + +#define _FILE_OFFSET_BITS 64 + +#include "kissdb.h" + +#include <string.h> +#include <stdlib.h> +#include <stdint.h> + +#ifdef _WIN32 +#define fseeko _fseeki64 +#define ftello _ftelli64 +#endif + +#define KISSDB_HEADER_SIZE ((sizeof(uint64_t) * 3) + 4) + +/* djb2 hash function */ +static uint64_t KISSDB_hash(const void *b,unsigned long len) +{ + unsigned long i; + uint64_t hash = 5381; + for(i=0;i<len;++i) + hash = ((hash << 5) + hash) + (uint64_t)(((const uint8_t *)b)[i]); + return hash; +} + +int KISSDB_open( + KISSDB *db, + const char *path, + int mode, + unsigned long hash_table_size, + unsigned long key_size, + unsigned long value_size) +{ + uint64_t tmp; + uint8_t tmp2[4]; + uint64_t *httmp; + uint64_t *hash_tables_rea; + +#ifdef _WIN32 + db->f = (FILE *)0; + fopen_s(&db->f,path,((mode == KISSDB_OPEN_MODE_RWREPLACE) ? "w+b" : (((mode == KISSDB_OPEN_MODE_RDWR)||(mode == KISSDB_OPEN_MODE_RWCREAT)) ? "r+b" : "rb"))); +#else + db->f = fopen(path,((mode == KISSDB_OPEN_MODE_RWREPLACE) ? "w+b" : (((mode == KISSDB_OPEN_MODE_RDWR)||(mode == KISSDB_OPEN_MODE_RWCREAT)) ? "r+b" : "rb"))); +#endif + if (!db->f) { + if (mode == KISSDB_OPEN_MODE_RWCREAT) { +#ifdef _WIN32 + db->f = (FILE *)0; + fopen_s(&db->f,path,"w+b"); +#else + db->f = fopen(path,"w+b"); +#endif + } + if (!db->f) + return KISSDB_ERROR_IO; + } + + if (fseeko(db->f,0,SEEK_END)) { + fclose(db->f); + return KISSDB_ERROR_IO; + } + if (ftello(db->f) < KISSDB_HEADER_SIZE) { + /* write header if not already present */ + if ((hash_table_size)&&(key_size)&&(value_size)) { + if (fseeko(db->f,0,SEEK_SET)) { fclose(db->f); return KISSDB_ERROR_IO; } + tmp2[0] = 'K'; tmp2[1] = 'd'; tmp2[2] = 'B'; tmp2[3] = KISSDB_VERSION; + if (fwrite(tmp2,4,1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + tmp = hash_table_size; + if (fwrite(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + tmp = key_size; + if (fwrite(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + tmp = value_size; + if (fwrite(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + fflush(db->f); + } else { + fclose(db->f); + return KISSDB_ERROR_INVALID_PARAMETERS; + } + } else { + if (fseeko(db->f,0,SEEK_SET)) { fclose(db->f); return KISSDB_ERROR_IO; } + if (fread(tmp2,4,1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + if ((tmp2[0] != 'K')||(tmp2[1] != 'd')||(tmp2[2] != 'B')||(tmp2[3] != KISSDB_VERSION)) { + fclose(db->f); + return KISSDB_ERROR_CORRUPT_DBFILE; + } + if (fread(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + if (!tmp) { + fclose(db->f); + return KISSDB_ERROR_CORRUPT_DBFILE; + } + hash_table_size = (unsigned long)tmp; + if (fread(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + if (!tmp) { + fclose(db->f); + return KISSDB_ERROR_CORRUPT_DBFILE; + } + key_size = (unsigned long)tmp; + if (fread(&tmp,sizeof(uint64_t),1,db->f) != 1) { fclose(db->f); return KISSDB_ERROR_IO; } + if (!tmp) { + fclose(db->f); + return KISSDB_ERROR_CORRUPT_DBFILE; + } + value_size = (unsigned long)tmp; + } + + db->hash_table_size = hash_table_size; + db->key_size = key_size; + db->value_size = value_size; + db->hash_table_size_bytes = sizeof(uint64_t) * (hash_table_size + 1); /* [hash_table_size] == next table */ + + httmp = malloc(db->hash_table_size_bytes); + if (!httmp) { + fclose(db->f); + return KISSDB_ERROR_MALLOC; + } + db->num_hash_tables = 0; + db->hash_tables = (uint64_t *)0; + while (fread(httmp,db->hash_table_size_bytes,1,db->f) == 1) { + hash_tables_rea = realloc(db->hash_tables,db->hash_table_size_bytes * (db->num_hash_tables + 1)); + if (!hash_tables_rea) { + KISSDB_close(db); + free(httmp); + return KISSDB_ERROR_MALLOC; + } + db->hash_tables = hash_tables_rea; + + memcpy(((uint8_t *)db->hash_tables) + (db->hash_table_size_bytes * db->num_hash_tables),httmp,db->hash_table_size_bytes); + ++db->num_hash_tables; + if (httmp[db->hash_table_size]) { + if (fseeko(db->f,httmp[db->hash_table_size],SEEK_SET)) { + KISSDB_close(db); + free(httmp); + return KISSDB_ERROR_IO; + } + } else break; + } + free(httmp); + + return 0; +} + +void KISSDB_close(KISSDB *db) +{ + if (db->hash_tables) + free(db->hash_tables); + if (db->f) + fclose(db->f); + memset(db,0,sizeof(KISSDB)); +} + +int KISSDB_get(KISSDB *db,const void *key,void *vbuf) +{ + uint8_t tmp[4096]; + const uint8_t *kptr; + unsigned long klen,i; + uint64_t hash = KISSDB_hash(key,db->key_size) % (uint64_t)db->hash_table_size; + uint64_t offset; + uint64_t *cur_hash_table; + long n; + + cur_hash_table = db->hash_tables; + for(i=0;i<db->num_hash_tables;++i) { + offset = cur_hash_table[hash]; + if (offset) { + if (fseeko(db->f,offset,SEEK_SET)) + return KISSDB_ERROR_IO; + + kptr = (const uint8_t *)key; + klen = db->key_size; + while (klen) { + n = (long)fread(tmp,1,(klen > sizeof(tmp)) ? sizeof(tmp) : klen,db->f); + if (n > 0) { + if (memcmp(kptr,tmp,n)) + goto get_no_match_next_hash_table; + kptr += n; + klen -= (unsigned long)n; + } else return 1; /* not found */ + } + + if (fread(vbuf,db->value_size,1,db->f) == 1) + return 0; /* success */ + else return KISSDB_ERROR_IO; + } else return 1; /* not found */ +get_no_match_next_hash_table: + cur_hash_table += db->hash_table_size + 1; + } + + return 1; /* not found */ +} + +int KISSDB_put(KISSDB *db,const void *key,const void *value) +{ + uint8_t tmp[4096]; + const uint8_t *kptr; + unsigned long klen,i; + uint64_t hash = KISSDB_hash(key,db->key_size) % (uint64_t)db->hash_table_size; + uint64_t offset; + uint64_t htoffset,lasthtoffset; + uint64_t endoffset; + uint64_t *cur_hash_table; + uint64_t *hash_tables_rea; + long n; + + lasthtoffset = htoffset = KISSDB_HEADER_SIZE; + cur_hash_table = db->hash_tables; + for(i=0;i<db->num_hash_tables;++i) { + offset = cur_hash_table[hash]; + if (offset) { + /* rewrite if already exists */ + if (fseeko(db->f,offset,SEEK_SET)) + return KISSDB_ERROR_IO; + + kptr = (const uint8_t *)key; + klen = db->key_size; + while (klen) { + n = (long)fread(tmp,1,(klen > sizeof(tmp)) ? sizeof(tmp) : klen,db->f); + if (n > 0) { + if (memcmp(kptr,tmp,n)) + goto put_no_match_next_hash_table; + kptr += n; + klen -= (unsigned long)n; + } + } + + /* C99 spec demands seek after fread(), required for Windows */ + fseeko(db->f,0,SEEK_CUR); + + if (fwrite(value,db->value_size,1,db->f) == 1) { + fflush(db->f); + return 0; /* success */ + } else return KISSDB_ERROR_IO; + } else { + /* add if an empty hash table slot is discovered */ + if (fseeko(db->f,0,SEEK_END)) + return KISSDB_ERROR_IO; + endoffset = ftello(db->f); + + if (fwrite(key,db->key_size,1,db->f) != 1) + return KISSDB_ERROR_IO; + if (fwrite(value,db->value_size,1,db->f) != 1) + return KISSDB_ERROR_IO; + + if (fseeko(db->f,htoffset + (sizeof(uint64_t) * hash),SEEK_SET)) + return KISSDB_ERROR_IO; + if (fwrite(&endoffset,sizeof(uint64_t),1,db->f) != 1) + return KISSDB_ERROR_IO; + cur_hash_table[hash] = endoffset; + + fflush(db->f); + + return 0; /* success */ + } +put_no_match_next_hash_table: + lasthtoffset = htoffset; + htoffset = cur_hash_table[db->hash_table_size]; + cur_hash_table += (db->hash_table_size + 1); + } + + /* if no existing slots, add a new page of hash table entries */ + if (fseeko(db->f,0,SEEK_END)) + return KISSDB_ERROR_IO; + endoffset = ftello(db->f); + + hash_tables_rea = realloc(db->hash_tables,db->hash_table_size_bytes * (db->num_hash_tables + 1)); + if (!hash_tables_rea) + return KISSDB_ERROR_MALLOC; + db->hash_tables = hash_tables_rea; + cur_hash_table = &(db->hash_tables[(db->hash_table_size + 1) * db->num_hash_tables]); + memset(cur_hash_table,0,db->hash_table_size_bytes); + + cur_hash_table[hash] = endoffset + db->hash_table_size_bytes; /* where new entry will go */ + + if (fwrite(cur_hash_table,db->hash_table_size_bytes,1,db->f) != 1) + return KISSDB_ERROR_IO; + + if (fwrite(key,db->key_size,1,db->f) != 1) + return KISSDB_ERROR_IO; + if (fwrite(value,db->value_size,1,db->f) != 1) + return KISSDB_ERROR_IO; + + if (db->num_hash_tables) { + if (fseeko(db->f,lasthtoffset + (sizeof(uint64_t) * db->hash_table_size),SEEK_SET)) + return KISSDB_ERROR_IO; + if (fwrite(&endoffset,sizeof(uint64_t),1,db->f) != 1) + return KISSDB_ERROR_IO; + db->hash_tables[((db->hash_table_size + 1) * (db->num_hash_tables - 1)) + db->hash_table_size] = endoffset; + } + + ++db->num_hash_tables; + + fflush(db->f); + + return 0; /* success */ +} + +void KISSDB_Iterator_init(KISSDB *db,KISSDB_Iterator *dbi) +{ + dbi->db = db; + dbi->h_no = 0; + dbi->h_idx = 0; +} + +int KISSDB_Iterator_next(KISSDB_Iterator *dbi,void *kbuf,void *vbuf) +{ + uint64_t offset; + + if ((dbi->h_no < dbi->db->num_hash_tables)&&(dbi->h_idx < dbi->db->hash_table_size)) { + while (!(offset = dbi->db->hash_tables[((dbi->db->hash_table_size + 1) * dbi->h_no) + dbi->h_idx])) { + if (++dbi->h_idx >= dbi->db->hash_table_size) { + dbi->h_idx = 0; + if (++dbi->h_no >= dbi->db->num_hash_tables) + return 0; + } + } + if (fseeko(dbi->db->f,offset,SEEK_SET)) + return KISSDB_ERROR_IO; + if (fread(kbuf,dbi->db->key_size,1,dbi->db->f) != 1) + return KISSDB_ERROR_IO; + if (fread(vbuf,dbi->db->value_size,1,dbi->db->f) != 1) + return KISSDB_ERROR_IO; + if (++dbi->h_idx >= dbi->db->hash_table_size) { + dbi->h_idx = 0; + ++dbi->h_no; + } + return 1; + } + + return 0; +} + +#ifdef KISSDB_TEST + +#include <inttypes.h> + +int main(int argc,char **argv) +{ + uint64_t i,j; + uint64_t v[8]; + KISSDB db; + KISSDB_Iterator dbi; + char got_all_values[10000]; + int q; + + printf("Opening new empty database test.db...\n"); + + if (KISSDB_open(&db,"test.db",KISSDB_OPEN_MODE_RWREPLACE,1024,8,sizeof(v))) { + printf("KISSDB_open failed\n"); + return 1; + } + + printf("Adding and then re-getting 10000 64-byte values...\n"); + + for(i=0;i<10000;++i) { + for(j=0;j<8;++j) + v[j] = i; + if (KISSDB_put(&db,&i,v)) { + printf("KISSDB_put failed (%"PRIu64")\n",i); + return 1; + } + memset(v,0,sizeof(v)); + if ((q = KISSDB_get(&db,&i,v))) { + printf("KISSDB_get (1) failed (%"PRIu64") (%d)\n",i,q); + return 1; + } + for(j=0;j<8;++j) { + if (v[j] != i) { + printf("KISSDB_get (1) failed, bad data (%"PRIu64")\n",i); + return 1; + } + } + } + + printf("Getting 10000 64-byte values...\n"); + + for(i=0;i<10000;++i) { + if ((q = KISSDB_get(&db,&i,v))) { + printf("KISSDB_get (2) failed (%"PRIu64") (%d)\n",i,q); + return 1; + } + for(j=0;j<8;++j) { + if (v[j] != i) { + printf("KISSDB_get (2) failed, bad data (%"PRIu64")\n",i); + return 1; + } + } + } + + printf("Closing and re-opening database in read-only mode...\n"); + + KISSDB_close(&db); + + if (KISSDB_open(&db,"test.db",KISSDB_OPEN_MODE_RDONLY,1024,8,sizeof(v))) { + printf("KISSDB_open failed\n"); + return 1; + } + + printf("Getting 10000 64-byte values...\n"); + + for(i=0;i<10000;++i) { + if ((q = KISSDB_get(&db,&i,v))) { + printf("KISSDB_get (3) failed (%"PRIu64") (%d)\n",i,q); + return 1; + } + for(j=0;j<8;++j) { + if (v[j] != i) { + printf("KISSDB_get (3) failed, bad data (%"PRIu64")\n",i); + return 1; + } + } + } + + printf("Iterator test...\n"); + + KISSDB_Iterator_init(&db,&dbi); + i = 0xdeadbeef; + memset(got_all_values,0,sizeof(got_all_values)); + while (KISSDB_Iterator_next(&dbi,&i,&v) > 0) { + if (i < 10000) + got_all_values[i] = 1; + else { + printf("KISSDB_Iterator_next failed, bad data (%"PRIu64")\n",i); + return 1; + } + } + for(i=0;i<10000;++i) { + if (!got_all_values[i]) { + printf("KISSDB_Iterator failed, missing value index %"PRIu64"\n",i); + return 1; + } + } + + KISSDB_close(&db); + + printf("All tests OK!\n"); + + return 0; +} + +#endif diff --git a/ext/kissdb/kissdb.h b/ext/kissdb/kissdb.h new file mode 100644 index 00000000..926906b0 --- /dev/null +++ b/ext/kissdb/kissdb.h @@ -0,0 +1,173 @@ +/* (Keep It) Simple Stupid Database + * + * Written by Adam Ierymenko <adam.ierymenko@zerotier.com> + * KISSDB is in the public domain and is distributed with NO WARRANTY. + * + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +#ifndef ___KISSDB_H +#define ___KISSDB_H + +#include <stdio.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Version: 2 + * + * This is the file format identifier, and changes any time the file + * format changes. The code version will be this dot something, and can + * be seen in tags in the git repository. + */ +#define KISSDB_VERSION 2 + +/** + * KISSDB database state + * + * These fields can be read by a user, e.g. to look up key_size and + * value_size, but should never be changed. + */ +typedef struct { + unsigned long hash_table_size; + unsigned long key_size; + unsigned long value_size; + unsigned long hash_table_size_bytes; + unsigned long num_hash_tables; + uint64_t *hash_tables; + FILE *f; +} KISSDB; + +/** + * I/O error or file not found + */ +#define KISSDB_ERROR_IO -1 + +/** + * Out of memory + */ +#define KISSDB_ERROR_MALLOC -2 + +/** + * Invalid paramters (e.g. missing _size paramters on init to create database) + */ +#define KISSDB_ERROR_INVALID_PARAMETERS -3 + +/** + * Database file appears corrupt + */ +#define KISSDB_ERROR_CORRUPT_DBFILE -4 + +/** + * Open mode: read only + */ +#define KISSDB_OPEN_MODE_RDONLY 1 + +/** + * Open mode: read/write + */ +#define KISSDB_OPEN_MODE_RDWR 2 + +/** + * Open mode: read/write, create if doesn't exist + */ +#define KISSDB_OPEN_MODE_RWCREAT 3 + +/** + * Open mode: truncate database, open for reading and writing + */ +#define KISSDB_OPEN_MODE_RWREPLACE 4 + +/** + * Open database + * + * The three _size parameters must be specified if the database could + * be created or re-created. Otherwise an error will occur. If the + * database already exists, these parameters are ignored and are read + * from the database. You can check the struture afterwords to see what + * they were. + * + * @param db Database struct + * @param path Path to file + * @param mode One of the KISSDB_OPEN_MODE constants + * @param hash_table_size Size of hash table in 64-bit entries (must be >0) + * @param key_size Size of keys in bytes + * @param value_size Size of values in bytes + * @return 0 on success, nonzero on error + */ +extern int KISSDB_open( + KISSDB *db, + const char *path, + int mode, + unsigned long hash_table_size, + unsigned long key_size, + unsigned long value_size); + +/** + * Close database + * + * @param db Database struct + */ +extern void KISSDB_close(KISSDB *db); + +/** + * Get an entry + * + * @param db Database struct + * @param key Key (key_size bytes) + * @param vbuf Value buffer (value_size bytes capacity) + * @return -1 on I/O error, 0 on success, 1 on not found + */ +extern int KISSDB_get(KISSDB *db,const void *key,void *vbuf); + +/** + * Put an entry (overwriting it if it already exists) + * + * In the already-exists case the size of the database file does not + * change. + * + * @param db Database struct + * @param key Key (key_size bytes) + * @param value Value (value_size bytes) + * @return -1 on I/O error, 0 on success + */ +extern int KISSDB_put(KISSDB *db,const void *key,const void *value); + +/** + * Cursor used for iterating over all entries in database + */ +typedef struct { + KISSDB *db; + unsigned long h_no; + unsigned long h_idx; +} KISSDB_Iterator; + +/** + * Initialize an iterator + * + * @param db Database struct + * @param i Iterator to initialize + */ +extern void KISSDB_Iterator_init(KISSDB *db,KISSDB_Iterator *dbi); + +/** + * Get the next entry + * + * The order of entries returned by iterator is undefined. It depends on + * how keys hash. + * + * @param Database iterator + * @param kbuf Buffer to fill with next key (key_size bytes) + * @param vbuf Buffer to fill with next value (value_size bytes) + * @return 0 if there are no more entries, negative on error, positive if an kbuf/vbuf have been filled + */ +extern int KISSDB_Iterator_next(KISSDB_Iterator *dbi,void *kbuf,void *vbuf); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/ext/vsdm/LICENSE.txt b/ext/vsdm/LICENSE.txt new file mode 100644 index 00000000..00b65473 --- /dev/null +++ b/ext/vsdm/LICENSE.txt @@ -0,0 +1,22 @@ +MIT LICENSE + +Copyright 2017 ZeroTier, Inc. +https://www.zerotier.com/ + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ext/vsdm/Makefile b/ext/vsdm/Makefile new file mode 100644 index 00000000..828d6edc --- /dev/null +++ b/ext/vsdm/Makefile @@ -0,0 +1,5 @@ +all: + c++ -Os -std=c++11 -o vsdm-test vsdm-test.cpp + +clean: + rm -f vsdm-test *.o *.dSYM diff --git a/ext/vsdm/README.md b/ext/vsdm/README.md new file mode 100644 index 00000000..03354be6 --- /dev/null +++ b/ext/vsdm/README.md @@ -0,0 +1,40 @@ +VSDM: Very Simple Distributed Map +====== + +VSDM is a super-minimal replicated in-memory associative container. Its advantages are small code size, small footprint, simplicity, and lack of dependencies. + +VSDM uses a rumor mill replication algorithm that provides fast best-effort replication. If connectivity is stable this results in eventual consistency, but data loss or regression can occur under split brain conditions. This class is not recommended for data that is intolerant of loss or regression. Its ideal use case is a distributed cache for small data objects or a distributed database for ephemeral data. + +Transport is via TCP and can optionally be encrypted if one specifies a cryptor class (see below). The transport protocol does not implement any features for versioning or backward compatibility and changes to key/value type, cryptor, or other relevant parameters will render it incompatible. Again this is designed for simple app-embedded use cases. Use something more feature complete if you need to support different versions across the network. + +Each node maintains a 64-bit monotonically increasing revision counter that starts at zero. When a node connects to another node it sends this revision counter and the other party will send all updates with revision numbers greater than or equal to it. When a node receives an update it replaces the entry it has if the revision counter is higher and also sets its own revision counter to the new counter if it is higher. It then re-transmits the update if a replace event occurred (if the new update was newer). This is the "rumor mill" part: each node retransmits all changes to all other connected nodes (excluding the source). + +VSDM nodes can be connected according to any arbitrary connectivity graph. A more fully connected graph trades increased bandwidth consumption (due to redundant messages) for decreased likelihood of data loss or split brain conditions. + +The VSDM class is thread safe. It launches a single background thread to handle network I/O and periodic cleanup. Deleted keys are purged from memory after a period of time to allow time for propagation and possible re-propagation. + +## Template parameters + +VSDM supports a number of template parameters for customization. The only required parameters are K and V, the key and value types. The default serializer allows these to be one of the *uintX_t* stdint numeric types or *std::string*. Specify a different serializer for anything else. + +Here are the other parameters after K and V (in order): + + * **L**: Maximum message length, which also limits the max size of the combined key and value for a given entry. This imposes a sanity limit to prevent memory exhaustion. Default is 131072. Absolute max is UINT32_MAX - 4 (4294967291). + * **W**: Watcher function type. Default is `vsdm_watcher_noop` which does nothing. The watcher function (or function object) is passed into the constructor and receives notifications of remotely initiated changes to the map's contents. (Local changes via set() or del() do not trigger the watcher). The watcher must have the following methods: + * `void add(uint64_t remoteNodeId,const K &key,const V &value,uint64_t revision)` + * `void update(uint64_t remoteNodeId,const K &key,const V &value,uint64_t revision)` + * `void del(uint64_t remoteNodeId,const K &key)` + * **S**: Serializer class, which must contain *static* methods for serializing and deserializing keys and values. The following must be present for both key and value types: + * `unsigned long objectSize(const [K|V] &)` + * `const char *objectData(const [K|V] &)` + * `bool objectDeserialize(const char *,unsigned long,[K|V] &)` (false return means object was invalid and causes disconnect) + * **C**: Cryptor type to encrypt transport, default is `vsdm_cryptor_noop` (no encryption). It is also passed into the constructor for internal initialization. This must implement a static method `static unsigned long overhead()` that returns the *constant* per-message overhead for things like IV and MAC, and two methods to encrypt and decrypt the payload in place. The vsdm class will add space for overhead at the *end* of the message. Encrypt/decrypt mathods have these signatures: + * `void encrypt(void *,unsigned long)` + * `bool decrypt(void *,unsigned long)` (false return means invalid MAC and causes disconnect) + * **M**: Map type for underlying storage. Default is `std::unordered_map` with default STL hashers. Substitute a different container if you need to deal with non-hashable keys or need a sorted map. Containers supporting duplicate keys should not be used as the replication algorithm will not function properly. + +## License + +(c)2017 ZeroTier, Inc. (MIT license) + +Written by [Adam Ierymenko](https://github.com/adamierymenko) diff --git a/ext/vsdm/vsdm-test.cpp b/ext/vsdm/vsdm-test.cpp new file mode 100644 index 00000000..13c49afe --- /dev/null +++ b/ext/vsdm/vsdm-test.cpp @@ -0,0 +1,72 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <iostream> + +#define VSDM_DEBUG 1 + +#include "vsdm.hpp" + +int main(int argc,char **argv) +{ + if (argc < 4) { + printf("Usage: vsdm-test <id> <node-id> <port> [<remote-node-id>/<remote-address>/<remote-port>]\n"); + return 0; + } + + uint64_t id = (uint64_t)strtoull(argv[1],(char **)0,10); + uint64_t node = (uint64_t)strtoull(argv[2],(char **)0,10); + int port = (int)strtol(argv[3],(char **)0,10); + + struct sockaddr_in sa; + memset(&sa,0,sizeof(sa)); + sa.sin_family = AF_INET; + sa.sin_port = htons((uint16_t)port); + + vsdm<std::string,std::string> m(id,node,false); + m.listen((const struct sockaddr *)&sa); + + for(int i=4;i<argc;++i) { + char tmp[1024]; + strncpy(tmp,argv[i],sizeof(tmp)); + int k = 0; + char *rnode = (char *)0; + char *raddr = (char *)0; + char *rport = (char *)0; + for (char *f=strtok(tmp,"/");f;f=strtok((char *)0,"/")) { + switch(k++) { + case 0: + rnode = f; + break; + case 1: + raddr = f; + break; + case 2: + rport = f; + break; + } + } + + if ((rnode)&&(raddr)&&(rport)) { + sa.sin_family = AF_INET; + sa.sin_port = htons((uint16_t)strtol(rport,(char **)0,10)); + if (!inet_aton(raddr,&(sa.sin_addr))) { + printf("Error: %s is not a valid IPv4 address.\n",raddr); + return 1; + } + m.link((uint64_t)strtoull(rnode,(char **)0,10),&sa,sizeof(sa)); + } + } + + for(;;) { + char k[1024]; + char v[1024]; + snprintf(k,sizeof(k),"%lu",(unsigned long)node); + snprintf(v,sizeof(v),"%lu",(unsigned long)time(0)); + m.set(k,v); + usleep(1000); + } + + return 0; +} diff --git a/ext/vsdm/vsdm.hpp b/ext/vsdm/vsdm.hpp new file mode 100644 index 00000000..2f42123f --- /dev/null +++ b/ext/vsdm/vsdm.hpp @@ -0,0 +1,1491 @@ +/* + * VSDM: Very Simple Distributed Map + * + * (c)2017 ZeroTier, Inc. + * Written by Adam Ierymenko <adam.ierymenko@zerotier.com> + * License: MIT + */ + +#ifndef ZT_VSDM_HPP__ +#define ZT_VSDM_HPP__ + +#include <stdint.h> +#include <stdio.h> +#include <time.h> + +#if defined(_WIN32) || defined(_WIN64) + +#include <WinSock2.h> +#include <WS2tcpip.h> +#include <Windows.h> + +#define ZT_PHY_SOCKFD_TYPE SOCKET +#define ZT_PHY_SOCKFD_NULL (INVALID_SOCKET) +#define ZT_PHY_SOCKFD_VALID(s) ((s) != INVALID_SOCKET) +#define ZT_PHY_CLOSE_SOCKET(s) ::closesocket(s) +#define ZT_PHY_MAX_SOCKETS (FD_SETSIZE) +#define ZT_PHY_MAX_INTERCEPTS ZT_PHY_MAX_SOCKETS +#define ZT_PHY_SOCKADDR_STORAGE_TYPE struct sockaddr_storage + +#else // not Windows + +#include <errno.h> +#include <signal.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + +#define ZT_PHY_SOCKFD_TYPE int +#define ZT_PHY_SOCKFD_NULL (-1) +#define ZT_PHY_SOCKFD_VALID(s) ((s) > -1) +#define ZT_PHY_CLOSE_SOCKET(s) ::close(s) +#define ZT_PHY_MAX_SOCKETS (FD_SETSIZE) +#define ZT_PHY_MAX_INTERCEPTS ZT_PHY_MAX_SOCKETS +#define ZT_PHY_SOCKADDR_STORAGE_TYPE struct sockaddr_storage + +#endif + +#include <string> +#include <queue> +#include <unordered_map> +#include <thread> +#include <mutex> +#include <list> +#include <stdexcept> +#include <vector> +#include <functional> + +/*********************************************************************************************************/ + +namespace ztVsdmInternal { + +/* This is the Phy<> adapter implementation for selected sockets from ZeroTier One. + * It should build and run out of the box on Windows and most *nix systems. Parts + * not used by VSDM have been removed. */ + +typedef void PhySocket; + +template <typename HANDLER_PTR_TYPE> +class Phy +{ +private: + HANDLER_PTR_TYPE _handler; + + enum PhySocketType + { + ZT_PHY_SOCKET_CLOSED = 0x00, // socket is closed, will be removed on next poll() + ZT_PHY_SOCKET_TCP_OUT_PENDING = 0x01, + ZT_PHY_SOCKET_TCP_OUT_CONNECTED = 0x02, + ZT_PHY_SOCKET_TCP_IN = 0x03, + ZT_PHY_SOCKET_TCP_LISTEN = 0x04, + ZT_PHY_SOCKET_UDP = 0x05, + ZT_PHY_SOCKET_FD = 0x06, + ZT_PHY_SOCKET_UNIX_IN = 0x07, + ZT_PHY_SOCKET_UNIX_LISTEN = 0x08 + }; + + struct PhySocketImpl + { + PhySocketType type; + ZT_PHY_SOCKFD_TYPE sock; + void *uptr; // user-settable pointer + ZT_PHY_SOCKADDR_STORAGE_TYPE saddr; // remote for TCP_OUT and TCP_IN, local for TCP_LISTEN, RAW, and UDP + }; + + std::list<PhySocketImpl> _socks; + fd_set _readfds; + fd_set _writefds; +#if defined(_WIN32) || defined(_WIN64) + fd_set _exceptfds; +#endif + long _nfds; + + ZT_PHY_SOCKFD_TYPE _whackReceiveSocket; + ZT_PHY_SOCKFD_TYPE _whackSendSocket; + + bool _noDelay; + bool _noCheck; + +public: + /** + * @param handler Pointer of type HANDLER_PTR_TYPE to handler + * @param noDelay If true, disable TCP NAGLE algorithm on TCP sockets + * @param noCheck If true, attempt to set UDP SO_NO_CHECK option to disable sending checksums + */ + Phy(HANDLER_PTR_TYPE handler,bool noDelay,bool noCheck) : + _handler(handler) + { + FD_ZERO(&_readfds); + FD_ZERO(&_writefds); + +#if defined(_WIN32) || defined(_WIN64) + FD_ZERO(&_exceptfds); + + SOCKET pipes[2]; + { // hack copied from StackOverflow, behaves a bit like pipe() on *nix systems + struct sockaddr_in inaddr; + struct sockaddr addr; + SOCKET lst=::socket(AF_INET, SOCK_STREAM,IPPROTO_TCP); + if (lst == INVALID_SOCKET) + throw std::runtime_error("unable to create pipes for select() abort"); + memset(&inaddr, 0, sizeof(inaddr)); + memset(&addr, 0, sizeof(addr)); + inaddr.sin_family = AF_INET; + inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + inaddr.sin_port = 0; + int yes=1; + setsockopt(lst,SOL_SOCKET,SO_REUSEADDR,(char*)&yes,sizeof(yes)); + bind(lst,(struct sockaddr *)&inaddr,sizeof(inaddr)); + listen(lst,1); + int len=sizeof(inaddr); + getsockname(lst, &addr,&len); + pipes[0]=::socket(AF_INET, SOCK_STREAM,0); + if (pipes[0] == INVALID_SOCKET) + throw std::runtime_error("unable to create pipes for select() abort"); + connect(pipes[0],&addr,len); + pipes[1]=accept(lst,0,0); + closesocket(lst); + } +#else // not Windows + int pipes[2]; + if (::pipe(pipes)) + throw std::runtime_error("unable to create pipes for select() abort"); +#endif // Windows or not + + _nfds = (pipes[0] > pipes[1]) ? (long)pipes[0] : (long)pipes[1]; + _whackReceiveSocket = pipes[0]; + _whackSendSocket = pipes[1]; + _noDelay = noDelay; + _noCheck = noCheck; + } + + ~Phy() + { + for(typename std::list<PhySocketImpl>::const_iterator s(_socks.begin());s!=_socks.end();++s) { + if (s->type != ZT_PHY_SOCKET_CLOSED) + this->close((PhySocket *)&(*s),true); + } + ZT_PHY_CLOSE_SOCKET(_whackReceiveSocket); + ZT_PHY_CLOSE_SOCKET(_whackSendSocket); + } + + /** + * @param s Socket object + * @return Underlying OS-type (usually int or long) file descriptor associated with object + */ + static inline ZT_PHY_SOCKFD_TYPE getDescriptor(PhySocket *s) throw() { return reinterpret_cast<PhySocketImpl *>(s)->sock; } + + /** + * @param s Socket object + * @return Pointer to user object + */ + static inline void** getuptr(PhySocket *s) throw() { return &(reinterpret_cast<PhySocketImpl *>(s)->uptr); } + + /** + * Cause poll() to stop waiting immediately + * + * This can be used to reset the polling loop after changes that require + * attention, or to shut down a background thread that is waiting, etc. + */ + inline void whack() + { +#if defined(_WIN32) || defined(_WIN64) + ::send(_whackSendSocket,(const char *)this,1,0); +#else + (void)(::write(_whackSendSocket,(PhySocket *)this,1)); +#endif + } + + /** + * @return Number of open sockets + */ + inline unsigned long count() const throw() { return _socks.size(); } + + /** + * @return Maximum number of sockets allowed + */ + inline unsigned long maxCount() const throw() { return ZT_PHY_MAX_SOCKETS; } + + /** + * Bind a local listen socket to listen for new TCP connections + * + * @param localAddress Local address and port + * @param uptr Initial value of uptr for new socket (default: NULL) + * @return Socket or NULL on failure to bind + */ + inline PhySocket *tcpListen(const struct sockaddr *localAddress,void *uptr = (void *)0) + { + if (_socks.size() >= ZT_PHY_MAX_SOCKETS) + return (PhySocket *)0; + + ZT_PHY_SOCKFD_TYPE s = ::socket(localAddress->sa_family,SOCK_STREAM,0); + if (!ZT_PHY_SOCKFD_VALID(s)) + return (PhySocket *)0; + +#if defined(_WIN32) || defined(_WIN64) + { + BOOL f; + f = TRUE; ::setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(const char *)&f,sizeof(f)); + f = TRUE; ::setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f)); + f = (_noDelay ? TRUE : FALSE); setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); + u_long iMode=1; + ioctlsocket(s,FIONBIO,&iMode); + } +#else + { + int f; + f = 1; ::setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(void *)&f,sizeof(f)); + f = 1; ::setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f)); + f = (_noDelay ? 1 : 0); setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); + fcntl(s,F_SETFL,O_NONBLOCK); + } +#endif + + if (::bind(s,localAddress,(localAddress->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in))) { + ZT_PHY_CLOSE_SOCKET(s); + return (PhySocket *)0; + } + + if (::listen(s,1024)) { + ZT_PHY_CLOSE_SOCKET(s); + return (PhySocket *)0; + } + + try { + _socks.push_back(PhySocketImpl()); + } catch ( ... ) { + ZT_PHY_CLOSE_SOCKET(s); + return (PhySocket *)0; + } + PhySocketImpl &sws = _socks.back(); + + if ((long)s > _nfds) + _nfds = (long)s; + FD_SET(s,&_readfds); + sws.type = ZT_PHY_SOCKET_TCP_LISTEN; + sws.sock = s; + sws.uptr = uptr; + memset(&(sws.saddr),0,sizeof(struct sockaddr_storage)); + memcpy(&(sws.saddr),localAddress,(localAddress->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); + + return (PhySocket *)&sws; + } + + /** + * Start a non-blocking connect; CONNECT handler is called on success or failure + * + * A return value of NULL indicates a synchronous failure such as a + * failure to open a socket. The TCP connection handler is not called + * in this case. + * + * It is possible on some platforms for an "instant connect" to occur, + * such as when connecting to a loopback address. In this case, the + * 'connected' result parameter will be set to 'true' and if the + * 'callConnectHandler' flag is true (the default) the TCP connect + * handler will be called before the function returns. + * + * These semantics can be a bit confusing, but they're less so than + * the underlying semantics of asynchronous TCP connect. + * + * @param remoteAddress Remote address + * @param connected Result parameter: set to whether an "instant connect" has occurred (true if yes) + * @param uptr Initial value of uptr for new socket (default: NULL) + * @param callConnectHandler If true, call TCP connect handler even if result is known before function exit (default: true) + * @return New socket or NULL on failure + */ + inline PhySocket *tcpConnect(const struct sockaddr *remoteAddress,bool &connected,void *uptr = (void *)0,bool callConnectHandler = true) + { + if (_socks.size() >= ZT_PHY_MAX_SOCKETS) + return (PhySocket *)0; + + ZT_PHY_SOCKFD_TYPE s = ::socket(remoteAddress->sa_family,SOCK_STREAM,0); + if (!ZT_PHY_SOCKFD_VALID(s)) { + connected = false; + return (PhySocket *)0; + } + +#if defined(_WIN32) || defined(_WIN64) + { + BOOL f; + if (remoteAddress->sa_family == AF_INET6) { f = TRUE; ::setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(const char *)&f,sizeof(f)); } + f = TRUE; ::setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(const char *)&f,sizeof(f)); + f = (_noDelay ? TRUE : FALSE); setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); + u_long iMode=1; + ioctlsocket(s,FIONBIO,&iMode); + } +#else + { + int f; + if (remoteAddress->sa_family == AF_INET6) { f = 1; ::setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,(void *)&f,sizeof(f)); } + f = 1; ::setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(void *)&f,sizeof(f)); + f = (_noDelay ? 1 : 0); setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); + fcntl(s,F_SETFL,O_NONBLOCK); + } +#endif + + connected = true; + if (::connect(s,remoteAddress,(remoteAddress->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in))) { + connected = false; +#if defined(_WIN32) || defined(_WIN64) + if (WSAGetLastError() != WSAEWOULDBLOCK) { +#else + if (errno != EINPROGRESS) { +#endif + ZT_PHY_CLOSE_SOCKET(s); + return (PhySocket *)0; + } // else connection is proceeding asynchronously... + } + + try { + _socks.push_back(PhySocketImpl()); + } catch ( ... ) { + ZT_PHY_CLOSE_SOCKET(s); + return (PhySocket *)0; + } + PhySocketImpl &sws = _socks.back(); + + if ((long)s > _nfds) + _nfds = (long)s; + if (connected) { + FD_SET(s,&_readfds); + sws.type = ZT_PHY_SOCKET_TCP_OUT_CONNECTED; + } else { + FD_SET(s,&_writefds); +#if defined(_WIN32) || defined(_WIN64) + FD_SET(s,&_exceptfds); +#endif + sws.type = ZT_PHY_SOCKET_TCP_OUT_PENDING; + } + sws.sock = s; + sws.uptr = uptr; + memset(&(sws.saddr),0,sizeof(struct sockaddr_storage)); + memcpy(&(sws.saddr),remoteAddress,(remoteAddress->sa_family == AF_INET6) ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); + + if ((callConnectHandler)&&(connected)) { + try { + _handler->phyOnTcpConnect((PhySocket *)&sws,&(sws.uptr),true); + } catch ( ... ) {} + } + + return (PhySocket *)&sws; + } + + /** + * Attempt to send data to a stream socket (non-blocking) + * + * If -1 is returned, the socket should no longer be used as it is now + * destroyed. If callCloseHandler is true, the close handler will be + * called before the function returns. + * + * This can be used with TCP, Unix, or socket pair sockets. + * + * @param sock An open stream socket (other socket types will fail) + * @param data Data to send + * @param len Length of data + * @param callCloseHandler If true, call close handler on socket closing failure condition (default: true) + * @return Number of bytes actually sent or -1 on fatal error (socket closure) + */ + inline long streamSend(PhySocket *sock,const void *data,unsigned long len,bool callCloseHandler = true) + { + PhySocketImpl &sws = *(reinterpret_cast<PhySocketImpl *>(sock)); +#if defined(_WIN32) || defined(_WIN64) + long n = (long)::send(sws.sock,reinterpret_cast<const char *>(data),len,0); + if (n == SOCKET_ERROR) { + switch(WSAGetLastError()) { + case WSAEINTR: + case WSAEWOULDBLOCK: + return 0; + default: + this->close(sock,callCloseHandler); + return -1; + } + } +#else // not Windows + long n = (long)::send(sws.sock,data,len,0); + if (n < 0) { + switch(errno) { +#ifdef EAGAIN + case EAGAIN: +#endif +#if defined(EWOULDBLOCK) && ( !defined(EAGAIN) || (EWOULDBLOCK != EAGAIN) ) + case EWOULDBLOCK: +#endif +#ifdef EINTR + case EINTR: +#endif + return 0; + default: + this->close(sock,callCloseHandler); + return -1; + } + } +#endif // Windows or not + return n; + } + + /** + * For streams, sets whether we want to be notified that the socket is writable + * + * This can be used with TCP, Unix, or socket pair sockets. + * + * Call whack() if this is being done from another thread and you want + * it to take effect immediately. Otherwise it is only guaranteed to + * take effect on the next poll(). + * + * @param sock Stream connection socket + * @param notifyWritable Want writable notifications? + */ + inline const void setNotifyWritable(PhySocket *sock,bool notifyWritable) + { + PhySocketImpl &sws = *(reinterpret_cast<PhySocketImpl *>(sock)); + if (notifyWritable) { + FD_SET(sws.sock,&_writefds); + } else { + FD_CLR(sws.sock,&_writefds); + } + } + + /** + * Set whether we want to be notified that a socket is readable + * + * This is primarily for raw sockets added with wrapSocket(). It could be + * used with others, but doing so would essentially lock them and prevent + * data from being read from them until this is set to 'true' again. + * + * @param sock Socket to modify + * @param notifyReadable True if socket should be monitored for readability + */ + inline const void setNotifyReadable(PhySocket *sock,bool notifyReadable) + { + PhySocketImpl &sws = *(reinterpret_cast<PhySocketImpl *>(sock)); + if (notifyReadable) { + FD_SET(sws.sock,&_readfds); + } else { + FD_CLR(sws.sock,&_readfds); + } + } + + /** + * Wait for activity and handle one or more events + * + * Note that this is not guaranteed to wait up to 'timeout' even + * if nothing happens, as whack() or other events such as signals + * may cause premature termination. + * + * @param timeout Timeout in milliseconds or 0 for none (forever) + */ + inline void poll(unsigned long timeout) + { + char buf[131072]; + struct sockaddr_storage ss; + struct timeval tv; + fd_set rfds,wfds,efds; + + memcpy(&rfds,&_readfds,sizeof(rfds)); + memcpy(&wfds,&_writefds,sizeof(wfds)); +#if defined(_WIN32) || defined(_WIN64) + memcpy(&efds,&_exceptfds,sizeof(efds)); +#else + FD_ZERO(&efds); +#endif + + tv.tv_sec = (long)(timeout / 1000); + tv.tv_usec = (long)((timeout % 1000) * 1000); + if (::select((int)_nfds + 1,&rfds,&wfds,&efds,(timeout > 0) ? &tv : (struct timeval *)0) <= 0) + return; + + if (FD_ISSET(_whackReceiveSocket,&rfds)) { + char tmp[16]; +#if defined(_WIN32) || defined(_WIN64) + ::recv(_whackReceiveSocket,tmp,16,0); +#else + ::read(_whackReceiveSocket,tmp,16); +#endif + } + + for(typename std::list<PhySocketImpl>::iterator s(_socks.begin());s!=_socks.end();) { + switch (s->type) { + + case ZT_PHY_SOCKET_TCP_OUT_PENDING: +#if defined(_WIN32) || defined(_WIN64) + if (FD_ISSET(s->sock,&efds)) { + this->close((PhySocket *)&(*s),true); + } else // ... if +#endif + if (FD_ISSET(s->sock,&wfds)) { + socklen_t slen = sizeof(ss); + if (::getpeername(s->sock,(struct sockaddr *)&ss,&slen) != 0) { + this->close((PhySocket *)&(*s),true); + } else { + s->type = ZT_PHY_SOCKET_TCP_OUT_CONNECTED; + FD_SET(s->sock,&_readfds); + FD_CLR(s->sock,&_writefds); +#if defined(_WIN32) || defined(_WIN64) + FD_CLR(s->sock,&_exceptfds); +#endif + try { + _handler->phyOnTcpConnect((PhySocket *)&(*s),&(s->uptr),true); + } catch ( ... ) {} + } + } + break; + + case ZT_PHY_SOCKET_TCP_OUT_CONNECTED: + case ZT_PHY_SOCKET_TCP_IN: { + ZT_PHY_SOCKFD_TYPE sock = s->sock; // if closed, s->sock becomes invalid as s is no longer dereferencable + if (FD_ISSET(sock,&rfds)) { + long n = (long)::recv(sock,buf,sizeof(buf),0); + if (n <= 0) { + this->close((PhySocket *)&(*s),true); + } else { + try { + _handler->phyOnTcpData((PhySocket *)&(*s),&(s->uptr),(void *)buf,(unsigned long)n); + } catch ( ... ) {} + } + } + if ((FD_ISSET(sock,&wfds))&&(FD_ISSET(sock,&_writefds))) { + try { + _handler->phyOnTcpWritable((PhySocket *)&(*s),&(s->uptr)); + } catch ( ... ) {} + } + } break; + + case ZT_PHY_SOCKET_TCP_LISTEN: + if (FD_ISSET(s->sock,&rfds)) { + memset(&ss,0,sizeof(ss)); + socklen_t slen = sizeof(ss); + ZT_PHY_SOCKFD_TYPE newSock = ::accept(s->sock,(struct sockaddr *)&ss,&slen); + if (ZT_PHY_SOCKFD_VALID(newSock)) { + if (_socks.size() >= ZT_PHY_MAX_SOCKETS) { + ZT_PHY_CLOSE_SOCKET(newSock); + } else { +#if defined(_WIN32) || defined(_WIN64) + { BOOL f = (_noDelay ? TRUE : FALSE); setsockopt(newSock,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); } + { u_long iMode=1; ioctlsocket(newSock,FIONBIO,&iMode); } +#else + { int f = (_noDelay ? 1 : 0); setsockopt(newSock,IPPROTO_TCP,TCP_NODELAY,(char *)&f,sizeof(f)); } + fcntl(newSock,F_SETFL,O_NONBLOCK); +#endif + _socks.push_back(PhySocketImpl()); + PhySocketImpl &sws = _socks.back(); + FD_SET(newSock,&_readfds); + if ((long)newSock > _nfds) + _nfds = (long)newSock; + sws.type = ZT_PHY_SOCKET_TCP_IN; + sws.sock = newSock; + sws.uptr = (void *)0; + memcpy(&(sws.saddr),&ss,sizeof(struct sockaddr_storage)); + try { + _handler->phyOnTcpAccept((PhySocket *)&(*s),(PhySocket *)&(_socks.back()),&(s->uptr),&(sws.uptr),(const struct sockaddr *)&(sws.saddr)); + } catch ( ... ) {} + } + } + } + break; + + case ZT_PHY_SOCKET_UDP: + if (FD_ISSET(s->sock,&rfds)) { + for(;;) { + memset(&ss,0,sizeof(ss)); + socklen_t slen = sizeof(ss); + long n = (long)::recvfrom(s->sock,buf,sizeof(buf),0,(struct sockaddr *)&ss,&slen); + if (n > 0) { + try { + _handler->phyOnDatagram((PhySocket *)&(*s),&(s->uptr),(const struct sockaddr *)&(s->saddr),(const struct sockaddr *)&ss,(void *)buf,(unsigned long)n); + } catch ( ... ) {} + } else if (n < 0) + break; + } + } + break; + + case ZT_PHY_SOCKET_UNIX_IN: { +#ifdef __UNIX_LIKE__ + ZT_PHY_SOCKFD_TYPE sock = s->sock; // if closed, s->sock becomes invalid as s is no longer dereferencable + if ((FD_ISSET(sock,&wfds))&&(FD_ISSET(sock,&_writefds))) { + try { + _handler->phyOnUnixWritable((PhySocket *)&(*s),&(s->uptr),false); + } catch ( ... ) {} + } + if (FD_ISSET(sock,&rfds)) { + long n = (long)::read(sock,buf,sizeof(buf)); + if (n <= 0) { + this->close((PhySocket *)&(*s),true); + } else { + try { + _handler->phyOnUnixData((PhySocket *)&(*s),&(s->uptr),(void *)buf,(unsigned long)n); + } catch ( ... ) {} + } + } +#endif // __UNIX_LIKE__ + } break; + + case ZT_PHY_SOCKET_UNIX_LISTEN: +#ifdef __UNIX_LIKE__ + if (FD_ISSET(s->sock,&rfds)) { + memset(&ss,0,sizeof(ss)); + socklen_t slen = sizeof(ss); + ZT_PHY_SOCKFD_TYPE newSock = ::accept(s->sock,(struct sockaddr *)&ss,&slen); + if (ZT_PHY_SOCKFD_VALID(newSock)) { + if (_socks.size() >= ZT_PHY_MAX_SOCKETS) { + ZT_PHY_CLOSE_SOCKET(newSock); + } else { + fcntl(newSock,F_SETFL,O_NONBLOCK); + _socks.push_back(PhySocketImpl()); + PhySocketImpl &sws = _socks.back(); + FD_SET(newSock,&_readfds); + if ((long)newSock > _nfds) + _nfds = (long)newSock; + sws.type = ZT_PHY_SOCKET_UNIX_IN; + sws.sock = newSock; + sws.uptr = (void *)0; + memcpy(&(sws.saddr),&ss,sizeof(struct sockaddr_storage)); + try { + //_handler->phyOnUnixAccept((PhySocket *)&(*s),(PhySocket *)&(_socks.back()),&(s->uptr),&(sws.uptr)); + } catch ( ... ) {} + } + } + } +#endif // __UNIX_LIKE__ + break; + + case ZT_PHY_SOCKET_FD: { + ZT_PHY_SOCKFD_TYPE sock = s->sock; + const bool readable = ((FD_ISSET(sock,&rfds))&&(FD_ISSET(sock,&_readfds))); + const bool writable = ((FD_ISSET(sock,&wfds))&&(FD_ISSET(sock,&_writefds))); + if ((readable)||(writable)) { + try { + //_handler->phyOnFileDescriptorActivity((PhySocket *)&(*s),&(s->uptr),readable,writable); + } catch ( ... ) {} + } + } break; + + default: + break; + + } + + if (s->type == ZT_PHY_SOCKET_CLOSED) + _socks.erase(s++); + else ++s; + } + } + + /** + * @param sock Socket to close + * @param callHandlers If true, call handlers for TCP connect (success: false) or close (default: true) + */ + inline void close(PhySocket *sock,bool callHandlers = true) + { + if (!sock) + return; + PhySocketImpl &sws = *(reinterpret_cast<PhySocketImpl *>(sock)); + if (sws.type == ZT_PHY_SOCKET_CLOSED) + return; + + FD_CLR(sws.sock,&_readfds); + FD_CLR(sws.sock,&_writefds); +#if defined(_WIN32) || defined(_WIN64) + FD_CLR(sws.sock,&_exceptfds); +#endif + + if (sws.type != ZT_PHY_SOCKET_FD) + ZT_PHY_CLOSE_SOCKET(sws.sock); + +#ifdef __UNIX_LIKE__ + if (sws.type == ZT_PHY_SOCKET_UNIX_LISTEN) + ::unlink(((struct sockaddr_un *)(&(sws.saddr)))->sun_path); +#endif // __UNIX_LIKE__ + + if (callHandlers) { + switch(sws.type) { + case ZT_PHY_SOCKET_TCP_OUT_PENDING: + try { + _handler->phyOnTcpConnect(sock,&(sws.uptr),false); + } catch ( ... ) {} + break; + case ZT_PHY_SOCKET_TCP_OUT_CONNECTED: + case ZT_PHY_SOCKET_TCP_IN: + try { + _handler->phyOnTcpClose(sock,&(sws.uptr)); + } catch ( ... ) {} + break; + case ZT_PHY_SOCKET_UNIX_IN: +#ifdef __UNIX_LIKE__ + try { + _handler->phyOnUnixClose(sock,&(sws.uptr)); + } catch ( ... ) {} +#endif // __UNIX_LIKE__ + break; + default: + break; + } + } + + // Causes entry to be deleted from list in poll(), ignored elsewhere + sws.type = ZT_PHY_SOCKET_CLOSED; + + if ((long)sws.sock >= (long)_nfds) { + long nfds = (long)_whackSendSocket; + if ((long)_whackReceiveSocket > nfds) + nfds = (long)_whackReceiveSocket; + for(typename std::list<PhySocketImpl>::iterator s(_socks.begin());s!=_socks.end();++s) { + if ((s->type != ZT_PHY_SOCKET_CLOSED)&&((long)s->sock > nfds)) + nfds = (long)s->sock; + } + _nfds = nfds; + } + } +}; + +static inline uint64_t _swap64(const uint64_t n) +{ + return ( + ((n & 0x00000000000000FFULL) << 56) | + ((n & 0x000000000000FF00ULL) << 40) | + ((n & 0x0000000000FF0000ULL) << 24) | + ((n & 0x00000000FF000000ULL) << 8) | + ((n & 0x000000FF00000000ULL) >> 8) | + ((n & 0x0000FF0000000000ULL) >> 24) | + ((n & 0x00FF000000000000ULL) >> 40) | + ((n & 0xFF00000000000000ULL) >> 56) + ); +} + +} // namespace ztVsdmInternal + +/*********************************************************************************************************/ + +/** + * No-op update watcher + */ +class vsdm_watcher_noop +{ +public: + template<typename K,typename V> + inline void add(uint64_t,const K &k,const V &v,uint64_t) {} + template<typename K,typename V> + inline void update(uint64_t,const K &k,const V &v,uint64_t) {} + template<typename K> + inline void del(uint64_t,const K &k) {} +}; + +/** + * No-op cryptor that adds no overhead and does no encryption + */ +class vsdm_cryptor_noop +{ +public: + static inline unsigned long overhead() { return 0; } + inline void encrypt(void *d,unsigned long l) {} + inline bool decrypt(void *d,unsigned long l) { return true; } +}; + +/** + * Default serializer supporting std::string and stdint.h types + */ +class vsdm_default_serializer +{ +public: + static inline unsigned long objectSize(const std::string &o) { return o.length(); } + static inline unsigned long objectSize(const uint8_t o) { return 1; } + static inline unsigned long objectSize(const int8_t o) { return 1; } + static inline unsigned long objectSize(const uint16_t o) { return 2; } + static inline unsigned long objectSize(const int16_t o) { return 2; } + static inline unsigned long objectSize(const uint32_t o) { return 4; } + static inline unsigned long objectSize(const int32_t o) { return 4; } + static inline unsigned long objectSize(const uint64_t o) { return 8; } + static inline unsigned long objectSize(const int64_t o) { return 8; } + + static inline const char *objectData(const std::string &o) { return o.data(); } + static inline const char *objectData(const uint8_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const int8_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const uint16_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const int16_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const uint32_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const int32_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const uint64_t &o) { return reinterpret_cast<const char *>(&o); } + static inline const char *objectData(const int64_t &o) { return reinterpret_cast<const char *>(&o); } + + static inline bool objectDeserialize(const char *d,unsigned long l,std::string &o) { o.assign(d,l); return true; } + static inline bool objectDeserialize(const char *d,unsigned long l,uint8_t &o) { if (l == 1) { memcpy(&o,d,1); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,int8_t &o) { if (l == 1) { memcpy(&o,d,1); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,uint16_t &o) { if (l == 2) { memcpy(&o,d,2); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,int16_t &o) { if (l == 2) { memcpy(&o,d,2); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,uint32_t &o) { if (l == 4) { memcpy(&o,d,4); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,int32_t &o) { if (l == 4) { memcpy(&o,d,4); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,uint64_t &o) { if (l == 8) { memcpy(&o,d,8); return true; } else { return false; } } + static inline bool objectDeserialize(const char *d,unsigned long l,int64_t &o) { if (l == 8) { memcpy(&o,d,8); return true; } else { return false; } } +}; + +/** + * VSDM: Very Simple Distributed Map + * + * See README.md for full docs. + * + * @tparam K Key type (must be supported by serializer) + * @tparam V Value type (must be supported by serializer) + * @tparam L Maximum message length (max allowed: UINT32_MAX - 1, default: 131072) + * @tparam W Watcher function (default: vsdm_watcher_noop) + * @tparam S Serializer class with static methods to serialize keys and values (default: vsdm_default_serializer) + * @tparam C Cryptor to encrypt/decrypt and authenticate network traffic (default: vsdm_cryptor_noop) + * @tparam M Map type for underlying data store (default: std::unordered_map) + */ +template< + typename K, + typename V, + unsigned long L = 131072, + typename W = vsdm_watcher_noop, + typename S = vsdm_default_serializer, + typename C = vsdm_cryptor_noop, + template<typename,typename...> class M = std::unordered_map +> +class vsdm +{ + friend void vsdm_thread_main(void *parent); + friend class ztVsdmInternal::Phy<vsdm *>; + +private: + struct vsdm_entry + { + vsdm_entry() : rev(0),deletedAt(0),v() {} + uint64_t rev; + uint64_t deletedAt; + V v; + }; + + struct _connection + { + _connection() : outbuf(),inbuf(),gotHello(false),node(0),sock((ztVsdmInternal::PhySocket *)0) {} + std::string outbuf; + std::string inbuf; + bool gotHello; + uint64_t node; + ztVsdmInternal::PhySocket *sock; + }; + +public: + typedef K key_type; + typedef V value_type; + + /** + * @param id Cluster ID, must be the same on all nodes + * @param node Arbitrary unique node ID + * @param restrictInbound If true, restrict inbound connections to known peer IPs (added via link()) + * @param cryptor Encryptor/decryptor instance (default: C()) + * @param watcher Watcher function instance (default: W()) + */ + vsdm(uint64_t id,uint64_t node,bool restrictInbound,const C &cryptor = C(),const W &watcher = W()) : + _node(node), + _id(id), + _rev(0), + _connections(), + _m(), + _lock(), + _phy(this,false,false), + _cryptor(cryptor), + _watcher(watcher), + _run(true), + _restrictInbound(restrictInbound), + _t(_threadMain,reinterpret_cast<void *>(this)) + { + } + + ~vsdm() + { + _run = false; + _phy.whack(); + _t.join(); + } + + /** + * @param k Key to set + * @param v New value for key + * @return Revision of entry in map + */ + inline uint64_t set(const K &k,const V &v) + { + std::lock_guard<std::mutex> l(_lock); + + vsdm_entry &e = _m[k]; + e.rev = ++_rev; + e.deletedAt = 0; + e.v = v; + + std::vector<uint64_t> sentToNodes; + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::iterator c2(_connections.begin());c2!=_connections.end();++c2) { + if ((c2->second.gotHello)&&(std::find(sentToNodes.begin(),sentToNodes.end(),c2->second.node) == sentToNodes.end())) { + sendUpdate(c2->second,k,e); + sentToNodes.push_back(c2->second.node); +#ifdef VSDM_DEBUG + fprintf(stderr,">> %lu: %s=%s\n",(unsigned long)c2->second.node,k.c_str(),v.c_str()); fflush(stderr); +#endif + } + } + _phy.whack(); + + return _rev; + } + + /** + * @param k Key to check + * @return Revision of key that we have or 0 if not found + */ + inline uint64_t have(const K &k) const + { + std::lock_guard<std::mutex> l(_lock); + typename std::unordered_map<std::string,vsdm_entry>::const_iterator i(_m.find(k)); + if ((i == _m.end())||(i->second.deletedAt)) + return 0; + return i->second.rev; + } + + /** + * @param k Key to get + * @param dfl Default value if key is not found + * @param have If non-NULL, set to revision of this key or 0 if not found + * @return Key value or dfl if not found + */ + inline V get(const K &k,const V &dfl = V(),uint64_t *have = (uint64_t *)0) const + { + std::lock_guard<std::mutex> l(_lock); + typename std::unordered_map<std::string,vsdm_entry>::const_iterator i(_m.find(k)); + if ((i == _m.end())||(i->second.deletedAt)) { + if (have) + *have = 0; + return dfl; + } + if (have) + *have = i->second.rev; + return i->second.v; + } + + /** + * @param k Key to get + * @param have If non-NULL, set to revision of this key or 0 if not found + * @return Key's value or default/empty V() if not found + */ + inline V get(const K &k,uint64_t *have) const + { + return get(k,V(),have); + } + + /** + * Erase a key + * + * Erased entries are not wholly purged from memory immediately. They + * are marked as erased and purged after sufficient time for propagation. + * + * @param k Key to erase + * @return Previous revision of this key in map or 0 if not found + */ + inline bool del(const K &k) + { + uint64_t prev = 0; + std::lock_guard<std::mutex> l(_lock); + + typename std::unordered_map<std::string,vsdm_entry>::iterator i(_m.find(k)); + if (i == _m.end()) + return 0; + prev = i->second.rev; + i->second.rev = ++_rev; + i->second.deletedAt = _rev; + i->second.v.clear(); + + std::vector<uint64_t> sentToNodes; + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::iterator c2(_connections.begin());c2!=_connections.end();++c2) { + if ((c2->second.gotHello)&&(std::find(sentToNodes.begin(),sentToNodes.end(),c2->second.node) == sentToNodes.end())) { + sendUpdate(c2->second,k,i->second); + sentToNodes.push_back(c2->second.node); +#ifdef VSDM_DEBUG + fprintf(stderr,">> %lu: %s=<DEL>\n",(unsigned long)c2->second.node,k.c_str()); fflush(stderr); +#endif + } + } + _phy.whack(); + + return prev; + } + + /** + * Listen for incoming node connections on an address + * + * This can be called more than once to listen on more than one address and port. + * + * @param sa Socket address + * @return True if bind succeeded + */ + inline bool listen(const struct sockaddr *sa) + { + std::lock_guard<std::mutex> l(_lock); + return (_phy.tcpListen(sa) != (ztVsdmInternal::PhySocket *)0); + } + + /** + * Add a remote node endpoint + * + * This can be called for an arbitrary number of other endpoints in the + * network to tell this node to attempt to maintain a link to them. + * + * @param node Node ID of remote + * @param sa Socket address of remote + * @param salen Length of socket address structure + */ + inline void link(uint64_t node,const struct sockaddr_in *sa,unsigned int salen) + { + std::lock_guard<std::mutex> l(_lock); + if ((node != _node)&&(salen <= sizeof(struct sockaddr_storage))) + memcpy(&(_peers[node]),sa,salen); + } + + /** + * @return Node IDs of nodes that are currently connected + */ + inline std::vector<uint64_t> who() const + { + std::vector<uint64_t> w; + std::lock_guard<std::mutex> l(_lock); + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::const_iterator i(_connections.begin());i!=_connections.end();++i) { + if ((i->gotHello)&&(std::find(w.begin(),w.end(),i->second.node) == w.end())) + w.push_back(i->second.node); + } + return w; + } + + /** + * @return True if we are currently connected to at least one other node + */ + inline bool connected() const + { + std::lock_guard<std::mutex> l(_lock); + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::const_iterator i(_connections.begin());i!=_connections.end();++i) { + if (i->gotHello) + return true; + } + return false; + } + + /** + * Iterate through all members of this map, with optional deletion + * + * The function is executed against all key/value pairs and returns a signed integer. + * A negative return value causes the entry to be deleted, while a positive return + * value means the key's value (which is passed into the function as a reference) has + * been modified and should be replicated. A return value of zero means no change. + * + * Other methods should not be called since doing so can result in a deadlock. + * + * @param func Function to execute against all members of map, returns integer (see description) + */ + template<typename F> + inline void each(F func) + { + std::vector<uint64_t> sentToNodes; + bool whack = false; + std::lock_guard<std::mutex> l(_lock); + for(typename M<K,vsdm_entry>::iterator i(_m.begin());i!=_m.end();++i) { + if (!i->second.deletedAt) { + try { + const int result = func(i->first,i->second.v); + if (result < 0) { + i->second.rev = ++_rev; + i->second.deletedAt = _rev; + i->second.v.clear(); + } else if (result > 0) { + i->second.rev = ++_rev; + i->second.deletedAt = 0; + // v will have been modified in place + } + if (result != 0) { + sentToNodes.clear(); + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::iterator c2(_connections.begin());c2!=_connections.end();++c2) { + if ((c2->second.gotHello)&&(std::find(sentToNodes.begin(),sentToNodes.end(),c2->second.node) == sentToNodes.end())) { + sendUpdate(c2->second,i->first,i->second); + sentToNodes.push_back(c2->second.node); + } + } + whack = true; + } + } catch ( ... ) {} + } + } + if (whack) + _phy.whack(); + } + +private: + inline vsdm &operator=(const vsdm &v) { return *this; } + + static void _threadMain(void *p) { reinterpret_cast<vsdm *>(p)->threadMain(); } + inline void threadMain() + { + std::vector<uint64_t> haveNodes; + time_t lastcheck = 0; + time_t lastclean = 0; + while (_run) { + _phy.poll(1000); + + time_t now = time(0); + + // Check connections with other nodes and try to establish them + // if they're not present. + if ((now - lastcheck) >= 2) { + lastcheck = now; + haveNodes.clear(); + + std::lock_guard<std::mutex> l(_lock); + + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::const_iterator c(_connections.begin());c!=_connections.end();++c) { + if (std::find(haveNodes.begin(),haveNodes.end(),c->second.node) == haveNodes.end()) + haveNodes.push_back(c->second.node); + } + + for(std::unordered_map<uint64_t,struct sockaddr_storage>::iterator p(_peers.begin());p!=_peers.end();++p) { + if (std::find(haveNodes.begin(),haveNodes.end(),p->first) == haveNodes.end()) { + bool connected = false; + ztVsdmInternal::PhySocket *ns = _phy.tcpConnect((const struct sockaddr *)&(p->second),connected,(void *)0,true); + if (ns) { + _connection &c = _connections[ns]; + c.gotHello = false; + c.node = p->first; + c.sock = ns; + } + } + } + } + + // Forget deleted entries if they've had ample time to propagate + if ((now - lastclean) >= 120) { + lastclean = now; + uint64_t delHorizon = _m.size() * (2 + _peers.size()); + if (_rev > delHorizon) { + delHorizon -= _rev; + std::lock_guard<std::mutex> l(_lock); + for(typename M<K,vsdm_entry>::iterator i(_m.begin());i!=_m.end();) { + if ((i->second.deletedAt > 0)&&(i->second.deletedAt < delHorizon)) + _m.erase(i++); + else ++i; + } + } + } + } + } + + inline void sendUpdate(_connection &c,const std::string &k,const vsdm_entry &e) + { + // assumes lock is locked + const uint32_t ks = (uint32_t)S::objectSize(k); + uint32_t vs = 0; + uint32_t hdr[4]; + hdr[0] = htonl((uint32_t)((e.rev >> 32) & 0xffffffff)); + hdr[1] = htonl((uint32_t)(e.rev & 0xffffffff)); + hdr[2] = htonl(ks); + if (e.deletedAt) { + hdr[3] = 0xffffffff; + } else { + vs = (uint32_t)S::objectSize(e.v); + hdr[3] = htonl(vs); + } + + const uint32_t s = htonl((uint32_t)(16 + C::overhead() + ks + vs)); + c.outbuf.append((const char *)&s,4); + + const unsigned long start = (unsigned long)c.outbuf.length(); + c.outbuf.append((const char *)hdr,16); + c.outbuf.append(S::objectData(k),ks); + if (!e.deletedAt) + c.outbuf.append(S::objectData(e.v),vs); + c.outbuf.append(C::overhead(),(char)0); + const unsigned long end = (unsigned long)c.outbuf.length(); + + _cryptor.encrypt(reinterpret_cast<void *>(const_cast<char *>(c.outbuf.data()) + start),end - start); + + _phy.setNotifyWritable(c.sock,true); + } + + inline void sendUpdateToAll(ztVsdmInternal::PhySocket *receivedOnSock,const uint64_t receivedFromNode,const std::string &k,const vsdm_entry &e) + { + // assumes lock is locked + std::vector<uint64_t> sentToNodes; + for(typename std::unordered_map<ztVsdmInternal::PhySocket *,_connection>::iterator c2(_connections.begin());c2!=_connections.end();++c2) { + if ((c2->first != receivedOnSock)&&(c2->second.gotHello)&&(c2->second.node != receivedFromNode)&&(std::find(sentToNodes.begin(),sentToNodes.end(),c2->second.node) == sentToNodes.end())) { + sendUpdate(c2->second,k,e); + sentToNodes.push_back(c2->second.node); +#ifdef VSDM_DEBUG + fprintf(stderr,">> %lu: %s=%s\n",(unsigned long)c2->second.node,k.c_str(),(e.deletedAt) ? "<DEL>" : e.v.c_str()); fflush(stderr); +#endif + } + } + _phy.whack(); + } + + inline void sendHello(ztVsdmInternal::PhySocket *sock) + { + uint64_t hdr[3]; + if (htonl(1) == 1) { + hdr[0] = _node; + hdr[1] = _id; + hdr[2] = _rev; + } else { + hdr[0] = ztVsdmInternal::_swap64(_node); + hdr[1] = ztVsdmInternal::_swap64(_id); + hdr[2] = ztVsdmInternal::_swap64(_rev); + } + uint8_t tmp[24 + C::overhead()]; + memcpy(tmp,hdr,24); + _cryptor.encrypt(reinterpret_cast<void *>(tmp),sizeof(tmp)); + _phy.streamSend(sock,reinterpret_cast<void *>(tmp),sizeof(tmp)); + } + + inline void phyOnTcpConnect(ztVsdmInternal::PhySocket *sock,void **uptr,bool success) + { + std::lock_guard<std::mutex> l(_lock); + if (success) { + _connection &c = _connections[sock]; + c.gotHello = false; + c.sock = sock; + *uptr = (void *)&c; + sendHello(sock); + } else { + _connections.erase(sock); + } + } + + inline void phyOnTcpAccept(ztVsdmInternal::PhySocket *sockL,ztVsdmInternal::PhySocket *sockN,void **uptrL,void **uptrN,const struct sockaddr *from) + { + std::lock_guard<std::mutex> l(_lock); + + if (_restrictInbound) { + bool ok = false; + for(typename std::unordered_map<uint64_t,struct sockaddr_storage>::const_iterator i(_peers.begin());i!=_peers.end();++i) { + if (from->sa_family == i->second.ss_family) { + if ( (from->sa_family == AF_INET) && (reinterpret_cast<const struct sockaddr_in *>(from)->sin_addr.s_addr == reinterpret_cast<const struct sockaddr_in *>(&(i->second))->sin_addr.s_addr) ) { + ok = true; + break; + } else if ( (from->sa_family == AF_INET6) && (memcmp(reinterpret_cast<const struct sockaddr_in6 *>(from)->sin6_addr.s6_addr,reinterpret_cast<const struct sockaddr_in6 *>(&(i->second))->sin6_addr.s6_addr,16) == 0) ) { + ok = true; + break; + } + } + } + if (!ok) { +#ifdef VSDM_DEBUG + fprintf(stderr," * dropped inbound connection: peer not from a known IP address\n"); fflush(stderr); +#endif + _phy.close(sockN,false); + return; + } + } + + _connection &c = _connections[sockN]; + c.gotHello = false; + c.node = _node; // impossible value for a remote + c.sock = sockN; + *uptrN = (void *)&c; + sendHello(sockN); + } + + inline void phyOnTcpClose(ztVsdmInternal::PhySocket *sock,void **uptr) + { + std::lock_guard<std::mutex> l(_lock); + _connections.erase(sock); + } + + inline void phyOnTcpData(ztVsdmInternal::PhySocket *sock,void **uptr,void *data,unsigned long len) + { + _connection *const c = (_connection *)*uptr; + if (!c) return; + + std::unique_lock<std::mutex> l(_lock); + c->inbuf.append(reinterpret_cast<char *>(data),len); + for(;;) { + if (c->gotHello) { + + if (c->inbuf.length() >= 20) { // got message size and header + uint32_t _totalLen; + memcpy(&_totalLen,c->inbuf.data(),4); + const unsigned long totalLen = ntohl(_totalLen); + if ((totalLen > L)||(totalLen < 16)) { // message too small or too large + _connections.erase(sock); + _phy.close(sock,false); + return; + } + + if (c->inbuf.length() >= (4 + totalLen)) { // got full message + + if (!_cryptor.decrypt(reinterpret_cast<void *>(const_cast<char *>(c->inbuf.data()) + 4),totalLen)) { + _connections.erase(sock); + _phy.close(sock,false); + return; + } + + uint32_t hdr[4]; + memcpy(hdr,c->inbuf.data() + 4,16); + + const uint64_t objectRev = ((uint64_t)ntohl(hdr[0]) << 32) | (uint64_t)ntohl(hdr[1]); + const unsigned long keyLen = (unsigned long)ntohl(hdr[2]); + unsigned long valueLen = (unsigned long)ntohl(hdr[3]); + + if (objectRev > _rev) + _rev = objectRev; + + uint64_t deletedAt = 0; + if (valueLen == 0xffffffff) { + valueLen = 0; + deletedAt = _rev; + } + if ((keyLen + valueLen + 16 + C::overhead()) > totalLen) { // key and/or value length invalid + _connections.erase(sock); + _phy.close(sock,false); + return; + } + + K k; + if (!S::objectDeserialize(c->inbuf.data() + 16 + 4,keyLen,k)) { + _connections.erase(sock); + _phy.close(sock,false); + return; + } + + vsdm_entry &e = _m[k]; + if (e.rev < objectRev) { + const bool added = (e.rev == 0); + e.rev = objectRev; + e.deletedAt = deletedAt; + if (e.deletedAt) { + e.v = V(); + } else { + if (!S::objectDeserialize(c->inbuf.data() + 16 + 4 + keyLen,valueLen,e.v)) { + _connections.erase(sock); + _phy.close(sock,false); + return; + } + } +#ifdef VSDM_DEBUG + fprintf(stderr,"<< %lu: %s=%s\n",(unsigned long)c->node,k.c_str(),(deletedAt) ? "<DEL>" : e.v.c_str()); fflush(stderr); +#endif + sendUpdateToAll(sock,c->node,k,e); + + l.unlock(); + try { + if (added) { + _watcher.add(c->node,k,e.v,objectRev); + } else if (deletedAt) { + _watcher.del(c->node,k); + } else { + _watcher.update(c->node,k,e.v,objectRev); + } + } catch ( ... ) {} + l.lock(); + } + + c->inbuf.erase(c->inbuf.begin(),c->inbuf.begin() + totalLen + 4); + + // continue and process more messages in queue, if any + } else { // still waiting on full message + break; + } + } else { // still waiting on message size and header + break; + } + + } else if (c->inbuf.length() >= (24 + C::overhead())) { // got hello header + + if (!_cryptor.decrypt(reinterpret_cast<void *>(const_cast<char *>(c->inbuf.data())),24 + C::overhead())) { + _connections.erase(sock); + _phy.close(sock,false); + return; + } + + uint64_t hdr[3]; + memcpy(hdr,c->inbuf.data(),24); + c->inbuf.erase(c->inbuf.begin(),c->inbuf.begin() + 24 + C::overhead()); + + if (htonl(1) != 1) { + hdr[0] = ztVsdmInternal::_swap64(hdr[0]); + hdr[1] = ztVsdmInternal::_swap64(hdr[1]); + hdr[2] = ztVsdmInternal::_swap64(hdr[2]); + } + + if ((hdr[0] == _node)||(hdr[1] != _id)) { // don't connect to self, and don't connect to other map IDs + _connections.erase(sock); + _phy.close(sock,false); + break; + } else { + c->gotHello = true; + c->node = hdr[0]; + + if (hdr[2] > _rev) + _rev = hdr[2]; + + for(typename M<std::string,vsdm_entry>::const_iterator i(_m.begin());i!=_m.end();++i) { + if (i->second.rev >= hdr[2]) { + sendUpdate(*c,i->first,i->second); +#ifdef VSDM_DEBUG + fprintf(stderr,">> %lu: %s=%s (new link)\n",(unsigned long)c->node,i->first.c_str(),i->second.v.c_str()); fflush(stderr); +#endif + } + } + _phy.whack(); + } + + // continue and process more messages in queue, if any + } else { // still waiting on hello header + break; + } + } + } + + inline void phyOnTcpWritable(ztVsdmInternal::PhySocket *sock,void **uptr) + { + std::lock_guard<std::mutex> l(_lock); + _connection *c = (_connection *)*uptr; + if (c) { + if (c->outbuf.length() > 0) { + long n = _phy.streamSend(sock,c->outbuf.data(),c->outbuf.length()); + if (n <= 0) { + _connections.erase(sock); + _phy.close(sock,false); + return; + } else if (n == (long)c->outbuf.length()) { + c->outbuf.clear(); + } else { + c->outbuf.erase(c->outbuf.begin(),c->outbuf.begin() + n); + } + } + if (c->outbuf.length() == 0) { + _phy.setNotifyWritable(c->sock,false); + } + } + } + + inline void phyOnDatagram(ztVsdmInternal::PhySocket *sock,void **uptr,const struct sockaddr *localAddr,const struct sockaddr *from,void *data,unsigned long len) {} + inline void phyOnFileDescriptorActivity(ztVsdmInternal::PhySocket *sock,void **uptr,bool readable,bool writable) {} + inline void phyOnUnixAccept(ztVsdmInternal::PhySocket *sockL,ztVsdmInternal::PhySocket *sockN,void **uptrL,void **uptrN) {} + inline void phyOnUnixClose(ztVsdmInternal::PhySocket *sock,void **uptr) {} + inline void phyOnUnixData(ztVsdmInternal::PhySocket *sock,void **uptr,void *data,unsigned long len) {} + inline void phyOnUnixWritable(ztVsdmInternal::PhySocket *sock,void **uptr) {} + + const uint64_t _node; + const uint64_t _id; + uint64_t _rev; + std::unordered_map<uint64_t,struct sockaddr_storage> _peers; + std::unordered_map<ztVsdmInternal::PhySocket *,_connection> _connections; + M<K,vsdm_entry> _m; + mutable std::mutex _lock; + ztVsdmInternal::Phy<vsdm *> _phy; + C _cryptor; + W _watcher; + volatile bool _run; + bool _restrictInbound; + std::thread _t; +}; + +#endif diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 5126c5a2..4709b116 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -158,21 +158,6 @@ extern "C" { #define ZT_CIRCUIT_TEST_REPORT_FLAGS_UPSTREAM_AUTHORIZED_IN_PATH 0x0000000000000001ULL /** - * Maximum number of cluster members (and max member ID plus one) - */ -#define ZT_CLUSTER_MAX_MEMBERS 128 - -/** - * Maximum number of physical ZeroTier addresses a cluster member can report - */ -#define ZT_CLUSTER_MAX_ZT_PHYSICAL_ADDRESSES 16 - -/** - * Maximum allowed cluster message length in bytes - */ -#define ZT_CLUSTER_MAX_MESSAGE_LENGTH (1500 - 48) - -/** * Maximum value for link quality (min is 0) */ #define ZT_PATH_LINK_QUALITY_MAX 0xff @@ -1105,78 +1090,6 @@ typedef struct } ZT_PeerList; /** - * A cluster member's status - */ -typedef struct { - /** - * This cluster member's ID (from 0 to 1-ZT_CLUSTER_MAX_MEMBERS) - */ - unsigned int id; - - /** - * Number of milliseconds since last 'alive' heartbeat message received via cluster backplane address - */ - unsigned int msSinceLastHeartbeat; - - /** - * Non-zero if cluster member is alive - */ - int alive; - - /** - * X, Y, and Z coordinates of this member (if specified, otherwise zero) - * - * What these mean depends on the location scheme being used for - * location-aware clustering. At present this is GeoIP and these - * will be the X, Y, and Z coordinates of the location on a spherical - * approximation of Earth where Earth's core is the origin (in km). - * They don't have to be perfect and need only be comparable with others - * to find shortest path via the standard vector distance formula. - */ - int x,y,z; - - /** - * Cluster member's last reported load - */ - uint64_t load; - - /** - * Number of peers - */ - uint64_t peers; - - /** - * Physical ZeroTier endpoints for this member (where peers are sent when directed here) - */ - struct sockaddr_storage zeroTierPhysicalEndpoints[ZT_CLUSTER_MAX_ZT_PHYSICAL_ADDRESSES]; - - /** - * Number of physical ZeroTier endpoints this member is announcing - */ - unsigned int numZeroTierPhysicalEndpoints; -} ZT_ClusterMemberStatus; - -/** - * ZeroTier cluster status - */ -typedef struct { - /** - * My cluster member ID (a record for 'self' is included in member[]) - */ - unsigned int myId; - - /** - * Number of cluster members - */ - unsigned int clusterSize; - - /** - * Cluster member statuses - */ - ZT_ClusterMemberStatus members[ZT_CLUSTER_MAX_MEMBERS]; -} ZT_ClusterStatus; - -/** * An instance of a ZeroTier One node (opaque) */ typedef void ZT_Node; @@ -1766,116 +1679,6 @@ int ZT_Node_sendUserMessage(ZT_Node *node,void *tptr,uint64_t dest,uint64_t type void ZT_Node_setNetconfMaster(ZT_Node *node,void *networkConfigMasterInstance); /** - * Initialize cluster operation - * - * This initializes the internal structures and state for cluster operation. - * It takes two function pointers. The first is to a function that can be - * used to send data to cluster peers (mechanism is not defined by Node), - * and the second is to a function that can be used to get the location of - * a physical address in X,Y,Z coordinate space (e.g. as cartesian coordinates - * projected from the center of the Earth). - * - * Send function takes an arbitrary pointer followed by the cluster member ID - * to send data to, a pointer to the data, and the length of the data. The - * maximum message length is ZT_CLUSTER_MAX_MESSAGE_LENGTH (65535). Messages - * must be delivered whole and may be dropped or transposed, though high - * failure rates are undesirable and can cause problems. Validity checking or - * CRC is also not required since the Node validates the authenticity of - * cluster messages using cryptogrphic methods and will silently drop invalid - * messages. - * - * Address to location function is optional and if NULL geo-handoff is not - * enabled (in this case x, y, and z in clusterInit are also unused). It - * takes an arbitrary pointer followed by a physical address and three result - * parameters for x, y, and z. It returns zero on failure or nonzero if these - * three coordinates have been set. Coordinate space is arbitrary and can be - * e.g. coordinates on Earth relative to Earth's center. These can be obtained - * from latitutde and longitude with versions of the Haversine formula. - * - * See: http://stackoverflow.com/questions/1185408/converting-from-longitude-latitude-to-cartesian-coordinates - * - * Neither the send nor the address to location function should block. If the - * address to location function does not have a location for an address, it - * should return zero and then look up the address for future use since it - * will be called again in (typically) 1-3 minutes. - * - * Note that both functions can be called from any thread from which the - * various Node functions are called, and so must be thread safe if multiple - * threads are being used. - * - * @param node Node instance - * @param myId My cluster member ID (less than or equal to ZT_CLUSTER_MAX_MEMBERS) - * @param zeroTierPhysicalEndpoints Preferred physical address(es) for ZeroTier clients to contact this cluster member (for peer redirect) - * @param numZeroTierPhysicalEndpoints Number of physical endpoints in zeroTierPhysicalEndpoints[] (max allowed: 255) - * @param x My cluster member's X location - * @param y My cluster member's Y location - * @param z My cluster member's Z location - * @param sendFunction Function to be called to send data to other cluster members - * @param sendFunctionArg First argument to sendFunction() - * @param addressToLocationFunction Function to be called to get the location of a physical address or NULL to disable geo-handoff - * @param addressToLocationFunctionArg First argument to addressToLocationFunction() - * @return OK or UNSUPPORTED_OPERATION if this Node was not built with cluster support - */ -enum ZT_ResultCode ZT_Node_clusterInit( - ZT_Node *node, - unsigned int myId, - const struct sockaddr_storage *zeroTierPhysicalEndpoints, - unsigned int numZeroTierPhysicalEndpoints, - int x, - int y, - int z, - void (*sendFunction)(void *,unsigned int,const void *,unsigned int), - void *sendFunctionArg, - int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), - void *addressToLocationFunctionArg); - -/** - * Add a member to this cluster - * - * Calling this without having called clusterInit() will do nothing. - * - * @param node Node instance - * @param memberId Member ID (must be less than or equal to ZT_CLUSTER_MAX_MEMBERS) - * @return OK or error if clustering is disabled, ID invalid, etc. - */ -enum ZT_ResultCode ZT_Node_clusterAddMember(ZT_Node *node,unsigned int memberId); - -/** - * Remove a member from this cluster - * - * Calling this without having called clusterInit() will do nothing. - * - * @param node Node instance - * @param memberId Member ID to remove (nothing happens if not present) - */ -void ZT_Node_clusterRemoveMember(ZT_Node *node,unsigned int memberId); - -/** - * Handle an incoming cluster state message - * - * The message itself contains cluster member IDs, and invalid or badly - * addressed messages will be silently discarded. - * - * Calling this without having called clusterInit() will do nothing. - * - * @param node Node instance - * @param msg Cluster message - * @param len Length of cluster message - */ -void ZT_Node_clusterHandleIncomingMessage(ZT_Node *node,const void *msg,unsigned int len); - -/** - * Get the current status of the cluster from this node's point of view - * - * Calling this without clusterInit() or without cluster support will just - * zero out the structure and show a cluster size of zero. - * - * @param node Node instance - * @param cs Cluster status structure to fill with data - */ -void ZT_Node_clusterStatus(ZT_Node *node,ZT_ClusterStatus *cs); - -/** * Set trusted paths * * A trusted path is a physical network (network/bits) over which both diff --git a/make-mac.mk b/make-mac.mk index de66f49c..5622a41b 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -54,7 +54,7 @@ ifeq ($(ZT_DEBUG),1) # C25519 in particular is almost UNUSABLE in heavy testing without it. node/Salsa20.o node/SHA512.o node/C25519.o node/Poly1305.o: CFLAGS = -Wall -O2 -g $(INCLUDES) $(DEFS) else - CFLAGS?=-Ofast -fstack-protector-strong + CFLAGS?=-Os -fstack-protector-strong CFLAGS+=$(ARCH_FLAGS) -Wall -Werror -flto -fPIE -mmacosx-version-min=10.7 -DNDEBUG -Wno-unused-private-field $(INCLUDES) $(DEFS) STRIP=strip endif diff --git a/node/Node.cpp b/node/Node.cpp index 911c9c4b..8849e0f4 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -503,6 +503,7 @@ void Node::setNetconfMaster(void *networkControllerInstance) RR->localNetworkController->init(RR->identity,this); } +/* ZT_ResultCode Node::clusterInit( unsigned int myId, const struct sockaddr_storage *zeroTierPhysicalEndpoints, @@ -570,6 +571,7 @@ void Node::clusterStatus(ZT_ClusterStatus *cs) #endif memset(cs,0,sizeof(ZT_ClusterStatus)); } +*/ /****************************************************************************/ /* Node methods used only within node/ */ @@ -998,56 +1000,6 @@ void ZT_Node_setNetconfMaster(ZT_Node *node,void *networkControllerInstance) } catch ( ... ) {} } -enum ZT_ResultCode ZT_Node_clusterInit( - ZT_Node *node, - unsigned int myId, - const struct sockaddr_storage *zeroTierPhysicalEndpoints, - unsigned int numZeroTierPhysicalEndpoints, - int x, - int y, - int z, - void (*sendFunction)(void *,unsigned int,const void *,unsigned int), - void *sendFunctionArg, - int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), - void *addressToLocationFunctionArg) -{ - try { - return reinterpret_cast<ZeroTier::Node *>(node)->clusterInit(myId,zeroTierPhysicalEndpoints,numZeroTierPhysicalEndpoints,x,y,z,sendFunction,sendFunctionArg,addressToLocationFunction,addressToLocationFunctionArg); - } catch ( ... ) { - return ZT_RESULT_FATAL_ERROR_INTERNAL; - } -} - -enum ZT_ResultCode ZT_Node_clusterAddMember(ZT_Node *node,unsigned int memberId) -{ - try { - return reinterpret_cast<ZeroTier::Node *>(node)->clusterAddMember(memberId); - } catch ( ... ) { - return ZT_RESULT_FATAL_ERROR_INTERNAL; - } -} - -void ZT_Node_clusterRemoveMember(ZT_Node *node,unsigned int memberId) -{ - try { - reinterpret_cast<ZeroTier::Node *>(node)->clusterRemoveMember(memberId); - } catch ( ... ) {} -} - -void ZT_Node_clusterHandleIncomingMessage(ZT_Node *node,const void *msg,unsigned int len) -{ - try { - reinterpret_cast<ZeroTier::Node *>(node)->clusterHandleIncomingMessage(msg,len); - } catch ( ... ) {} -} - -void ZT_Node_clusterStatus(ZT_Node *node,ZT_ClusterStatus *cs) -{ - try { - reinterpret_cast<ZeroTier::Node *>(node)->clusterStatus(cs); - } catch ( ... ) {} -} - void ZT_Node_setTrustedPaths(ZT_Node *node,const struct sockaddr_storage *networks,const uint64_t *ids,unsigned int count) { try { diff --git a/node/Node.hpp b/node/Node.hpp index 57b5489e..006551fa 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -117,21 +117,6 @@ public: void clearLocalInterfaceAddresses(); int sendUserMessage(void *tptr,uint64_t dest,uint64_t typeId,const void *data,unsigned int len); void setNetconfMaster(void *networkControllerInstance); - ZT_ResultCode clusterInit( - unsigned int myId, - const struct sockaddr_storage *zeroTierPhysicalEndpoints, - unsigned int numZeroTierPhysicalEndpoints, - int x, - int y, - int z, - void (*sendFunction)(void *,unsigned int,const void *,unsigned int), - void *sendFunctionArg, - int (*addressToLocationFunction)(void *,const struct sockaddr_storage *,int *,int *,int *), - void *addressToLocationFunctionArg); - ZT_ResultCode clusterAddMember(unsigned int memberId); - void clusterRemoveMember(unsigned int memberId); - void clusterHandleIncomingMessage(const void *msg,unsigned int len); - void clusterStatus(ZT_ClusterStatus *cs); // Internal functions ------------------------------------------------------ |