module maxmind.db;

public import maxmind.data;

import std.algorithm.mutation;
import std.algorithm.searching;
import std.bitmanip : bigEndianToNative;
import std.conv : to;
import std.mmfile;
import std.regex;
import std.string;

/** Constant byte sequence that marks the beginning of the global metadata section */
protected const ubyte[] METADATA_MARKER = [0xAB, 0xCD, 0xEF, 'M', 'a', 'x', 'M', 'i', 'n', 'd', '.', 'c', 'o', 'm'];

/** Maximum size of the global metadata section at the end of the file */
protected const ulong METADATA_MAX_SIZE = 128*1024;

/** The number of padding bytes between the search tree and the data section itself */
protected const ulong DATA_SEPARATOR = 16;


/**
 * Holds the standard global database metadata
 */
public struct Metadata {
    uint     node_count;
    ushort   record_size;
    ushort   ip_version;
    string   database_type;
    string[] languages;
    ushort   binary_format_major_version;
    ushort   binary_format_minor_version;


    /**
     * Fills the structure from the decoded metadata map.
     *
     * Params:
     *   m = the metadata map read from the end of the database file
     */
    public this(DataNode.Map m) {
        this.node_count    = m.node_count.get!uint;
        this.record_size   = m.record_size.get!ushort;
        this.ip_version    = m.ip_version.get!ushort;
        this.database_type = m.database_type.get!string;
        this.languages     = m.languages.getArray!string;

        this.binary_format_major_version = m.binary_format_major_version.get!ushort;
        this.binary_format_minor_version = m.binary_format_minor_version.get!ushort;
    }

    /**
     * Size of one search tree node, in bytes.
     * A node holds two records of `record_size` bits each, hence `record_size * 2 / 8`.
     */
    @property public ulong node_size() const {
        return this.record_size / 4;
    }

    /**
     * Total size of the search tree in bytes (node size times node count).
     * The data section starts DATA_SEPARATOR bytes after this offset.
     */
    @property public ulong data_size() const {
        return this.node_size * this.node_count;
    }
}


/**
 * Handles opening, reading and accessing the MaxMind GeoIP database
 */
class Database {
    /** The memory-mapped database file for easy and fast access */
    protected MmFile   dbfile;
    protected Metadata _metadata;
    protected Reader   dataReader;

    /**
     * Opens the database located at $filename on the system.
     *
     * Throws: Exception if the metadata block cannot be located or decoded.
     */
    public this(string filename) {
        this.dbfile = new MmFile(filename);
        this.readMetadata();

        // The data section begins right after the search tree and its separator
        this.dataReader = Reader(
            cast(ubyte[]) this.dbfile[this._metadata.data_size+DATA_SEPARATOR .. this.dbfile.length]
        );
    }

    /**
     * Read-only accessor for the metadata
     */
    @property public const(Metadata) metadata() const {
        return this._metadata;
    }


    /**
     * Reads the database metadata from the file and sets it in the instance
     *
     * Throws: Exception if the marker is missing or the metadata is not a map.
     */
    protected void readMetadata() {
        ulong metadata_start = this.findMetadataBlockStart();
        DataNode.Map metadata_node = cast(DataNode.Map) DataNode.create(
            cast(ubyte[]) this.dbfile[metadata_start .. this.dbfile.length]
        );

        // The downcast yields null when the decoded value is not a map;
        // fail with an exception instead of dereferencing a null reference.
        if(metadata_node is null) {
            throw new Exception("Invalid database metadata: the metadata section is not a map.");
        }

        this._metadata = Metadata(metadata_node.asMap());
    }


    /**
     * Scans the database file from the end to locate the metadata block
     * (Yes, this is how MaxMind says to do)
     *
     * Returns: the offset of the first byte following the metadata marker.
     * Throws: Exception if the marker is not found within the last
     *         METADATA_MAX_SIZE bytes of the file.
     */
    protected ulong findMetadataBlockStart() {
        immutable ulong file_length   = this.dbfile.length;
        immutable ulong marker_length = METADATA_MARKER.length;

        // A file that cannot hold the marker plus at least one byte of metadata
        // cannot be valid; checking first also avoids an unsigned underflow below.
        if(file_length > marker_length) {
            ulong position    = file_length - marker_length - 1;
            ulong minposition = // Don't attempt reading before the beginning of the file
                file_length > METADATA_MAX_SIZE ?
                    file_length - METADATA_MAX_SIZE : 0;

            // Scan the file from the end, down to and including minposition
            while(true) {
                if(cast(ubyte[]) this.dbfile[position..position+marker_length] == METADATA_MARKER) {
                    return position + marker_length;
                }

                // Test before decrementing: position is unsigned and minposition may be 0
                if(position == minposition) {
                    break;
                }
                position--;
            }
        }

        // Metadata not found: database is unusable.
        throw new Exception(
            "Cannot find the metadata marker. This file doesn't appear to be a valid MaxMind v2 database."
        );
    }


    /**
     * Looks up any IP address in the database by delegating the search to the appropriate function
     *
     * An address containing a ':' is treated as IPv6, anything else as IPv4.
     */
    public DataNode lookup(string address) {
        return address.canFind(':') ? this.lookupV6(address) : this.lookupV4(address);
    }


    /**
     * Looks up an IPv4 address in the database, or return null if the record is not found.
     *
     * Throws: Exception if the string is not in dotted-quad format.
     */
    public DataNode lookupV4(string address) {
        if(!address.matchFirst(ctRegex!("^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}$"))) {
            throw new Exception("Invalid IPv4 address format");
        }

        string[4] parts = address.split('.');
        return this.lookupV4(parts.to!(ubyte[4]));
    }

    /// ditto
    public DataNode lookupV4(ubyte[4] address) {
        if(this._metadata.ip_version == 4) {
            return this.lookup_impl(address);
        }
        else {
            // In a non-IPv4 database, look the address up as ::a.b.c.d
            // (the four bytes placed in the last 32 bits of a zeroed IPv6 address)
            ubyte[16] address6 = 0;
            address6[12..16] = address;
            return this.lookupV6(address6);
        }
    }


    /**
     * Looks up an IPv6 address in the database, or return null if the record is not found.
     *
     * Throws: Exception if the address is malformed or the database is not an IPv6 database.
     */
    public DataNode lookupV6(string address) {
        return this.lookupV6(parseIPv6(address));
    }

    /// ditto
    public DataNode lookupV6(ubyte[16] address) {
        if(this._metadata.ip_version != 6) {
            throw new Exception("This database does not support IPv6 addresses.");
        }

        return this.lookup_impl(address);
    }


    /**
     * Internal implementation of the database search algorithm: takes the address as an array and searches the tree.
     *
     * Walks the tree one bit at a time, most significant bit of each byte first.
     * Returns null when the "not found" value is reached or the address bits run out.
     */
    protected DataNode lookup_impl(ubyte[] address) {
        DatabaseNode node = this.getNodeAt(0);

        // Loop over each group
        foreach(group; address) {
            ubyte mask = 0b10000000;

            // Move the bit mask by one for each iteration
            while(mask) {
                // A set bit follows the right record, a cleared bit the left one
                uint next = (group & mask) ? node.right : node.left;

                // Link to another node
                if(next < this._metadata.node_count) {
                    node = this.getNodeAt(next);
                }

                // Record not found special value
                else if(next == this._metadata.node_count) {
                    return null;
                }

                // Found data in the data section, read it
                else {
                    // Turn the record value into an offset inside the data section
                    next -= this._metadata.node_count + DATA_SEPARATOR;
                    return DataNode.create(this.dataReader.newReader(next));
                }

                mask >>= 1;
            }
        }

        return null;
    }


    /**
     * Returns a database node at $position (in node count)
     */
    protected DatabaseNode getNodeAt(ulong position) {
        ulong node_size = this._metadata.node_size;
        ulong offset    = position * node_size;

        return new DatabaseNode(
            cast(ubyte[]) this.dbfile[offset..offset+node_size]
        );
    }
}

/**
 * Holds an IP search node of the binary tree
 */
protected class DatabaseNode {
    uint left;
    uint right;

    /**
     * Loads the raw node data into a convenient structure
     *
     * Params:
     *   node = the raw bytes of one node; must be 6, 7 or 8 bytes long
     * Throws: Exception for any other length.
     */
    public this(ubyte[] node) {
        /** Helper function to read a slice of arbitrary length into an uint */
        uint readSlice(ubyte[] slice) {
            // Right-align the bytes in a zeroed 4-byte buffer, then decode it as big-endian
            ubyte[4] value;
            value[$-slice.length..$] = slice;
            return bigEndianToNative!uint(value);
        }

        // Decode the variable-sized node
        switch(node.length) {
            case 6:
                left  = readSlice(node[0..3]);
                right = readSlice(node[3..6]);
                break;

            case 7:
                // The middle byte is shared: its high nibble supplies the top
                // 4 bits of the left record, its low nibble those of the right one
                left  = readSlice(node[0..3]) + ((node[3] & 0b11110000) << 20);
                right = readSlice(node[4..7]) + ((node[3] & 0b00001111) << 24);
                break;

            case 8:
                left  = readSlice(node[0..4]);
                right = readSlice(node[4..8]);
                break;

            default:
                throw new Exception("Cannot decode a node of an invalid size. Nodes must be 24, 28 or 32 bits long.");
        }
    }
}


/**
 * Utility function to parse an IPv6 address
 *
 * The string is consumed from its last character to its first, filling the
 * output from the last nibble backwards. When a "::" is found, the groups
 * located before it are moved to the front afterwards and the gap is zeroed.
 *
 * Params:
 *   address = textual IPv6 address made of hexadecimal groups and colons
 * Returns: the 16 bytes of the address, most significant byte first.
 * Throws: Exception on an invalid character, an oversized group, too many
 *         groups or colons, or more than one "::".
 */
public ubyte[16] parseIPv6(string address) {
    ubyte[16] output = 0;
    int current      = 31;  // Index of the next nibble to write (0 = most significant)
    int split        = -1;  // Nibble position of the "::", -1 while none was seen
    int groupCount   = 0;
    int groupCurrent = 0;
    int colons       = 0;

    /**
     * Helper function to convert a hex char to a number
     */
    ubyte hexToByte(char c) {
        if(c >= '0' && c <= '9') return cast(ubyte)(c-'0');
        if(c >= 'A' && c <= 'F') return cast(ubyte)(c-'A'+10);
        if(c >= 'a' && c <= 'f') return cast(ubyte)(c-'a'+10);
        else throw new Exception("Invalid IPv6 address: invalid hexadecimal character.");
    }

    // Loop over each character from the end
    foreach_reverse(char c; address) {
        // If that's a colon, count it
        if(c == ':') {
            if(++colons > 2) {
                throw new Exception("Invalid IPv6 address: too many colons in a row.");
            }

            // A colon means a group is closed, so we align back to the next 2 byte boundary
            current -= (current+1) % 4; // Need +1 here because we 0-index

            // Keep track of the number of groups for validation
            groupCurrent = 0;
            if(++groupCount > 8) {
                throw new Exception("IPv6: Too many groups");
            }

            // If that's the second colon in a row, mark where it is
            if(colons == 2) {
                // Only one "::" is allowed; a second one would silently
                // overwrite the first split position and corrupt the result
                if(split != -1) {
                    throw new Exception("Invalid IPv6 address: '::' may only appear once.");
                }
                split = current;
            }
        }
        else {
            colons = 0; // Reset the counter
            if(++groupCurrent > 4) {
                throw new Exception("Invalid IPv6 address: too many characters in a group.");
            }

            if(current < 0) {
                throw new Exception("Invalid IPv6 address: input address is too long.");
            }

            // Odd nibble indexes are the low half of their byte, even ones the high half
            output[current/2] |= hexToByte(c) << (current & 1 ? 0 : 4);
            current--;
        }
    }

    // Align group being worked on
    current -= ((current+1) % 4) - 1; // +1 - Align to upper bound so /2 works
    split   += 1;                     // +1 - Same as above

    // If we found a :: split, move the bytes to the right position and fill zeros
    if(split > 0) {
        copy(output[current/2..split/2], output[0..(split-current)/2]);
        output[(split-current)/2..split/2] = 0;
    }

    return output;
}