1 module maxmind.db;
public import maxmind.data;
import std.algorithm.mutation;
import std.algorithm.searching;
import std.conv : to;
import std.mmfile;
import std.regex;
import std.string;
9 
/**
 * Constant byte sequence that marks the beginning of the global metadata section:
 * the bytes 0xAB 0xCD 0xEF followed by the ASCII text "MaxMind.com".
 */
protected const ubyte[] METADATA_MARKER = [0xAB, 0xCD, 0xEF, 'M', 'a', 'x', 'M', 'i', 'n', 'd', '.', 'c', 'o', 'm'];

/** Maximum size, in bytes, of the global metadata section at the end of the file (128 KiB) */
protected const ulong   METADATA_MAX_SIZE = 128*1024;

/** The number of padding bytes between the end of the search tree and the start of the data section */
protected const ulong   DATA_SEPARATOR = 16;
18 
19 
/**
 * Plain value type holding the global metadata fields of a database file
 */
public struct Metadata {
	uint           node_count;
	ushort         record_size;
	ushort         ip_version;
	string         database_type;
	string[]       languages;
	ushort         binary_format_major_version;
	ushort         binary_format_minor_version;
	
	
	/**
	 * Fills every field from the entry of the same name in the decoded metadata map
	 */
	public this(DataNode.Map m) {
		node_count    = m.node_count.get!uint;
		record_size   = m.record_size.get!ushort;
		ip_version    = m.ip_version.get!ushort;
		database_type = m.database_type.get!string;
		languages     = m.languages.getArray!string;
		
		binary_format_major_version = m.binary_format_major_version.get!ushort;
		binary_format_minor_version = m.binary_format_minor_version.get!ushort;
	}
	
	/**
	 * Size of one search tree node. A node stores two records (left and right),
	 * so the result is twice the record size divided by eight.
	 */
	@property public ulong node_size() const {
		immutable ulong both_records = 2 * record_size;
		return both_records / 8;
	}
	
	/**
	 * Total size of the search tree: the size of one node multiplied by the number of nodes
	 */
	@property public ulong data_size() const {
		immutable ulong per_node = node_size;
		return per_node * node_count;
	}
}
52 
53 
/**
 * Handles opening, reading and accessing the MaxMind GeoIP database
 *
 * The file is memory-mapped, its metadata is decoded once at construction
 * and every lookup walks the binary search tree stored at the start of the file.
 */
class Database {
	/** The memory-mapped database file for easy and fast access */
	protected MmFile   dbfile;
	/** Metadata decoded from the end of the file by readMetadata() */
	protected Metadata _metadata;
	/** Reader over everything that follows the search tree and its separator */
	protected Reader   dataReader;
	
	/**
	 * Opens the database located at $filename on the system.
	 *
	 * Throws: Exception if the metadata marker cannot be found or if the search tree
	 *         announced by the metadata is larger than the file itself
	 */
	public this(string filename) {
		this.dbfile = new MmFile(filename);
		this.readMetadata();
		
		// The data section begins right after the search tree and the separator.
		// Check the bound explicitly so a truncated file produces a catchable Exception
		// rather than an out-of-range slice.
		immutable ulong data_start = this._metadata.data_size + DATA_SEPARATOR;
		if(data_start > this.dbfile.length) {
			throw new Exception(
				"The search tree described by the metadata does not fit in the file. The database appears to be truncated or corrupt."
			);
		}
		
		this.dataReader = Reader(
			cast(ubyte[]) this.dbfile[data_start .. this.dbfile.length]
		);
	}
	
	/**
	 * Read-only accessor for the metadata
	 */
	@property public const(Metadata) metadata() const {
		return this._metadata;
	}
	
	
	/**
	 * Reads the database metadata from the file and sets it in the instance
	 *
	 * Everything from the end of the metadata marker to the end of the file is decoded.
	 */
	protected void readMetadata() {
		ulong metadata_start = this.findMetadataBlockStart();
		DataNode.Map metadata_node = cast(DataNode.Map) DataNode.create(
			cast(ubyte[]) this.dbfile[metadata_start .. this.dbfile.length]
		);
		
		this._metadata = Metadata(metadata_node.asMap());
	}
	
	
	/**
	 * Scans the database file from the end to locate the metadata block
	 * (Yes, this is how MaxMind says to do)
	 *
	 * Only the last METADATA_MAX_SIZE bytes of the file are searched, and at least one byte
	 * of metadata must follow the marker.
	 *
	 * Returns: the offset of the first byte following the last occurrence of the marker
	 * Throws:  Exception if the marker is not found, including when the file is too short to hold it
	 */
	protected ulong findMetadataBlockStart() {
		immutable ulong file_length   = this.dbfile.length;
		immutable ulong marker_length = METADATA_MARKER.length;
		
		// All offsets are unsigned: make sure the file can hold the marker plus one byte
		// of metadata before subtracting, otherwise the start position would wrap around.
		if(file_length > marker_length) {
			immutable ulong minposition = // Don't attempt reading before the beginning of the file
				file_length > METADATA_MAX_SIZE ?
				file_length - METADATA_MAX_SIZE : 0;
			
			// Scan the file from the end, lowest allowed position included
			ulong position = file_length - marker_length - 1;
			while(true) {
				if(cast(const(ubyte)[]) this.dbfile[position..position+marker_length] == METADATA_MARKER) {
					return position + marker_length;
				}
				
				// Test the bound before decrementing so that position never goes below zero
				if(position <= minposition) {
					break;
				}
				position--;
			}
		}
		
		// Metadata not found: database is unusable.
		throw new Exception(
			"Cannot find the metadata marker. This file doesn't appear to be a valid MaxMind v2 database."
		);
	}
	
	
	/**
	 * Looks up any IP address in the database by delegating the search to the appropriate function:
	 * an address containing a colon is handled as IPv6, anything else as IPv4.
	 */
	public DataNode lookup(string address) {
		return address.canFind(':') ? this.lookupV6(address) : this.lookupV4(address);
	}
	
	
	/**
	 * Looks up an IPv4 address in the database, or return null if the record is not found.
	 *
	 * Throws: Exception if the string is not made of four dot-separated groups of one to three digits
	 */
	public DataNode lookupV4(string address) {
		if(!address.matchFirst(ctRegex!("^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}$"))) {
			throw new Exception("Invalid IPv4 address format");
		}
		
		string[4] parts = address.split('.');
		return this.lookupV4(parts.to!(ubyte[4]));
	}
	
	/// ditto
	public DataNode lookupV4(ubyte[4] address) {
		if(this._metadata.ip_version == 4) {
			return this.lookup_impl(address);
		}
		else {
			// Any other database is searched with a 16 byte address:
			// twelve zero bytes followed by the four IPv4 bytes.
			ubyte[16] address6 = 0;
			address6[12..16] = address;
			return this.lookupV6(address6);
		}
	}
	
	
	/**
	 * Looks up an IPv6 address in the database, or return null if the record is not found.
	 *
	 * Throws: Exception if the database metadata does not declare IP version 6
	 */
	public DataNode lookupV6(string address) {
		return this.lookupV6(parseIPv6(address));
	}
	
	/// ditto
	public DataNode lookupV6(ubyte[16] address) {
		if(this._metadata.ip_version != 6) {
			throw new Exception("This database does not support IPv6 addresses.");
		}
		
		return this.lookup_impl(address);
	}
	
	
	/**
	 * Internal implementation of the database search algorithm: takes the address as an array and searches the tree.
	 *
	 * Each address bit, most significant first, selects the right (1) or left (0) record of the current node.
	 *
	 * Returns: the decoded data record, or null when the address has no record
	 * Throws:  Exception if a record points inside the separator that precedes the data section
	 */
	protected DataNode lookup_impl(ubyte[] address) {
		immutable uint node_count = this._metadata.node_count;
		DatabaseNode node = this.getNodeAt(0);
		
		// Loop over each byte of the address
		foreach(group; address) {
			// Move the bit mask by one for each iteration, starting from the highest bit
			for(ubyte mask = 0b10000000; mask; mask >>= 1) {
				uint next = (group & mask) ? node.right : node.left;
				
				// Link to another node
				if(next < node_count) {
					node = this.getNodeAt(next);
					continue;
				}
				
				// Record not found special value
				if(next == node_count) {
					return null;
				}
				
				// Found data in the data section: the offset is (record - node_count) - DATA_SEPARATOR.
				// Subtract in two steps so a record landing inside the separator is reported
				// instead of wrapping around to a huge unsigned offset.
				next -= node_count;
				if(next < DATA_SEPARATOR) {
					throw new Exception("Corrupt database: a search tree record points before the data section.");
				}
				next -= cast(uint) DATA_SEPARATOR;
				
				return DataNode.create(this.dataReader.newReader(next));
			}
		}
		
		// Every bit was consumed without reaching a terminal record
		return null;
	}
	
	
	/**
	 * Returns a database node at $position (in node count)
	 */
	protected DatabaseNode getNodeAt(ulong position) {
		ulong  node_size = this._metadata.node_size;
		ulong offset    = position * node_size;
		
		return new DatabaseNode(
			cast(ubyte[]) this.dbfile[offset..offset+node_size]
		);
	}
}
223 
/**
 * One node of the binary search tree: a pair of record values, one for each branch
 */
protected class DatabaseNode {
	uint left;
	uint right;
	
	/**
	 * Decodes the raw bytes of a node into its left and right records.
	 *
	 * Throws: Exception if the node is not 6, 7 or 8 bytes long
	 */
	public this(ubyte[] node) {
		/** Folds up to four big-endian bytes into an uint */
		static uint decode(const(ubyte)[] bytes) {
			uint value = 0;
			foreach(b; bytes) {
				value = (value << 8) | b;
			}
			return value;
		}
		
		if(node.length == 6) {
			// Two records of three bytes each
			this.left  = decode(node[0..3]);
			this.right = decode(node[3..6]);
		}
		else if(node.length == 7) {
			// The middle byte is shared: its high nibble holds the top four bits
			// of the left record and its low nibble the top four bits of the right one.
			immutable uint middle = node[3];
			this.left  = ((middle & 0xF0) << 20) + decode(node[0..3]);
			this.right = ((middle & 0x0F) << 24) + decode(node[4..7]);
		}
		else if(node.length == 8) {
			// Two records of four bytes each
			this.left  = decode(node[0..4]);
			this.right = decode(node[4..8]);
		}
		else {
			throw new Exception("Cannot decode a node of an invalid size. Nodes must be 24, 28 or 32 bits long.");
		}
	}
}
262 
263 
/**
 * Utility function to parse an IPv6 address
 *
 * The text is read from its last character to its first. Hexadecimal digits fill the
 * output from the end, one nibble at a time; when a "::" abbreviation is present, the
 * groups located before it are moved to the start of the output afterwards and the gap
 * is filled with zeros.
 *
 * Params:
 *   address = colon-separated hexadecimal groups, with at most one "::" abbreviation
 *
 * Returns: the sixteen bytes of the address, most significant byte first
 *
 * Throws: Exception on an invalid character, more than four digits in a group,
 *         more than two colons in a row, too many groups, more than one "::",
 *         or an address without "::" that does not have exactly eight non-empty groups
 */
public ubyte[16] parseIPv6(string address) {
	ubyte[16] output = 0;
	int current = 31;        // Index of the next nibble to write, counted from the start of output
	int split = -1;          // Nibble position recorded when the "::" abbreviation is met
	bool compressed = false; // Whether a "::" abbreviation has been met
	int groupCount = 0;      // Number of colons met, each one closing a group
	int groupCurrent = 0;    // Number of digits read in the group being worked on
	int groupsFilled = 0;    // Number of groups holding at least one digit
	int colons = 0;          // Number of colons met in a row
	
	/**
	 * Helper function to convert a hex char to a number
	 */
	ubyte hexToByte(char c) {
		if(c >= '0' && c <= '9') return cast(ubyte)(c-'0');
		if(c >= 'A' && c <= 'F') return cast(ubyte)(c-'A'+10);
		if(c >= 'a' && c <= 'f') return cast(ubyte)(c-'a'+10);
		else throw new Exception("Invalid IPv6 address: invalid hexadecimal character.");
	}
	
	// Loop over each character from the end
	foreach_reverse(char c; address) {
		// If that's a colon, count it
		if(c == ':') {
			if(++colons > 2) {
				throw new Exception("Invalid IPv6 address: too many colons in a row.");
			}
			
			// A colon means a group is closed, so we align back to the next 2 byte boundary
			current -= (current+1) % 4; // Need +1 here because we 0-index
			
			// Keep track of the number of groups for validation
			groupCurrent = 0;
			if(++groupCount > 8) {
				throw new Exception("IPv6: Too many groups");
			}
			
			// If that's the second colon in a row, mark where it is
			if(colons == 2) {
				// A second abbreviation would make the position of the zeros ambiguous
				if(compressed) {
					throw new Exception("Invalid IPv6 address: only one \"::\" abbreviation is allowed.");
				}
				
				compressed = true;
				split = current;
			}
		}
		else {
			colons = 0; // Reset the counter
			if(++groupCurrent > 4) {
				throw new Exception("Invalid IPv6 address: too many characters in a group.");
			}
			
			if(current < 0) {
				throw new Exception("Invalid IPv6 address: input address is too long.");
			}
			
			// First digit of a group: the group is not empty
			if(groupCurrent == 1) {
				groupsFilled++;
			}
			
			// Odd nibble indexes are the low half of a byte, even ones the high half
			output[current/2] |= hexToByte(c) << (current & 1 ? 0 : 4);
			current--;
		}
	}
	
	// Without a "::" abbreviation, nothing can stand for the missing groups:
	// the address must spell out eight non-empty groups separated by seven colons.
	if(!compressed && (groupCount != 7 || groupsFilled != 8)) {
		throw new Exception("Invalid IPv6 address: expected eight groups or a \"::\" abbreviation.");
	}
	
	// If we found a :: split, move the bytes to the right position and fill zeros
	if(compressed) {
		// Align group being worked on
		current -= ((current+1) % 4) - 1; // +1 - Align to upper bound so /2 works
		split   += 1;                     // +1 - Same as above
		
		copy(output[current/2..split/2], output[0..(split-current)/2]);
		output[(split-current)/2..split/2] = 0;
	}
	
	return output;
}