From 172ace9750f5244f7dd084bcf78ac4acfb4a984f Mon Sep 17 00:00:00 2001 From: Bram Matthys Date: Sun, 22 Mar 2026 10:24:02 +0100 Subject: [PATCH] geoip_maxmind: use our own mmdb implementation This is mainly due to licensing. The libmaxminddb library uses the Apache license, which means that if we compiled it in by default, it would effectively turn our "GPLv2 or later" into "GPLv3 or later". Our implementation is ISC licensed, so we can include and enable it by default and keep things at "GPLv2 or later". This is also why geoip_classic, and not the mmdb variant, was compiled in and used as the default in the first place. The mmdb.c is based on the specification, using the Go implementation as a reference during development (ISC licensed), initially implemented with the help of Claude Opus 4.6. After that, substantial changes were made to make it match UnrealIRCd's style and to make things less error-prone: C style changes, allocation and zero termination of strings in the library, auto-NULL in variadic functions so the caller cannot forget NULL there (similar to our unreal_log/do_unreal_log), using enums as the return type instead of int (similar to curl), adding doxygen docs, etc. This also means the old mmdb library dependency has been dropped, including from configure/autoconf. At the moment we still use the geoip classic library by default, including those DB files. The idea is we will switch over sometime later after this current new MMDB stuff has received more testing. This also makes us more flexible, since .mmdb files have become the de facto standard for pretty much all geoip vendors. 
--- Makefile.in | 8 +- Makefile.windows | 7 +- autoconf/m4/unreal.m4 | 26 - configure | 135 +--- configure.ac | 2 - include/mmdb.h | 170 +++++ src/modules/Makefile.in | 9 +- src/modules/geoip_maxmind.c | 167 ++--- src/modules/mmdb.c | 1211 +++++++++++++++++++++++++++++++++++ 9 files changed, 1456 insertions(+), 279 deletions(-) create mode 100644 include/mmdb.h create mode 100644 src/modules/mmdb.c diff --git a/Makefile.in b/Makefile.in index 1028f97be..25083ef0b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -94,9 +94,6 @@ URL=@URL@ GEOIP_CLASSIC_OBJECTS=@GEOIP_CLASSIC_OBJECTS@ GEOIP_CLASSIC_LIBS=@GEOIP_CLASSIC_LIBS@ GEOIP_CLASSIC_CFLAGS=@GEOIP_CLASSIC_CFLAGS@ -GEOIP_MAXMIND_OBJECTS=@GEOIP_MAXMIND_OBJECTS@ -LIBMAXMINDDB_CFLAGS=@LIBMAXMINDDB_CFLAGS@ -LIBMAXMINDDB_LIBS=@LIBMAXMINDDB_LIBS@ # Where is your openssl binary OPENSSLPATH=@OPENSSLPATH@ @@ -126,10 +123,7 @@ MAKEARGS = 'CFLAGS=${CFLAGS}' 'CC=${CC}' 'IRCDLIBS=${IRCDLIBS}' \ 'URL=${URL}' \ 'GEOIP_CLASSIC_OBJECTS=${GEOIP_CLASSIC_OBJECTS}' \ 'GEOIP_CLASSIC_LIBS=${GEOIP_CLASSIC_LIBS}' \ - 'GEOIP_CLASSIC_CFLAGS=${GEOIP_CLASSIC_CFLAGS}' \ - 'GEOIP_MAXMIND_OBJECTS=${GEOIP_MAXMIND_OBJECTS}' \ - 'LIBMAXMINDDB_CFLAGS=${LIBMAXMINDDB_CFLAGS}' \ - 'LIBMAXMINDDB_LIBS=${LIBMAXMINDDB_LIBS}' + 'GEOIP_CLASSIC_CFLAGS=${GEOIP_CLASSIC_CFLAGS}' custommodule: @if test -z "${MODULEFILE}"; then echo "Please set MODULEFILE when calling \`\`make custommodule''. For example, \`\`make custommodule MODULEFILE=callerid''." 
>&2; exit 1; fi diff --git a/Makefile.windows b/Makefile.windows index f9d005053..f8b0d59f7 100644 --- a/Makefile.windows +++ b/Makefile.windows @@ -275,6 +275,7 @@ DLL_FILES=\ src/modules/geoip_base.dll \ src/modules/geoip_classic.dll \ src/modules/geoip_csv.dll \ + src/modules/geoip_maxmind.dll \ src/modules/geoip-tag.dll \ src/modules/globops.dll \ src/modules/help.dll \ @@ -966,12 +967,12 @@ src/modules/geoip_classic.dll: src/modules/geoip_classic.c $(INCLUDES) src/modules/geoip_csv.dll: src/modules/geoip_csv.c $(INCLUDES) $(CC) $(MODCFLAGS) src/modules/geoip_csv.c /Fesrc/modules/ /Fosrc/modules/ /Fdsrc/modules/geoip_csv.pdb $(MODLFLAGS) +src/modules/geoip_maxmind.dll: src/modules/geoip_maxmind.c $(INCLUDES) + $(CC) $(MODCFLAGS) src/modules/geoip_maxmind.c src/modules/mmdb.c /Fesrc/modules/ /Fosrc/modules/ /Fdsrc/modules/geoip_maxmind.pdb $(MODLFLAGS) + src/modules/geoip-tag.dll: src/modules/geoip-tag.c $(INCLUDES) $(CC) $(MODCFLAGS) src/modules/geoip-tag.c /Fesrc/modules/ /Fosrc/modules/ /Fdsrc/modules/geoip-tag.pdb $(MODLFLAGS) -src/modules/geoip_maxmind.dll: src/modules/geoip_maxmind.c $(INCLUDES) - $(CC) $(MODCFLAGS) src/modules/geoip_maxmind.c /Fesrc/modules/ /Fosrc/modules/ /Fdsrc/modules/geoip_maxmind.pdb $(MODLFLAGS) - src/modules/globops.dll: src/modules/globops.c $(INCLUDES) $(CC) $(MODCFLAGS) src/modules/globops.c /Fesrc/modules/ /Fosrc/modules/ /Fdsrc/modules/globops.pdb $(MODLFLAGS) diff --git a/autoconf/m4/unreal.m4 b/autoconf/m4/unreal.m4 index 6d8da8000..69899da97 100644 --- a/autoconf/m4/unreal.m4 +++ b/autoconf/m4/unreal.m4 @@ -469,29 +469,3 @@ AC_DEFUN([CHECK_GEOIP_CLASSIC], AC_SUBST(GEOIP_CLASSIC_OBJECTS) ]) dnl AS_IF(enable_geoip_classic) ]) - -AC_DEFUN([CHECK_LIBMAXMINDDB], -[ - AC_ARG_ENABLE(libmaxminddb, - [AC_HELP_STRING([--enable-libmaxminddb=no/yes],[enable GeoIP libmaxminddb support])], - [enable_libmaxminddb=$enableval], - [enable_libmaxminddb=no]) - - AS_IF([test "x$enable_libmaxminddb" = "xyes"], - [ - dnl see if the system 
provides it - has_system_libmaxminddb="no" - PKG_CHECK_MODULES([LIBMAXMINDDB], [libmaxminddb >= 1.4.3], - [has_system_libmaxminddb=yes]) - AS_IF([test "x$has_system_libmaxminddb" = "xyes"], - [ - - AC_SUBST(LIBMAXMINDDB_LIBS) - AC_SUBST(LIBMAXMINDDB_CFLAGS) - - GEOIP_MAXMIND_OBJECTS="geoip_maxmind.so" - AC_SUBST(GEOIP_MAXMIND_OBJECTS) - ]) - ]) -]) - diff --git a/configure b/configure index cd3001c0b..6618a2d4d 100755 --- a/configure +++ b/configure @@ -646,9 +646,6 @@ ac_subst_vars='LTLIBOBJS LIBOBJS UNRLINCDIR IRCDLIBS -GEOIP_MAXMIND_OBJECTS -LIBMAXMINDDB_LIBS -LIBMAXMINDDB_CFLAGS GEOIP_CLASSIC_OBJECTS GEOIP_CLASSIC_LIBS GEOIP_CLASSIC_CFLAGS @@ -787,7 +784,6 @@ enable_werror enable_asan enable_libcurl enable_geoip_classic -enable_libmaxminddb ' ac_precious_vars='build_alias host_alias @@ -812,9 +808,7 @@ CARES_LIBS JANSSON_CFLAGS JANSSON_LIBS GEOIP_CLASSIC_CFLAGS -GEOIP_CLASSIC_LIBS -LIBMAXMINDDB_CFLAGS -LIBMAXMINDDB_LIBS' +GEOIP_CLASSIC_LIBS' # Initialize some variables set by options. @@ -1452,8 +1446,6 @@ Optional Features: --enable-libcurl=DIR enable libcurl (remote include) support --enable-geoip-classic=no/yes enable GeoIP Classic support - --enable-libmaxminddb=no/yes - enable GeoIP libmaxminddb support Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1534,10 +1526,6 @@ Some influential environment variables: C compiler flags for GEOIP_CLASSIC, overriding pkg-config GEOIP_CLASSIC_LIBS linker flags for GEOIP_CLASSIC, overriding pkg-config - LIBMAXMINDDB_CFLAGS - C compiler flags for LIBMAXMINDDB, overriding pkg-config - LIBMAXMINDDB_LIBS - linker flags for LIBMAXMINDDB, overriding pkg-config Use these variables to override the choices made by 'configure' or to help it to find libraries and programs with nonstandard names/locations. @@ -10147,127 +10135,6 @@ fi fi - - # Check whether --enable-libmaxminddb was given. 
-if test ${enable_libmaxminddb+y} -then : - enableval=$enable_libmaxminddb; enable_libmaxminddb=$enableval -else case e in #( - e) enable_libmaxminddb=no ;; -esac -fi - - - if test "x$enable_libmaxminddb" = "xyes" -then : - - has_system_libmaxminddb="no" - -pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libmaxminddb >= 1.4.3" >&5 -printf %s "checking for libmaxminddb >= 1.4.3... " >&6; } - -if test -n "$LIBMAXMINDDB_CFLAGS"; then - pkg_cv_LIBMAXMINDDB_CFLAGS="$LIBMAXMINDDB_CFLAGS" - elif test -n "$PKG_CONFIG"; then - if test -n "$PKG_CONFIG" && \ - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libmaxminddb >= 1.4.3\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libmaxminddb >= 1.4.3") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then - pkg_cv_LIBMAXMINDDB_CFLAGS=`$PKG_CONFIG --cflags "libmaxminddb >= 1.4.3" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes -else - pkg_failed=yes -fi - else - pkg_failed=untried -fi -if test -n "$LIBMAXMINDDB_LIBS"; then - pkg_cv_LIBMAXMINDDB_LIBS="$LIBMAXMINDDB_LIBS" - elif test -n "$PKG_CONFIG"; then - if test -n "$PKG_CONFIG" && \ - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libmaxminddb >= 1.4.3\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libmaxminddb >= 1.4.3") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then - pkg_cv_LIBMAXMINDDB_LIBS=`$PKG_CONFIG --libs "libmaxminddb >= 1.4.3" 2>/dev/null` - test "x$?" 
!= "x0" && pkg_failed=yes -else - pkg_failed=yes -fi - else - pkg_failed=untried -fi - - - -if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - -if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then - _pkg_short_errors_supported=yes -else - _pkg_short_errors_supported=no -fi - if test $_pkg_short_errors_supported = yes; then - LIBMAXMINDDB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libmaxminddb >= 1.4.3" 2>&1` - else - LIBMAXMINDDB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libmaxminddb >= 1.4.3" 2>&1` - fi - # Put the nasty error message in config.log where it belongs - echo "$LIBMAXMINDDB_PKG_ERRORS" >&5 - - as_fn_error $? "Package requirements (libmaxminddb >= 1.4.3) were not met: - -$LIBMAXMINDDB_PKG_ERRORS - -Consider adjusting the PKG_CONFIG_PATH environment variable if you -installed software in a non-standard prefix. - -Alternatively, you may set the environment variables LIBMAXMINDDB_CFLAGS -and LIBMAXMINDDB_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details." "$LINENO" 5 -elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it -is in your PATH or set the PKG_CONFIG environment variable to the full -path to pkg-config. - -Alternatively, you may set the environment variables LIBMAXMINDDB_CFLAGS -and LIBMAXMINDDB_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details. - -To get pkg-config, see . 
-See 'config.log' for more details" "$LINENO" 5; } -else - LIBMAXMINDDB_CFLAGS=$pkg_cv_LIBMAXMINDDB_CFLAGS - LIBMAXMINDDB_LIBS=$pkg_cv_LIBMAXMINDDB_LIBS - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - has_system_libmaxminddb=yes -fi - if test "x$has_system_libmaxminddb" = "xyes" -then : - - - - - - GEOIP_MAXMIND_OBJECTS="geoip_maxmind.so" - - -fi - -fi - - UNRLINCDIR="`pwd`/include" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if explicit -std=gnu17 is needed" >&5 diff --git a/configure.ac b/configure.ac index f46390c93..a678f67db 100644 --- a/configure.ac +++ b/configure.ac @@ -930,8 +930,6 @@ CHECK_LIBCURL CHECK_GEOIP_CLASSIC -CHECK_LIBMAXMINDDB - UNRLINCDIR="`pwd`/include" dnl This is at the end so the (potential) -std=gnu17 is not used diff --git a/include/mmdb.h b/include/mmdb.h new file mode 100644 index 000000000..f9d0e3dfa --- /dev/null +++ b/include/mmdb.h @@ -0,0 +1,170 @@ +/* + * mmdb.h - Minimal MMDB (MaxMind DB) reader library + * + * Written from the MaxMind DB file format specification + * (https://maxmind.github.io/MaxMind-DB/). + * + * This C implementation was written by the UnrealIRCd team, + * using the Go MMDB reader oschwald/maxminddb-golang by + * Gregory J. Oschwald as a reference during development. + * + * Copyright (c) 2015 Gregory J. Oschwald (Go implementation) + * Copyright (c) 2026 UnrealIRCd team + * + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that + * the above copyright notice and this permission notice appear in + * all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef MMDB_H
+#define MMDB_H
+
+#include
+#include
+
+#ifdef _WIN32
+#include
+#include
+#else
+#include
+#include
+#endif
+
+/** Status/error codes returned by mmdb functions.
+ * MMDB_OK is 0 and every error code is non-zero.
+ */
+typedef enum {
+	MMDB_OK = 0, /**< Success */
+	MMDB_ERR_OPEN, /**< Could not open or mmap the file */
+	MMDB_ERR_INVALID_DB, /**< Not a valid MMDB file */
+	MMDB_ERR_CORRUPT, /**< Search tree or data section corruption */
+	MMDB_ERR_NODATA, /**< IP found but requested path doesn't exist */
+	MMDB_ERR_TYPE, /**< Type mismatch (asked for string, got uint, etc) */
+	MMDB_ERR_IPV6_IN_V4, /**< Tried to look up an IPv6 address in an IPv4-only db */
+	MMDB_ERR_BADARG, /**< Invalid argument (e.g. unparseable IP address) */
+} MMDB_Status;
+
+/** Database metadata */
+typedef struct {
+	uint32_t node_count; /**< Number of nodes in the search tree */
+	uint16_t record_size; /**< Size of each record in bits */
+	uint16_t ip_version; /**< IP version the database covers (4 or 6) */
+	uint64_t build_epoch; /**< Unix timestamp when the database was built */
+	char database_type[128]; /**< Database type string (e.g. "GeoLite2-Country") */
+} MMDB_Metadata;
+
+/** Database handle */
+typedef struct {
+	uint8_t *data; /**< mmap'd (or malloc'd) file contents */
+	size_t data_size; /**< Total file size */
+	size_t data_section_offset; /**< Offset where data section starts */
+	size_t data_section_size; /**< Size of data section */
+	MMDB_Metadata metadata; /**< Parsed database metadata */
+	uint32_t ipv4_start_node; /**< Cached start node for IPv4 lookups in IPv6 dbs */
+	int ipv4_start_bit_depth; /**< Bit depth at ipv4_start_node */
+	int is_mmap; /**< 1 if data was mmap'd, 0 if malloc'd */
+} MMDB_DB;
+
+/** Lookup result */
+typedef struct {
+	MMDB_DB *db; /**< Database this result belongs to */
+	size_t offset; /**< Offset into data section, or 0 if not found (test has_data, not this field, to see whether the lookup hit) */
+	int has_data; /**< 1 if IP was found and has data */
+} MMDB_Result;
+
+/** Open an MMDB database file.
+ * Uses mmap where available, falls back to malloc+read.
+ * NOTE(review): the platform helpers visible in this patch's mmdb.c only map
+ * the file and always set is_mmap to 1 -- confirm the malloc+read fallback
+ * exists in mmdb_open() itself.
+ * @param db Database handle to initialize
+ * @param filename Path to the .mmdb file
+ * @returns MMDB_OK on success, or an error code
+ */
+MMDB_Status mmdb_open(MMDB_DB *db, const char *filename);
+
+/** Close the database and release resources.
+ * Safe to call on an already-closed or zero-initialized handle.
+ * @param db Database handle to close
+ */
+void mmdb_close(MMDB_DB *db);
+
+/** Look up an IP address given as a string (IPv4 or IPv6).
+ * On success, check result->has_data to see if the IP was
+ * actually found in the database.
+ * @param db Database handle
+ * @param ip_str IP address string (e.g. "1.2.3.4" or "2001:db8::1")
+ * @param result Lookup result (output)
+ * @returns MMDB_OK on success (which by itself does not mean the address was found), or an error code
+ */
+MMDB_Status mmdb_lookup(MMDB_DB *db, const char *ip_str, MMDB_Result *result);
+
+/** Look up an IP address given as a sockaddr.
+ * Supports sockaddr_in (IPv4) and sockaddr_in6 (IPv6). 
+ * @param db Database handle
+ * @param sa Socket address to look up
+ * @param result Lookup result (output)
+ * @returns MMDB_OK on success, or an error code
+ */
+MMDB_Status mmdb_lookup_sockaddr(MMDB_DB *db, const struct sockaddr *sa,
+                                 MMDB_Result *result);
+
+/** Retrieve a string value from a lookup result by path.
+ * Returns a malloc'd, null-terminated copy. Caller must free().
+ * On error, *out is set to NULL.
+ * @param result Lookup result from mmdb_lookup()
+ * @param out Receives a malloc'd null-terminated string (output)
+ * @param ... Path of one or more map keys (the macro appends the NULL sentinel automatically)
+ * @returns MMDB_OK on success, or an error code
+ * @note Example: mmdb_get_str(&result, &val, "country", "iso_code");
+ * @note The sentinel is passed as (const char *)NULL: a bare NULL may expand
+ *       to the integer 0, which is not a valid pointer argument in a
+ *       variadic call. Do the same when calling mmdb_do_get_str() directly.
+ */
+MMDB_Status mmdb_do_get_str(MMDB_Result *result, char **out, ...);
+#define mmdb_get_str(result, out, ...) mmdb_do_get_str(result, out, __VA_ARGS__, (const char *)NULL)
+
+/** Retrieve a uint32 value from a lookup result by path.
+ * Also accepts uint16 values (promoted to uint32).
+ * On error, *out is set to 0.
+ * @param result Lookup result from mmdb_lookup()
+ * @param out Receives the uint32 value (output)
+ * @param ... Path of one or more map keys (the macro appends the NULL sentinel automatically)
+ * @returns MMDB_OK on success, or an error code
+ * @note Example: mmdb_get_uint32(&result, &asn, "autonomous_system_number");
+ * @note When calling mmdb_do_get_uint32() directly, end the path with (const char *)NULL.
+ */
+MMDB_Status mmdb_do_get_uint32(MMDB_Result *result, uint32_t *out, ...);
+#define mmdb_get_uint32(result, out, ...) mmdb_do_get_uint32(result, out, __VA_ARGS__, (const char *)NULL)
+
+/** Retrieve a boolean value from a lookup result by path.
+ * On error, *out is set to 0.
+ * @param result Lookup result from mmdb_lookup()
+ * @param out Receives the boolean value (0 or 1) (output)
+ * @param ... Path of one or more map keys (the macro appends the NULL sentinel automatically)
+ * @returns MMDB_OK on success, or an error code
+ * @note Example: mmdb_get_bool(&result, &is_vpn, "is_anonymous_vpn");
+ * @note When calling mmdb_do_get_bool() directly, end the path with (const char *)NULL.
+ */
+MMDB_Status mmdb_do_get_bool(MMDB_Result *result, int *out, ...);
+#define mmdb_get_bool(result, out, ...) mmdb_do_get_bool(result, out, __VA_ARGS__, (const char *)NULL)
+
+/** Retrieve a double (float64) value from a lookup result by path.
+ * Also accepts float32 values (promoted to double).
+ * On error, *out is set to 0.
+ * @param result Lookup result from mmdb_lookup()
+ * @param out Receives the double value (output)
+ * @param ... Path of one or more map keys (the macro appends the NULL sentinel automatically)
+ * @returns MMDB_OK on success, or an error code
+ * @note Example: mmdb_get_double(&result, &lat, "location", "latitude");
+ * @note When calling mmdb_do_get_double() directly, end the path with (const char *)NULL.
+ */
+MMDB_Status mmdb_do_get_double(MMDB_Result *result, double *out, ...);
+#define mmdb_get_double(result, out, ...) mmdb_do_get_double(result, out, __VA_ARGS__, (const char *)NULL)
+
+/** Return a human-readable error string for a status code. 
+ * @param err Status code to describe + * @returns Static string describing the error (never NULL) + */ +const char *mmdb_strerror(MMDB_Status err); + +#endif /* MMDB_H */ diff --git a/src/modules/Makefile.in b/src/modules/Makefile.in index 199c87b51..66c9be0b7 100644 --- a/src/modules/Makefile.in +++ b/src/modules/Makefile.in @@ -87,7 +87,8 @@ MODULES= \ central-api.so central-blocklist.so \ no-implicit-names.so maxperip.so utf8functions.so utf8only.so \ isupport.so extended-isupport.so \ - $(GEOIP_CLASSIC_OBJECTS) $(GEOIP_MAXMIND_OBJECTS) + $(GEOIP_CLASSIC_OBJECTS) \ + geoip_maxmind.so MODULEFLAGS=@MODULEFLAGS@ RM=@RM@ @@ -122,7 +123,7 @@ geoip_classic.so: geoip_classic.c $(INCLUDES) $(CC) $(CFLAGS) $(MODULEFLAGS) $(GEOIP_CLASSIC_CFLAGS) -DDYNAMIC_LINKING \ -o geoip_classic.so geoip_classic.c @LDFLAGS_PRIVATELIBS@ $(GEOIP_CLASSIC_LIBS) -# geoip_maxmind requires another extra library +# geoip_maxmind uses shipped mmdb.c geoip_maxmind.so: geoip_maxmind.c $(INCLUDES) - $(CC) $(CFLAGS) $(MODULEFLAGS) $(LIBMAXMINDDB_CFLAGS) -DDYNAMIC_LINKING \ - -o geoip_maxmind.so geoip_maxmind.c @LDFLAGS_PRIVATELIBS@ $(LIBMAXMINDDB_LIBS) + $(CC) $(CFLAGS) $(MODULEFLAGS) -DDYNAMIC_LINKING \ + -o geoip_maxmind.so geoip_maxmind.c mmdb.c diff --git a/src/modules/geoip_maxmind.c b/src/modules/geoip_maxmind.c index 20ba2aa8b..01d577038 100644 --- a/src/modules/geoip_maxmind.c +++ b/src/modules/geoip_maxmind.c @@ -4,12 +4,12 @@ */ #include "unrealircd.h" -#include +#include "mmdb.h" ModuleHeader MOD_HEADER = { "geoip_maxmind", - "5.1", + "5.2", "GEOIP using maxmind databases", "UnrealIRCd Team", "unrealircd-6", @@ -27,7 +27,7 @@ struct geoip_maxmind_config_s { /* Variables */ struct geoip_maxmind_config_s geoip_maxmind_config; -MMDB_s mmdb, asn_mmdb; +MMDB_DB mmdb, asn_mmdb; /* Forward declarations */ int geoip_maxmind_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs); @@ -40,8 +40,7 @@ int geoip_maxmind_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *err { 
ConfigEntry *cep; int errors = 0; - int i; - + if (type != CONFIG_SET) return 0; @@ -105,7 +104,7 @@ int geoip_maxmind_configposttest(int *errs) errors++; } if (!geoip_maxmind_config.have_asn_database) - safe_free(geoip_maxmind_config.db_file); /* at this point we aren't going to use ASN at all */ + safe_free(geoip_maxmind_config.asn_db_file); /* at this point we aren't going to use ASN at all */ } else { @@ -117,9 +116,8 @@ int geoip_maxmind_configposttest(int *errs) geoip_maxmind_config.have_database = 1; } else { - config_error("[geoip_maxmind] cannot open database file \"%s/%s\" for reading (%s)", PERMDATADIR, geoip_maxmind_config.db_file, strerror(errno)); + config_warn("[geoip_maxmind] cannot open database file \"%s/%s\" for reading (%s)", PERMDATADIR, geoip_maxmind_config.db_file, strerror(errno)); safe_free(geoip_maxmind_config.db_file); - errors++; } if (is_file_readable(geoip_maxmind_config.asn_db_file, PERMDATADIR)) @@ -184,36 +182,36 @@ MOD_INIT() MOD_LOAD() { - geoip_maxmind_free(); - convert_to_absolute_path(&geoip_maxmind_config.db_file, PERMDATADIR); - - int status = MMDB_open(geoip_maxmind_config.db_file, MMDB_MODE_MMAP, &mmdb); + int status; - if (status != MMDB_SUCCESS) { - int save_err = errno; - unreal_log(ULOG_WARNING, "geoip_maxmind", "GEOIP_CANNOT_OPEN_DB", NULL, - "Could not open '$filename' - $maxmind_error; IO error: $io_error", - log_data_string("filename", geoip_maxmind_config.db_file), - log_data_string("maxmind_error", MMDB_strerror(status)), - log_data_string("io_error", (status == MMDB_IO_ERROR)?strerror(save_err):"none")); - return MOD_FAILED; + geoip_maxmind_free(); + + if (geoip_maxmind_config.db_file) + { + convert_to_absolute_path(&geoip_maxmind_config.db_file, PERMDATADIR); + status = mmdb_open(&mmdb, geoip_maxmind_config.db_file); + if (status != MMDB_OK) + { + unreal_log(ULOG_WARNING, "geoip_maxmind", "GEOIP_CANNOT_OPEN_DB", NULL, + "Could not open '$filename' - $mmdb_error", + log_data_string("filename", 
geoip_maxmind_config.db_file), + log_data_string("mmdb_error", mmdb_strerror(status))); + geoip_maxmind_config.have_database = 0; + } } - if (!geoip_maxmind_config.asn_db_file) /* if ASN file is unavailable, ignore it */ - return MOD_SUCCESS; - - convert_to_absolute_path(&geoip_maxmind_config.asn_db_file, PERMDATADIR); - - status = MMDB_open(geoip_maxmind_config.asn_db_file, MMDB_MODE_MMAP, &asn_mmdb); - - if (status != MMDB_SUCCESS) { - int save_err = errno; - unreal_log(ULOG_WARNING, "geoip_maxmind", "GEOIP_CANNOT_OPEN_ASN_DB", NULL, - "Could not open '$filename' - $maxmind_error; IO error: $io_error", - log_data_string("filename", geoip_maxmind_config.db_file), - log_data_string("maxmind_error", MMDB_strerror(status)), - log_data_string("io_error", (status == MMDB_IO_ERROR)?strerror(save_err):"none")); - return MOD_FAILED; + if (geoip_maxmind_config.asn_db_file) + { + convert_to_absolute_path(&geoip_maxmind_config.asn_db_file, PERMDATADIR); + status = mmdb_open(&asn_mmdb, geoip_maxmind_config.asn_db_file); + if (status != MMDB_OK) + { + unreal_log(ULOG_WARNING, "geoip_maxmind", "GEOIP_CANNOT_OPEN_ASN_DB", NULL, + "Could not open '$filename' - $mmdb_error", + log_data_string("filename", geoip_maxmind_config.asn_db_file), + log_data_string("mmdb_error", mmdb_strerror(status))); + geoip_maxmind_config.have_asn_database = 0; + } } return MOD_SUCCESS; @@ -229,17 +227,15 @@ MOD_UNLOAD() void geoip_maxmind_free(void) { - MMDB_close(&mmdb); - MMDB_close(&asn_mmdb); + mmdb_close(&mmdb); + mmdb_close(&asn_mmdb); } GeoIPResult *geoip_lookup_maxmind(char *ip) { - int gai_error, mmdb_error, status; - MMDB_lookup_result_s result; - MMDB_entry_data_s country_code, country_name, asn, asn_org; - char *country_code_str, *country_name_str, *asn_org_str; - GeoIPResult *r = NULL; + MMDB_Status status; + MMDB_Result result; + GeoIPResult *r; if (!ip) return NULL; @@ -248,91 +244,56 @@ GeoIPResult *geoip_lookup_maxmind(char *ip) return NULL; /* Country database */ - result = 
MMDB_lookup_string(&mmdb, ip, &gai_error, &mmdb_error); - if (gai_error) + status = mmdb_lookup(&mmdb, ip, &result); + if (status != MMDB_OK) { unreal_log(ULOG_DEBUG, "geoip_maxmind", "GEOIP_DB_ERROR", NULL, - "libmaxminddb: getaddrinfo error for $ip: $error", + "mmdb: lookup error for $ip: $error", log_data_string("ip", ip), - log_data_string("error", gai_strerror(gai_error))); - return NULL; - } - - if (mmdb_error != MMDB_SUCCESS) - { - unreal_log(ULOG_DEBUG, "geoip_maxmind", "GEOIP_DB_ERROR", NULL, - "libmaxminddb: library error for $ip: $error", - log_data_string("ip", ip), - log_data_string("error", MMDB_strerror(mmdb_error))); + log_data_string("error", mmdb_strerror(status))); return NULL; } - if (!result.found_entry) /* no result */ + if (!result.has_data) /* no result */ return NULL; - status = MMDB_get_value(&result.entry, &country_code, "country", "iso_code", NULL); - if (status != MMDB_SUCCESS || !country_code.has_data || country_code.type != MMDB_DATA_TYPE_UTF8_STRING) - return NULL; - status = MMDB_get_value(&result.entry, &country_name, "country", "names", "en", NULL); - if (status != MMDB_SUCCESS || !country_name.has_data || country_name.type != MMDB_DATA_TYPE_UTF8_STRING) - return NULL; - - /* these results are not null-terminated */ - country_code_str = safe_alloc(country_code.data_size + 1); - country_name_str = safe_alloc(country_name.data_size + 1); - memcpy(country_code_str, country_code.utf8_string, country_code.data_size); - country_code_str[country_code.data_size] = '\0'; - memcpy(country_name_str, country_name.utf8_string, country_name.data_size); - country_name_str[country_name.data_size] = '\0'; - r = safe_alloc(sizeof(GeoIPResult)); - r->country_code = country_code_str; - r->country_name = country_name_str; - /* ASN database */ + if (mmdb_get_str(&result, &r->country_code, "country", "iso_code") != MMDB_OK) + { + free_geoip_result(r); + return NULL; + } + + if (mmdb_get_str(&result, &r->country_name, "country", "names", "en") != 
MMDB_OK) + { + free_geoip_result(r); + return NULL; + } + + /* No ASN database? Then we are done. */ if (!geoip_maxmind_config.have_asn_database) return r; - result = MMDB_lookup_string(&asn_mmdb, ip, &gai_error, &mmdb_error); + status = mmdb_lookup(&asn_mmdb, ip, &result); - if (gai_error) + if (status != MMDB_OK) { unreal_log(ULOG_DEBUG, "geoip_maxmind", "GEOIP_ASN_DB_ERROR", NULL, - "libmaxminddb: getaddrinfo error for $ip: $error", + "mmdb: lookup error for $ip: $error", log_data_string("ip", ip), - log_data_string("error", gai_strerror(gai_error))); + log_data_string("error", mmdb_strerror(status))); return r; } - if (mmdb_error != MMDB_SUCCESS) - { - unreal_log(ULOG_DEBUG, "geoip_maxmind", "GEOIP_ASN_DB_ERROR", NULL, - "libmaxminddb: library error for $ip: $error", - log_data_string("ip", ip), - log_data_string("error", MMDB_strerror(mmdb_error))); - return r; - } + if (!result.has_data) + return r; /* no ASN result, we are done. */ - if (!result.found_entry) /* no result */ + if (mmdb_get_uint32(&result, &r->asn, "autonomous_system_number") != MMDB_OK) return r; - status = MMDB_get_value(&result.entry, &asn, "autonomous_system_number", NULL); - if (status != MMDB_SUCCESS || !asn.has_data || asn.type != MMDB_DATA_TYPE_UINT32) + if (mmdb_get_str(&result, &r->asname, "autonomous_system_organization") != MMDB_OK) return r; - status = MMDB_get_value(&result.entry, &asn_org, "autonomous_system_organization", NULL); - if (status != MMDB_SUCCESS || !asn_org.has_data || asn_org.type != MMDB_DATA_TYPE_UTF8_STRING) - return r; - - if (!r) - r = safe_alloc(sizeof(GeoIPResult)); - - asn_org_str = safe_alloc(asn_org.data_size + 1); - memcpy(asn_org_str, asn_org.utf8_string, asn_org.data_size); - asn_org_str[asn_org.data_size] = '\0'; - - r->asn = asn.uint32; - r->asname = asn_org_str; return r; } - diff --git a/src/modules/mmdb.c b/src/modules/mmdb.c new file mode 100644 index 000000000..e322efe2a --- /dev/null +++ b/src/modules/mmdb.c @@ -0,0 +1,1211 @@ +/* + * mmdb.c - 
Minimal MMDB (MaxMind DB) reader library + * + * Written from the MaxMind DB file format specification + * (https://maxmind.github.io/MaxMind-DB/). + * + * This C implementation was written by the UnrealIRCd team, + * using the Go MMDB reader oschwald/maxminddb-golang by + * Gregory J. Oschwald as a reference during development. + * + * Copyright (c) 2015 Gregory J. Oschwald (Go implementation) + * Copyright (c) 2026 UnrealIRCd team + * + * Permission to use, copy, modify, and/or distribute this software for + * any purpose with or without fee is hereby granted, provided that + * the above copyright notice and this permission notice appear in + * all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#include "mmdb.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+#include
+#include
+#else
+#include
+#include
+#include
+#include
+#include
+#endif
+
+/* Constants */
+
+static const uint8_t METADATA_MARKER[] = "\xAB\xCD\xEF" "MaxMind.com";
+#define METADATA_MARKER_LEN 14
+#define METADATA_MAX_SIZE (128 * 1024)
+#define DATA_SEPARATOR_SIZE 16
+#define MAX_DECODE_DEPTH 64
+
+/* MMDB data types from the spec */
+enum {
+	DT_EXTENDED = 0,
+	DT_POINTER = 1,
+	DT_STRING = 2,
+	DT_FLOAT64 = 3,
+	DT_BYTES = 4,
+	DT_UINT16 = 5,
+	DT_UINT32 = 6,
+	DT_MAP = 7,
+	DT_INT32 = 8,
+	DT_UINT64 = 9,
+	DT_UINT128 = 10,
+	DT_ARRAY = 11,
+	DT_BOOL = 14,
+	DT_FLOAT32 = 15,
+};
+
+/* Platform: mmap / file I/O */
+
+#ifdef _WIN32
+
+/* Map 'filename' read-only into memory (Windows).
+ * On success returns the mapped view, stores its length in *size_out and
+ * sets *is_mmap to 1. Returns NULL, leaving the outputs untouched, when the
+ * file cannot be opened, is empty, is larger than SIZE_MAX or cannot be mapped.
+ */
+static uint8_t *mmdb_mmap_file(const char *filename, size_t *size_out, int *is_mmap)
+{
+	HANDLE hFile, hMap;
+	LARGE_INTEGER fileSize;
+	uint8_t *data;
+
+	hFile = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, NULL,
+	                    OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (hFile == INVALID_HANDLE_VALUE)
+		return NULL;
+
+	if (!GetFileSizeEx(hFile, &fileSize) || fileSize.QuadPart == 0 ||
+	    (uint64_t)fileSize.QuadPart > SIZE_MAX)
+	{
+		CloseHandle(hFile);
+		return NULL;
+	}
+
+	hMap = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+	CloseHandle(hFile);
+	if (!hMap)
+		return NULL;
+
+	data = (uint8_t *)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
+	CloseHandle(hMap);
+	if (!data)
+		return NULL;
+
+	*size_out = (size_t)fileSize.QuadPart;
+	*is_mmap = 1;
+	return data;
+}
+
+/* Release a view returned by mmdb_mmap_file(); 'size' is unused on Windows. */
+static void mmdb_munmap(uint8_t *data, size_t size)
+{
+	(void)size;
+	UnmapViewOfFile(data);
+}
+
+#else /* Unix */
+
+/* Map 'filename' read-only into memory (Unix).
+ * On success returns the mapping, stores its length in *size_out and sets
+ * *is_mmap to 1. Returns NULL, leaving the outputs untouched, when the file
+ * cannot be opened or stat'ed, is empty, is larger than SIZE_MAX or cannot
+ * be mapped.
+ */
+static uint8_t *mmdb_mmap_file(const char *filename, size_t *size_out, int *is_mmap)
+{
+	int fd;
+	struct stat st;
+	uint8_t *data;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	/* st_size is an off_t, which can be wider than size_t (32-bit builds
+	 * with 64-bit file offsets). Reject such files up front, like the
+	 * Windows branch does, instead of mapping a silently truncated length.
+	 */
+	if (fstat(fd, &st) < 0 || st.st_size <= 0 ||
+	    (uint64_t)st.st_size > SIZE_MAX)
+	{
+		close(fd);
+		return NULL;
+	}
+
+	data = (uint8_t *)mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	close(fd);
+	if (data == MAP_FAILED)
+		return NULL;
+
+	*size_out = (size_t)st.st_size;
+	*is_mmap = 1;
+	return data;
+}
+
+/* Release a mapping returned by mmdb_mmap_file(). */
+static void mmdb_munmap(uint8_t *data, size_t size)
+{
+	munmap(data, size);
+}
+
+#endif
+
+/* Data section decoder
+ *
+ * These functions decode values from the MMDB data section.
+ * The "buffer" and "buflen" refer to the data section (or
+ * metadata section) only, not the whole file.
+ */
+
+/* Decode size from control byte per the spec's payload size rules.
+ * raw_size below 29 is the size itself. 29, 30 and 31 mean that 1, 2 or 3
+ * big-endian bytes follow at 'offset' and are added to 29, 285 or 65821.
+ * Stores the size in *size_out and the offset just past any extra bytes in
+ * *new_offset. Returns MMDB_OK, or MMDB_ERR_CORRUPT if the extra bytes
+ * would lie beyond buflen.
+ */
+static int decode_size(const uint8_t *buf, size_t buflen,
+                       uint32_t raw_size, size_t offset,
+                       uint32_t *size_out, size_t *new_offset)
+{
+	if (raw_size < 29)
+	{
+		*size_out = raw_size;
+		*new_offset = offset;
+		return MMDB_OK;
+	}
+	if (raw_size == 29)
+	{
+		if (offset >= buflen)
+			return MMDB_ERR_CORRUPT;
+		*size_out = 29 + (uint32_t)buf[offset];
+		*new_offset = offset + 1;
+		return MMDB_OK;
+	}
+	if (raw_size == 30)
+	{
+		if (offset + 2 > buflen)
+			return MMDB_ERR_CORRUPT;
+		*size_out = 285 + ((uint32_t)buf[offset] << 8) + (uint32_t)buf[offset + 1];
+		*new_offset = offset + 2;
+		return MMDB_OK;
+	}
+	/* raw_size == 31 */
+	if (offset + 3 > buflen)
+		return MMDB_ERR_CORRUPT;
+	*size_out = 65821 +
+	            ((uint32_t)buf[offset] << 16) +
+	            ((uint32_t)buf[offset + 1] << 8) +
+	            (uint32_t)buf[offset + 2];
+	*new_offset = offset + 3;
+	return MMDB_OK;
+}
+
+/* Decode a control byte: returns data type, payload size, and new offset. 
*/ +static int decode_ctrl(const uint8_t *buf, size_t buflen, size_t offset, + int *type_out, uint32_t *size_out, size_t *data_offset) +{ + int type; + uint32_t raw_size; + size_t off; + + if (offset >= buflen) + return MMDB_ERR_CORRUPT; + + type = (buf[offset] >> 5) & 0x7; + raw_size = buf[offset] & 0x1F; + off = offset + 1; + + if (type == DT_EXTENDED) + { + if (off >= buflen) + return MMDB_ERR_CORRUPT; + type = (int)buf[off] + 7; + off++; + } + + if (type == DT_POINTER) + { + /* For pointers, the size field encodes the pointer value, not a payload size. + * We return raw_size in size_out for the caller to decode. */ + *type_out = DT_POINTER; + *size_out = raw_size; + *data_offset = off; + return MMDB_OK; + } + + *type_out = type; + return decode_size(buf, buflen, raw_size, off, size_out, data_offset); +} + +/* Decode a pointer value per the spec. Returns the offset it points to. */ +static int decode_pointer(const uint8_t *buf, size_t buflen, + uint32_t ctrl_size, size_t offset, + size_t *pointer_out, size_t *new_offset) +{ + uint32_t ptr_size = ((ctrl_size >> 3) & 0x3) + 1; + size_t end = offset + ptr_size; + size_t pointer; + uint32_t prefix; + size_t i; + + if (end > buflen) + return MMDB_ERR_CORRUPT; + + prefix = (ptr_size == 4) ? 0 : (ctrl_size & 0x7); + + /* Build pointer from prefix + pointer bytes (big-endian) */ + pointer = prefix; + for (i = offset; i < end; i++) + pointer = (pointer << 8) | (size_t)buf[i]; + + /* Add base offset per pointer size */ + switch (ptr_size) + { + case 1: break; + case 2: pointer += 2048; break; + case 3: pointer += 526336; break; + case 4: break; + } + + *pointer_out = pointer; + *new_offset = end; + return MMDB_OK; +} + +/* Read a uint32 from variable-length big-endian bytes in the data section. 
*/ +static int decode_uint32(const uint8_t *buf, size_t buflen, + uint32_t size, size_t offset, uint32_t *out) +{ + uint32_t i; + + if (size > 4 || offset + size > buflen) + return MMDB_ERR_CORRUPT; + *out = 0; + for (i = 0; i < size; i++) + *out = (*out << 8) | (uint32_t)buf[offset + i]; + return MMDB_OK; +} + +/* Read a uint64 from variable-length big-endian bytes. */ +static int decode_uint64(const uint8_t *buf, size_t buflen, + uint32_t size, size_t offset, uint64_t *out) +{ + uint32_t i; + + if (size > 8 || offset + size > buflen) + return MMDB_ERR_CORRUPT; + *out = 0; + for (i = 0; i < size; i++) + *out = (*out << 8) | (uint64_t)buf[offset + i]; + return MMDB_OK; +} + +/* Read a uint16 from variable-length big-endian bytes. */ +static int decode_uint16(const uint8_t *buf, size_t buflen, + uint32_t size, size_t offset, uint16_t *out) +{ + uint32_t i; + + if (size > 2 || offset + size > buflen) + return MMDB_ERR_CORRUPT; + *out = 0; + for (i = 0; i < size; i++) + *out = (uint16_t)((*out << 8) | (uint16_t)buf[offset + i]); + return MMDB_OK; +} + +/* Read a float64 (double) stored as IEEE-754 big-endian. */ +static int decode_float64(const uint8_t *buf, size_t buflen, + uint32_t size, size_t offset, double *out) +{ + union { uint64_t u; double d; } conv; + int i; + + if (size != 8 || offset + 8 > buflen) + return MMDB_ERR_CORRUPT; + conv.u = 0; + for (i = 0; i < 8; i++) + conv.u = (conv.u << 8) | (uint64_t)buf[offset + i]; + *out = conv.d; + return MMDB_OK; +} + +/* Read a float32 stored as IEEE-754 big-endian. */ +static int decode_float32(const uint8_t *buf, size_t buflen, + uint32_t size, size_t offset, float *out) +{ + union { uint32_t u; float f; } conv; + int i; + + if (size != 4 || offset + 4 > buflen) + return MMDB_ERR_CORRUPT; + conv.u = 0; + for (i = 0; i < 4; i++) + conv.u = (conv.u << 8) | (uint32_t)buf[offset + i]; + *out = conv.f; + return MMDB_OK; +} + +/* Skip over a data field without decoding it. 
Used to skip values + * in maps when searching for a specific key. */ +static int skip_value(const uint8_t *buf, size_t buflen, size_t offset, + size_t *new_offset, int depth) +{ + int type; + uint32_t size; + size_t off; + int err; + uint32_t i; + uint32_t ptr_size; + + if (depth > MAX_DECODE_DEPTH) + return MMDB_ERR_CORRUPT; + + err = decode_ctrl(buf, buflen, offset, &type, &size, &off); + if (err) + return err; + + if (type == DT_POINTER) + { + /* Pointer is self-contained; just skip past the pointer bytes */ + ptr_size = ((size >> 3) & 0x3) + 1; + if (off + ptr_size > buflen) + return MMDB_ERR_CORRUPT; + *new_offset = off + ptr_size; + return MMDB_OK; + } + + if (type == DT_MAP) + { + /* Skip 2*size entries (key + value for each pair) */ + for (i = 0; i < size * 2; i++) + { + err = skip_value(buf, buflen, off, &off, depth + 1); + if (err) + return err; + } + *new_offset = off; + return MMDB_OK; + } + + if (type == DT_ARRAY) + { + for (i = 0; i < size; i++) + { + err = skip_value(buf, buflen, off, &off, depth + 1); + if (err) + return err; + } + *new_offset = off; + return MMDB_OK; + } + + if (type == DT_BOOL) + { + /* Bool has no payload; size encodes the value */ + *new_offset = off; + return MMDB_OK; + } + + /* Scalar types: payload is 'size' bytes */ + if (off + size > buflen) + return MMDB_ERR_CORRUPT; + *new_offset = off + size; + return MMDB_OK; +} + +/* Resolve a data entry at a given offset, following pointers + * if needed. Returns the type, size, and offset of the actual + * data payload. 
*/ +static int resolve_entry(const uint8_t *buf, size_t buflen, size_t offset, + int *type_out, uint32_t *size_out, size_t *data_offset) +{ + int type; + uint32_t size; + size_t off; + int err; + size_t pointer; + + err = decode_ctrl(buf, buflen, offset, &type, &size, &off); + if (err) + return err; + + if (type == DT_POINTER) + { + err = decode_pointer(buf, buflen, size, off, &pointer, &off); + if (err) + return err; + /* Follow the pointer (only one level allowed per spec) */ + err = decode_ctrl(buf, buflen, pointer, &type, &size, &off); + if (err) + return err; + if (type == DT_POINTER) + return MMDB_ERR_CORRUPT; /* pointer to pointer is illegal */ + } + + *type_out = type; + *size_out = size; + *data_offset = off; + return MMDB_OK; +} + +/* Find a key in a map. + * + * Given the data section buffer and an offset pointing to a map, + * find the entry with the given key. Returns the offset of the + * value's control byte. */ +static int map_find_key(const uint8_t *buf, size_t buflen, + size_t map_offset, const char *key, + size_t *value_offset) +{ + int type; + uint32_t map_size; + size_t off; + int err; + size_t key_len = strlen(key); + uint32_t i; + int ktype; + uint32_t ksize; + size_t koff; + size_t next_off; + + /* Decode the map entry at map_offset, following pointers */ + err = resolve_entry(buf, buflen, map_offset, &type, &map_size, &off); + if (err) + return err; + if (type != DT_MAP) + return MMDB_ERR_TYPE; + + for (i = 0; i < map_size; i++) + { + /* Decode key - may be a pointer */ + err = resolve_entry(buf, buflen, off, &ktype, &ksize, &koff); + if (err) + return err; + if (ktype != DT_STRING) + return MMDB_ERR_CORRUPT; + + /* We need to advance past the key in the stream. + * The key's position in the stream is at 'off', so skip it. 
*/ + err = skip_value(buf, buflen, off, &next_off, 0); + if (err) + return err; + + /* Compare key */ + if (koff + ksize <= buflen && + ksize == (uint32_t)key_len && + memcmp(buf + koff, key, key_len) == 0) + { + *value_offset = next_off; + return MMDB_OK; + } + + /* Skip the value */ + err = skip_value(buf, buflen, next_off, &off, 0); + if (err) + return err; + } + + return MMDB_ERR_NODATA; +} + +/* Walk a path of keys through nested maps */ +static int walk_path(const uint8_t *buf, size_t buflen, + size_t start_offset, va_list ap, + size_t *final_offset) +{ + size_t offset = start_offset; + const char *key; + int err; + + while ((key = va_arg(ap, const char *)) != NULL) + { + err = map_find_key(buf, buflen, offset, key, &offset); + if (err) + return err; + } + + *final_offset = offset; + return MMDB_OK; +} + +/* Metadata parsing */ + +/* Find the metadata marker by searching backwards from the end of file. */ +static const uint8_t *find_metadata(const uint8_t *data, size_t data_size) +{ + size_t scan_size; + const uint8_t *p; + + if (data_size < METADATA_MARKER_LEN) + return NULL; + + scan_size = data_size; + if (scan_size > METADATA_MAX_SIZE) + scan_size = METADATA_MAX_SIZE; + + /* Search backwards for the last occurrence */ + p = data + data_size - METADATA_MARKER_LEN; + while (p >= data + data_size - scan_size) + { + if (memcmp(p, METADATA_MARKER, METADATA_MARKER_LEN) == 0) + return p; + if (p == data) + break; + p--; + } + return NULL; +} + +/* Parse metadata from the metadata section into db->metadata. + * The metadata is itself an MMDB data section containing a map. 
*/ +static int parse_metadata(MMDB_DB *db, const uint8_t *meta_buf, size_t meta_len) +{ + int type; + uint32_t map_size; + size_t off; + int err; + uint32_t i; + int ktype; + uint32_t ksize; + size_t koff; + size_t val_off; + int vtype; + uint32_t vsize; + size_t voff; + uint32_t val_u32; + uint16_t val_u16; + uint64_t val_u64; + size_t copy_len; + + err = decode_ctrl(meta_buf, meta_len, 0, &type, &map_size, &off); + if (err) + return MMDB_ERR_INVALID_DB; + if (type != DT_MAP) + return MMDB_ERR_INVALID_DB; + + memset(&db->metadata, 0, sizeof(db->metadata)); + + for (i = 0; i < map_size; i++) + { + /* Decode key */ + err = resolve_entry(meta_buf, meta_len, off, &ktype, &ksize, &koff); + if (err) + return MMDB_ERR_INVALID_DB; + if (ktype != DT_STRING) + return MMDB_ERR_INVALID_DB; + if (koff + ksize > meta_len) + return MMDB_ERR_INVALID_DB; + + /* Skip key in stream */ + err = skip_value(meta_buf, meta_len, off, &val_off, 0); + if (err) + return MMDB_ERR_INVALID_DB; + + /* Decode value depending on which key this is */ + err = resolve_entry(meta_buf, meta_len, val_off, &vtype, &vsize, &voff); + if (err) + return MMDB_ERR_INVALID_DB; + + if (ksize == 10 && memcmp(meta_buf + koff, "node_count", 10) == 0) + { + if (vtype != DT_UINT32 && vtype != DT_UINT16) + return MMDB_ERR_INVALID_DB; + err = decode_uint32(meta_buf, meta_len, vsize, voff, &val_u32); + if (err) + return MMDB_ERR_INVALID_DB; + db->metadata.node_count = val_u32; + } else if (ksize == 11 && memcmp(meta_buf + koff, "record_size", 11) == 0) + { + if (vtype != DT_UINT16 && vtype != DT_UINT32) + return MMDB_ERR_INVALID_DB; + err = decode_uint16(meta_buf, meta_len, vsize, voff, &val_u16); + if (err) + return MMDB_ERR_INVALID_DB; + db->metadata.record_size = val_u16; + } else if (ksize == 10 && memcmp(meta_buf + koff, "ip_version", 10) == 0) + { + if (vtype != DT_UINT16 && vtype != DT_UINT32) + return MMDB_ERR_INVALID_DB; + err = decode_uint16(meta_buf, meta_len, vsize, voff, &val_u16); + if (err) + return 
MMDB_ERR_INVALID_DB; + db->metadata.ip_version = val_u16; + } else if (ksize == 11 && memcmp(meta_buf + koff, "build_epoch", 11) == 0) + { + if (vtype != DT_UINT64 && vtype != DT_UINT32) + return MMDB_ERR_INVALID_DB; + err = decode_uint64(meta_buf, meta_len, vsize, voff, &val_u64); + if (err) + return MMDB_ERR_INVALID_DB; + db->metadata.build_epoch = val_u64; + } else if (ksize == 13 && memcmp(meta_buf + koff, "database_type", 13) == 0) + { + if (vtype != DT_STRING) + return MMDB_ERR_INVALID_DB; + copy_len = vsize; + if (copy_len >= sizeof(db->metadata.database_type)) + copy_len = sizeof(db->metadata.database_type) - 1; + if (voff + copy_len > meta_len) + return MMDB_ERR_INVALID_DB; + memcpy(db->metadata.database_type, meta_buf + voff, copy_len); + db->metadata.database_type[copy_len] = '\0'; + } + + /* Skip the value in the stream to advance to the next key */ + err = skip_value(meta_buf, meta_len, val_off, &off, 0); + if (err) + return MMDB_ERR_INVALID_DB; + } + + /* Validate required fields */ + if (db->metadata.node_count == 0 || + db->metadata.record_size == 0 || + (db->metadata.ip_version != 4 && db->metadata.ip_version != 6)) + { + return MMDB_ERR_INVALID_DB; + } + + /* Only 24, 28, 32 bit records are supported */ + if (db->metadata.record_size != 24 && + db->metadata.record_size != 28 && + db->metadata.record_size != 32) + { + return MMDB_ERR_INVALID_DB; + } + + return MMDB_OK; +} + +/* Search tree traversal */ + +/* Read a single record from a node. bit=0 for left, bit=1 for right. 
*/ +static int read_node(const uint8_t *buf, size_t buflen, + uint32_t node, uint32_t bit, uint32_t record_size, + uint32_t node_offset_mult, uint32_t *value) +{ + size_t offset = (size_t)node * node_offset_mult; + size_t o; + + switch (record_size) + { + case 24: + { + o = offset + bit * 3; + if (o + 3 > buflen) + return MMDB_ERR_CORRUPT; + *value = ((uint32_t)buf[o] << 16) | + ((uint32_t)buf[o + 1] << 8) | + (uint32_t)buf[o + 2]; + return MMDB_OK; + } + case 28: + { + if (offset + 7 > buflen) + return MMDB_ERR_CORRUPT; + if (bit == 0) + { + *value = (((uint32_t)buf[offset + 3] & 0xF0) << 20) | + ((uint32_t)buf[offset] << 16) | + ((uint32_t)buf[offset + 1] << 8) | + (uint32_t)buf[offset + 2]; + } else + { + *value = (((uint32_t)buf[offset + 3] & 0x0F) << 24) | + ((uint32_t)buf[offset + 4] << 16) | + ((uint32_t)buf[offset + 5] << 8) | + (uint32_t)buf[offset + 6]; + } + return MMDB_OK; + } + case 32: + { + o = offset + bit * 4; + if (o + 4 > buflen) + return MMDB_ERR_CORRUPT; + *value = ((uint32_t)buf[o] << 24) | + ((uint32_t)buf[o + 1] << 16) | + ((uint32_t)buf[o + 2] << 8) | + (uint32_t)buf[o + 3]; + return MMDB_OK; + } + } + return MMDB_ERR_CORRUPT; +} + +/* Traverse the search tree for a 128-bit IP (IPv6 or IPv4-mapped). */ +static int traverse_tree(MMDB_DB *db, const uint8_t ip[16], + int start_bit, uint32_t start_node, + uint32_t *result_node, int *prefix_len) +{ + uint32_t node = start_node; + uint32_t node_count = db->metadata.node_count; + uint32_t record_size = db->metadata.record_size; + uint32_t node_offset_mult = record_size / 4; + uint32_t bit; + int i; + int err; + + for (i = start_bit; i < 128 && node < node_count; i++) + { + bit = (ip[i >> 3] >> (7 - (i & 7))) & 1; + err = read_node(db->data, db->data_size, node, bit, + record_size, node_offset_mult, &node); + if (err) + return err; + } + + *result_node = node; + *prefix_len = i; + return MMDB_OK; +} + +/* Pre-walk the first 96 zero bits to find the IPv4 subtree start + * in an IPv6 database. 
*/ +static int find_ipv4_start(MMDB_DB *db) +{ + uint32_t node = 0; + uint32_t node_count = db->metadata.node_count; + uint32_t record_size = db->metadata.record_size; + uint32_t node_offset_mult = record_size / 4; + int i; + int err; + + db->ipv4_start_bit_depth = 96; + + for (i = 0; i < 96 && node < node_count; i++) + { + err = read_node(db->data, db->data_size, node, 0, + record_size, node_offset_mult, &node); + if (err) + return err; + } + + db->ipv4_start_node = node; + db->ipv4_start_bit_depth = i; + return MMDB_OK; +} + +/* Lookup core */ + +static int lookup_ip128(MMDB_DB *db, const uint8_t ip[16], int is_ipv4, + MMDB_Result *result) +{ + uint32_t node; + int prefix_len; + int err; + int start_bit; + uint32_t start_node; + size_t data_offset; + + result->db = db; + result->offset = 0; + result->has_data = 0; + + if (is_ipv4) + { + start_bit = db->ipv4_start_bit_depth; + start_node = db->ipv4_start_node; + } else + { + start_bit = 0; + start_node = 0; + } + + err = traverse_tree(db, ip, start_bit, start_node, &node, &prefix_len); + if (err) + return err; + + if (node == db->metadata.node_count) + { + /* No data for this IP */ + return MMDB_OK; + } + if (node > db->metadata.node_count) + { + /* Pointer into data section */ + data_offset = (size_t)(node - db->metadata.node_count) - DATA_SEPARATOR_SIZE; + if (data_offset >= db->data_section_size) + return MMDB_ERR_CORRUPT; + result->offset = data_offset; + result->has_data = 1; + return MMDB_OK; + } + + return MMDB_ERR_CORRUPT; +} + +/* Public API */ + +MMDB_Status mmdb_open(MMDB_DB *db, const char *filename) +{ + const uint8_t *meta_start; + size_t meta_offset; + size_t search_tree_size; + size_t meta_marker_offset; + int err; + + if (!db || !filename) + return MMDB_ERR_BADARG; + + memset(db, 0, sizeof(*db)); + + db->data = mmdb_mmap_file(filename, &db->data_size, &db->is_mmap); + if (!db->data) + return MMDB_ERR_OPEN; + + /* Find metadata marker */ + meta_start = find_metadata(db->data, db->data_size); + if 
(!meta_start) + { + mmdb_close(db); + return MMDB_ERR_INVALID_DB; + } + + meta_offset = (size_t)(meta_start - db->data) + METADATA_MARKER_LEN; + + /* Parse metadata */ + err = parse_metadata(db, db->data + meta_offset, + db->data_size - meta_offset); + if (err) + { + mmdb_close(db); + return err; + } + + /* Calculate section offsets. Per the spec: + * search_tree_size = (record_size * 2 / 8) * node_count + * = (record_size / 4) * node_count + */ + if (db->metadata.node_count > SIZE_MAX / (db->metadata.record_size / 4)) + { + mmdb_close(db); + return MMDB_ERR_INVALID_DB; + } + search_tree_size = (size_t)(db->metadata.record_size / 4) * + (size_t)db->metadata.node_count; + db->data_section_offset = search_tree_size + DATA_SEPARATOR_SIZE; + + /* data section ends where the metadata marker begins */ + meta_marker_offset = (size_t)(meta_start - db->data); + if (db->data_section_offset > meta_marker_offset) + { + mmdb_close(db); + return MMDB_ERR_INVALID_DB; + } + db->data_section_size = meta_marker_offset - db->data_section_offset; + + /* For IPv6 databases, find the IPv4 subtree start */ + if (db->metadata.ip_version == 6) + { + err = find_ipv4_start(db); + if (err) + { + mmdb_close(db); + return err; + } + } else + { + db->ipv4_start_node = 0; + db->ipv4_start_bit_depth = 96; + } + + return MMDB_OK; +} + +void mmdb_close(MMDB_DB *db) +{ + if (!db) + return; + if (db->data) + { + if (db->is_mmap) + mmdb_munmap(db->data, db->data_size); + else + free(db->data); + db->data = NULL; + } + db->data_size = 0; +} + +MMDB_Status mmdb_lookup(MMDB_DB *db, const char *ip_str, MMDB_Result *result) +{ + uint8_t ip128[16]; + struct in_addr addr4; + struct in6_addr addr6; + + if (!db || !db->data || !ip_str || !result) + return MMDB_ERR_BADARG; + + memset(ip128, 0, sizeof(ip128)); + + if (inet_pton(AF_INET, ip_str, &addr4) == 1) + { + /* MMDB always uses a 128-bit search buffer. + * IPv4 goes in the last 4 bytes (offset 12). 
+ * The is_ipv4 flag skips the first 96 bits + * so traversal begins at the IPv4 address. + */ + memcpy(ip128 + 12, &addr4.s_addr, 4); + return lookup_ip128(db, ip128, 1, result); + } + + if (inet_pton(AF_INET6, ip_str, &addr6) == 1) + { + if (db->metadata.ip_version == 4) + return MMDB_ERR_IPV6_IN_V4; + memcpy(ip128, addr6.s6_addr, 16); + return lookup_ip128(db, ip128, 0, result); + } + + return MMDB_ERR_BADARG; +} + +MMDB_Status mmdb_lookup_sockaddr(MMDB_DB *db, const struct sockaddr *sa, + MMDB_Result *result) +{ + uint8_t ip128[16]; + const struct sockaddr_in *sa4; + const struct sockaddr_in6 *sa6; + + if (!db || !db->data || !sa || !result) + return MMDB_ERR_BADARG; + + memset(ip128, 0, sizeof(ip128)); + + if (sa->sa_family == AF_INET) + { + sa4 = (const struct sockaddr_in *)sa; + memcpy(ip128 + 12, &sa4->sin_addr.s_addr, 4); + return lookup_ip128(db, ip128, 1, result); + } + + if (sa->sa_family == AF_INET6) + { + sa6 = (const struct sockaddr_in6 *)sa; + if (db->metadata.ip_version == 4) + return MMDB_ERR_IPV6_IN_V4; + memcpy(ip128, sa6->sin6_addr.s6_addr, 16); + return lookup_ip128(db, ip128, 0, result); + } + + return MMDB_ERR_BADARG; +} + +static MMDB_Status mmdb_get_str_raw_va(MMDB_Result *result, const char **out, size_t *len, va_list ap) +{ + size_t offset; + int err; + int type; + uint32_t size; + size_t data_off; + const uint8_t *dsec; + size_t dsec_len; + + if (!result || !result->db || !out || !len) + return MMDB_ERR_BADARG; + + *out = NULL; + *len = 0; + + if (!result->has_data) + return MMDB_ERR_NODATA; + + dsec = result->db->data + result->db->data_section_offset; + dsec_len = result->db->data_section_size; + + /* Walk the path */ + err = walk_path(dsec, dsec_len, result->offset, ap, &offset); + if (err) + return err; + + /* Resolve the final value */ + err = resolve_entry(dsec, dsec_len, offset, &type, &size, &data_off); + if (err) + return err; + if (type != DT_STRING) + return MMDB_ERR_TYPE; + if (data_off + size > dsec_len) + return 
MMDB_ERR_CORRUPT; + + *out = (const char *)(dsec + data_off); + *len = (size_t)size; + return MMDB_OK; +} + +MMDB_Status mmdb_do_get_str(MMDB_Result *result, char **out, ...) +{ + const char *ptr; + size_t len; + va_list ap; + int err; + + if (!out) + return MMDB_ERR_BADARG; + + *out = NULL; + va_start(ap, out); + err = mmdb_get_str_raw_va(result, &ptr, &len, ap); + va_end(ap); + if (err) + return err; + + *out = malloc(len + 1); + if (!*out) + return MMDB_ERR_OPEN; + memcpy(*out, ptr, len); + (*out)[len] = '\0'; + return MMDB_OK; +} + +MMDB_Status mmdb_do_get_uint32(MMDB_Result *result, uint32_t *out, ...) +{ + va_list ap; + size_t offset; + int err; + int type; + uint32_t size; + size_t data_off; + const uint8_t *dsec; + size_t dsec_len; + + if (!result || !result->db || !out) + return MMDB_ERR_BADARG; + + *out = 0; + + if (!result->has_data) + return MMDB_ERR_NODATA; + + dsec = result->db->data + result->db->data_section_offset; + dsec_len = result->db->data_section_size; + + /* Walk the path */ + va_start(ap, out); + err = walk_path(dsec, dsec_len, result->offset, ap, &offset); + va_end(ap); + if (err) + return err; + + /* Resolve the final value */ + err = resolve_entry(dsec, dsec_len, offset, &type, &size, &data_off); + if (err) + return err; + + /* Accept both uint16 and uint32 */ + if (type == DT_UINT32 || type == DT_UINT16) + { + return decode_uint32(dsec, dsec_len, size, data_off, out); + } + + return MMDB_ERR_TYPE; +} + +MMDB_Status mmdb_do_get_bool(MMDB_Result *result, int *out, ...) 
+{ + va_list ap; + size_t offset; + int err; + int type; + uint32_t size; + size_t data_off; + const uint8_t *dsec; + size_t dsec_len; + + if (!result || !result->db || !out) + return MMDB_ERR_BADARG; + + *out = 0; + + if (!result->has_data) + return MMDB_ERR_NODATA; + + dsec = result->db->data + result->db->data_section_offset; + dsec_len = result->db->data_section_size; + + va_start(ap, out); + err = walk_path(dsec, dsec_len, result->offset, ap, &offset); + va_end(ap); + if (err) + return err; + + err = resolve_entry(dsec, dsec_len, offset, &type, &size, &data_off); + if (err) + return err; + + if (type != DT_BOOL) + return MMDB_ERR_TYPE; + + /* Per spec, boolean size is 0 (false) or 1 (true), no payload */ + *out = (size != 0) ? 1 : 0; + return MMDB_OK; +} + +MMDB_Status mmdb_do_get_double(MMDB_Result *result, double *out, ...) +{ + va_list ap; + size_t offset; + int err; + int type; + uint32_t size; + size_t data_off; + const uint8_t *dsec; + size_t dsec_len; + float f; + + if (!result || !result->db || !out) + return MMDB_ERR_BADARG; + + *out = 0; + + if (!result->has_data) + return MMDB_ERR_NODATA; + + dsec = result->db->data + result->db->data_section_offset; + dsec_len = result->db->data_section_size; + + va_start(ap, out); + err = walk_path(dsec, dsec_len, result->offset, ap, &offset); + va_end(ap); + if (err) + return err; + + err = resolve_entry(dsec, dsec_len, offset, &type, &size, &data_off); + if (err) + return err; + + if (type == DT_FLOAT64) + { + return decode_float64(dsec, dsec_len, size, data_off, out); + } + if (type == DT_FLOAT32) + { + /* Promote float32 to double */ + err = decode_float32(dsec, dsec_len, size, data_off, &f); + if (err) + return err; + *out = (double)f; + return MMDB_OK; + } + + return MMDB_ERR_TYPE; +} + +const char *mmdb_strerror(MMDB_Status err) +{ + switch (err) + { + case MMDB_OK: + return "Success"; + case MMDB_ERR_OPEN: + return "Could not open database file"; + case MMDB_ERR_INVALID_DB: + return "Invalid MMDB 
database"; + case MMDB_ERR_BADARG: + return "Invalid argument"; + case MMDB_ERR_CORRUPT: + return "Corrupt database (search tree or data section)"; + case MMDB_ERR_NODATA: + return "No data found for the requested path"; + case MMDB_ERR_TYPE: + return "Data type mismatch"; + case MMDB_ERR_IPV6_IN_V4: + return "Cannot look up IPv6 address in IPv4-only database"; + default: + return "Unknown error"; + } +}