skip to Main Content

Our IP address ranges table has ~2.8 million records, of the following structure:

{
  start_ip: integer,    // IPv4 converted to int32
  end_ip: integer,      // IPv4 converted to int32
  country_code: string,
  country: string,
  region: string,
  city: string
}

There is a compound index on start_ip and end_ip:

{ start_ip: 1, end_ip: 1 }

To get records from the table (read: to know what range the given IP belongs to) we use this query:

{"start_ip"=>{"$lte"=>3284004939}, "end_ip"=>{"$gte"=>3284004939}}

However we observe an unacceptably low performance of the query. Here is the explain example:

{
  "explainVersion": "2",
  "queryPlanner": {
    "namespace": "restclass_development.ip_ranges",
    "indexFilterSet": false,
    "parsedQuery": {
      "$and": [
        {
          "start_ip": {
            "$lte": 3284004939
          }
        },
        {
          "end_ip": {
            "$gte": 3284004939
          }
        }
      ]
    },
    "queryHash": "379E557D",
    "planCacheKey": "E1838ADA",
    "maxIndexedOrSolutionsReached": false,
    "maxIndexedAndSolutionsReached": false,
    "maxScansToExplodeReached": false,
    "winningPlan": {
      "queryPlan": {
        "stage": "FETCH",
        "planNodeId": 2,
        "inputStage": {
          "stage": "IXSCAN",
          "planNodeId": 1,
          "keyPattern": {
            "start_ip": 1,
            "end_ip": 1
          },
          "indexName": "start_ip_1_end_ip_1",
          "isMultiKey": false,
          "multiKeyPaths": {
            "start_ip": [],
            "end_ip": []
          },
          "isUnique": false,
          "isSparse": false,
          "isPartial": false,
          "indexVersion": 2,
          "direction": "forward",
          "indexBounds": {
            "start_ip": [
              "[-inf.0, 3284004939]"
            ],
            "end_ip": [
              "[3284004939, inf.0]"
            ]
          }
        }
      },
      "slotBasedPlan": {
        "slots": "$$RESULT=s25 env: { s9 = {"start_ip" : 1, "end_ip" : 1}, s5 = IndexBounds("field #0['start_ip']: [-inf.0, 3284004939], field #1['end_ip']: [3284004939, inf.0]"), s3 = 1699191077268 (NOW), s24 = true, s1 = TimeZoneDatabase(Africa/Ceuta...Etc/GMT-12) (timeZoneDB), s2 = Nothing (SEARCH_META), s13 = Nothing }",
        "stages": "[2] nlj inner [] [s19, s20, s21, s22, s23] n    left n        [1] branch {s24} [s19, s20, s21, s22, s23] n        [s4, s6, s7, s8, s9] [1] ixscan_generic s5 s8 s4 s6 s7 lowPriority [] @"99a3fa22-8459-4875-bbc9-3cf3ee9afc55" @"start_ip_1_end_ip_1" true n        [s10, s16, s17, s18, s9] [1] nlj inner [] [s11, s12] n            left n                [1] project [s11 = getField(s14, "l"), s12 = getField(s14, "h")] n                [1] unwind s14 s15 s13 false n                [1] limit 1 n                [1] coscan n            right n                [1] ixseek s11 s12 s18 s10 s16 s17 [] @"99a3fa22-8459-4875-bbc9-3cf3ee9afc55" @"start_ip_1_end_ip_1" true n    right n        [2] limit 1 n        [2] seek s19 s25 s26 s20 s21 s22 s23 [] @"99a3fa22-8459-4875-bbc9-3cf3ee9afc55" true false n"
      }
    },
    "rejectedPlans": []
  },
  "executionStats": {
    "executionSuccess": true,
    "nReturned": 1,
    "executionTimeMillis": 1276,
    "totalKeysExamined": 2422998,
    "totalDocsExamined": 1,
    "executionStages": {
      "stage": "nlj",
      "planNodeId": 2,
      "nReturned": 1,
      "executionTimeMillisEstimate": 1276,
      "opens": 1,
      "closes": 1,
      "saveState": 1,
      "restoreState": 1,
      "isEOF": 1,
      "totalDocsExamined": 1,
      "totalKeysExamined": 2422998,
      "collectionScans": 0,
      "collectionSeeks": 1,
      "indexScans": 0,
      "indexSeeks": 1,
      "indexesUsed": [
        "start_ip_1_end_ip_1",
        "start_ip_1_end_ip_1"
      ],
      "innerOpens": 1,
      "innerCloses": 1,
      "outerProjects": [],
      "outerCorrelated": [
        19,
        20,
        21,
        22,
        23
      ],
      "outerStage": {
        "stage": "branch",
        "planNodeId": 1,
        "nReturned": 1,
        "executionTimeMillisEstimate": 1276,
        "opens": 1,
        "closes": 1,
        "saveState": 1,
        "restoreState": 1,
        "isEOF": 1,
        "numTested": 1,
        "thenBranchOpens": 1,
        "thenBranchCloses": 1,
        "elseBranchOpens": 0,
        "elseBranchCloses": 0,
        "filter": "s24 ",
        "thenSlots": [
          4,
          6,
          7,
          8,
          9
        ],
        "elseSlots": [
          10,
          16,
          17,
          18,
          9
        ],
        "outputSlots": [
          19,
          20,
          21,
          22,
          23
        ],
        "thenStage": {
          "stage": "ixscan_generic",
          "planNodeId": 1,
          "nReturned": 1,
          "executionTimeMillisEstimate": 1276,
          "opens": 1,
          "closes": 1,
          "saveState": 1,
          "restoreState": 1,
          "isEOF": 1,
          "indexName": "start_ip_1_end_ip_1",
          "keysExamined": 2422998,
          "seeks": 2422997,
          "numReads": 2422998,
          "indexKeySlot": 8,
          "recordIdSlot": 4,
          "snapshotIdSlot": 6,
          "indexIdentSlot": 7,
          "outputSlots": [],
          "indexKeysToInclude": "00000000000000000000000000000000"
        },
        "elseStage": {
          "stage": "nlj",
          "planNodeId": 1,
          "nReturned": 0,
          "executionTimeMillisEstimate": 0,
          "opens": 0,
          "closes": 0,
          "saveState": 1,
          "restoreState": 1,
          "isEOF": 0,
          "totalDocsExamined": 0,
          "totalKeysExamined": 0,
          "collectionScans": 0,
          "collectionSeeks": 0,
          "indexScans": 0,
          "indexSeeks": 0,
          "indexesUsed": [
            "start_ip_1_end_ip_1"
          ],
          "innerOpens": 0,
          "innerCloses": 0,
          "outerProjects": [],
          "outerCorrelated": [
            11,
            12
          ],
          "outerStage": {
            "stage": "project",
            "planNodeId": 1,
            "nReturned": 0,
            "executionTimeMillisEstimate": 0,
            "opens": 0,
            "closes": 0,
            "saveState": 1,
            "restoreState": 1,
            "isEOF": 0,
            "projections": {
              "11": "getField(s14, "l") ",
              "12": "getField(s14, "h") "
            },
            "inputStage": {
              "stage": "unwind",
              "planNodeId": 1,
              "nReturned": 0,
              "executionTimeMillisEstimate": 0,
              "opens": 0,
              "closes": 0,
              "saveState": 1,
              "restoreState": 1,
              "isEOF": 0,
              "inputSlot": 13,
              "outSlot": 14,
              "outIndexSlot": 15,
              "preserveNullAndEmptyArrays": 0,
              "inputStage": {
                "stage": "limit",
                "planNodeId": 1,
                "nReturned": 0,
                "executionTimeMillisEstimate": 0,
                "opens": 0,
                "closes": 0,
                "saveState": 1,
                "restoreState": 1,
                "isEOF": 0,
                "limit": 1,
                "inputStage": {
                  "stage": "coscan",
                  "planNodeId": 1,
                  "nReturned": 0,
                  "executionTimeMillisEstimate": 0,
                  "opens": 0,
                  "closes": 0,
                  "saveState": 1,
                  "restoreState": 1,
                  "isEOF": 0
                }
              }
            }
          },
          "innerStage": {
            "stage": "ixseek",
            "planNodeId": 1,
            "nReturned": 0,
            "executionTimeMillisEstimate": 0,
            "opens": 0,
            "closes": 0,
            "saveState": 1,
            "restoreState": 1,
            "isEOF": 0,
            "indexName": "start_ip_1_end_ip_1",
            "keysExamined": 0,
            "seeks": 0,
            "numReads": 0,
            "indexKeySlot": 18,
            "recordIdSlot": 10,
            "snapshotIdSlot": 16,
            "indexIdentSlot": 17,
            "outputSlots": [],
            "indexKeysToInclude": "00000000000000000000000000000000",
            "seekKeyLow": "s11 ",
            "seekKeyHigh": "s12 "
          }
        }
      },
      "innerStage": {
        "stage": "limit",
        "planNodeId": 2,
        "nReturned": 1,
        "executionTimeMillisEstimate": 0,
        "opens": 1,
        "closes": 1,
        "saveState": 1,
        "restoreState": 1,
        "isEOF": 1,
        "limit": 1,
        "inputStage": {
          "stage": "seek",
          "planNodeId": 2,
          "nReturned": 1,
          "executionTimeMillisEstimate": 0,
          "opens": 1,
          "closes": 1,
          "saveState": 1,
          "restoreState": 1,
          "isEOF": 0,
          "numReads": 1,
          "recordSlot": 25,
          "recordIdSlot": 26,
          "seekKeySlot": 19,
          "snapshotIdSlot": 20,
          "indexIdentSlot": 21,
          "indexKeySlot": 22,
          "indexKeyPatternSlot": 23,
          "fields": [],
          "outputSlots": []
        }
      }
    },
    "allPlansExecution": []
  },
  "command": {
    "find": "ip_ranges",
    "filter": {
      "start_ip": {
        "$lte": 3284004939
      },
      "end_ip": {
        "$gte": 3284004939
      }
    },
    "$db": "restclass_development"
  },
  "serverInfo": {
    "host": "sun.arg.network",
    "port": 27017,
    "version": "7.0.2",
    "gitVersion": "02b3c655e1302209ef046da6ba3ef6749dd0b62a"
  },
  "serverParameters": {
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
    "internalQueryFrameworkControl": "trySbeEngine"
  },
  "ok": 1
}

This query is executed in ~1.5 seconds. What would be a more time efficient query and/or indexing strategy for our use case?

2

Answers


  1. On a MacBook Pro M2 Max 64GB machine running mongod 6.0.4, 3m documents in a collection with a uniform distribution of IP ranges, executing

    db.foo.find({start_ip:{$lte:190000}, end_ip:{$gte:190000}}).explain(true);
    

    without an index yields:

      executionStats: {
        executionSuccess: true,
        nReturned: 1,
        **executionTimeMillis: 564**,
        totalKeysExamined: 0,
        totalDocsExamined: 3000000,
        executionStages: {
          stage: 'COLLSCAN',
    

    With index:

    db.foo.createIndex({start_ip:1, end_ip:1});                                  
    

    yields:

      executionStats: {
        executionSuccess: true,
        nReturned: 1,
        **executionTimeMillis: 17**,
        totalKeysExamined: 19002,
        totalDocsExamined: 1,
        executionStages: {
          stage: 'FETCH',
          nReturned: 1,
          executionTimeMillisEstimate: 1,
          works: 19002,
          advanced: 1,
          needTime: 19000,
          needYield: 0,
          saveState: 19,
          restoreState: 19,
          isEOF: 1,
          docsExamined: 1,
          alreadyHasObj: 0,
          inputStage: {
            stage: 'IXSCAN',
            nReturned: 1,
            executionTimeMillisEstimate: 1,
    

    3m docs of this size (avg 120 bytes) is "small" both non-indexed and indexed fetches reflect same. Are you sure you’re not somehow dragging back 1000s or more docs and the delay is in the network transmission?

    Login or Signup to reply.
  2. NOTE: This is a non-orthodox approach to solving the problem.

    First, acknowledgements to John Page (https://www.mongodb.com/developer/author/john-page), one of the all-time MongoDB greats, for first exploring this approach.

    Often we encounter data range challenges in our data design and querying. The OP has approached this in the traditional way: create two fields, start_THING and end_THING, set up a compound key on the fields, and use start_THING $lte target and end_THING $gte target to quickly find one doc. Or, to find things in a range, start_THING $gte lowerBound and end_THING $lt upperBound.

    Another way to do this is by using a single geospatial field with a 2dsphere index on it. Here’s how this approach would work in the context of the OP data:

    • IPv4 addresses start at 0.0.0.0 and go to 255.255.255.255. The OP notes that a conversion to integer is clearly possible. The range of IPs in integer terms is 0 to 4294967295 (256 ** 4).
    • Consider the IP range 10.3.232.0 to 10.3.232.5. This is 168028160 and 168028165 in integer terms, respectively.
    • Instead of storing 2 fields, we scale these integers to the range of longitudes (-180.0 to 180.0) as follows (all numbers including constants shown to provide clarity:
        geo_start = ((168028160 / 4294967295) * (180 - (-180))) + (-180);
        geo_end = ((168028165 / 4294967295) * (180 - (-180))) + (-180);
    
    • We then store these in a single geo field; note the latitude is essentially unused and set to 0.0:
        geo: { type: "LineString", coordinates: [ [ geo_start, 0.0 ], [geo_end, 0.0 ] ] }
    

    Here is an example of a data record fully populated:

    {
      _id: ObjectId("6549055e795d8927e4779193"),
      N: 1000,
      start_ip: '10.3.232.0',
      end_ip: '10.3.232.5',
      int_start_ip: 168028160,
      int_end_ip: 168028165,
      geo: {
        type: 'LineString',
        coordinates: [ [ -165.91604232460168, 0 ], [ -165.91604190550652, 0 ] ]
      },
      country_code: 'XXX',
      country: 'Xland',
      region: 'Mars',
      city: 'Foo'
    }
    
    • We index the field with:
        db.foo.createIndex({geo:"2dsphere"})  
    
    • Now it is possible to lookup an item by converting a start and/or end IP to scaled longitude and using $geoIntersects:
        function convertToGeo(sip) {
            q = sip.split('.');
    
            var iip = 0;
            for(var n = 0; n < 4; n++) {
                iip += (parseInt(q[n]) * (256 ** (3 - n)));
            }
            return ((iip / new NumberLong("4294967295")) * 360 ) - 180 ;
        }
    
        var gsip = convertToGeo('10.232.0.3'); // .3 is in between .0 and .5; see data record above
        var xx = db.foo.findOne({
            geo: {
                $geoIntersects: {
                    $geometry: { type: "LineString",
                         // Must create a second discrete point for a line, so make it 0.1
                         coordinates: [ [ gsip, 0.0 ], gsip, 0.1 ] ] }
                }
            }
        };
        // The doc above is found!
    

    Sampling 100 random IPs in a population of 3m items show the single geo index approach working much faster than the compound index approach, typically taking no more than 1 millisecond:

    100 samples of GEO approach:
    {
      count: 100,
      execMillis_min: 0,
      execMillis_max: 1,
      execMillis_tot: 16,
      execMillis_avg: 0,
      totKeysExamined_min: 296,
      totKeysExamined_max: 648,
      totKeysExamined_tot: 48327,
      totKeysExamined_avg: 3.744628643254086,
      totDocsExamined_min: 222,
      totDocsExamined_max: 556,
      totDocsExamined_tot: 39977,
      totDocsExamined_avg: 2.806450990888347
    }
    One of the explains:
    {
      explainVersion: '1',
      queryPlanner: {
      ...
          inputStage: {
            stage: 'IXSCAN',
            keyPattern: { geo: '2dsphere' },
            indexName: 'geo_2dsphere',
    
      executionStats: {
        executionSuccess: true,
        nReturned: 1,
        executionTimeMillis: 1,
        totalKeysExamined: 353,
        totalDocsExamined: 266,
            keysExamined: 353,
            seeks: 21,
            dupsTested: 332,
            dupsDropped: 66
          }
        },
     ...
    }
    
    100 samples of COMPOUND start/end approach:
    {
      count: 100,
      execMillis_min: 48,
      execMillis_max: 1360,
      execMillis_tot: 63096,
      execMillis_avg: 630.96,
      totKeysExamined_min: 10000,
      totKeysExamined_max: 2859339,
      totKeysExamined_tot: 137167079,
      totKeysExamined_avg: 19980.70323149996,
      totDocsExamined_min: 1,
      totDocsExamined_max: 1,
      totDocsExamined_tot: 100,
      totDocsExamined_avg: 0.010102051554122065
    }
    
    One of the explains:
    {
      explainVersion: '1',
      queryPlanner: {
          inputStage: {
            stage: 'IXSCAN',
            keyPattern: {
              int_start_ip: 1,
              int_end_ip: 1
            },
            indexName: 'int_start_ip_1_int_end_ip_1',
      ...
      executionStats: {
        executionSuccess: true,
        nReturned: 1,
        executionTimeMillis: 1051,
        totalKeysExamined: 2311153,
        totalDocsExamined: 1,
            keysExamined: 2311153,
            seeks: 2311152,
            dupsTested: 0,
            dupsDropped: 0
    }
    
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search