skip to Main Content

I’m building a RAG chatbot that takes the content stored in my MongoDB collection and sends it to Pinecone as vectors.

That way my users can ask the chatbot questions about their own reflections.

Here is my code:

import os
from pymongo import MongoClient
from pinecone import Pinecone, ServerlessSpec
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where  # Import certifi library

# MongoDB connection: TLS is enabled and server certificates are validated
# against the CA bundle whose path certifi's where() returns.
client = MongoClient(
    "my-mongodb-uri",
    tls=True,  # Enable TLS encryption
    tlsAllowInvalidCertificates=False,  # Don't allow invalid certificates
    tlsCAFile=where()  # Use certifi library for CA bundle
)
# Database 'test', collection 'reflections' -- the collection that is watched
# for changes further down.
db = client['test']
collection = db['reflections']

# Pinecone initialization: client object plus a handle to the
# "langchain-demo" index.
# NOTE(review): presumably the index already exists and its dimension matches
# the embedding size produced by the model below -- confirm.
pc = Pinecone(api_key='my-api-key')
index = pc.Index("langchain-demo")

# Sentence-embedding model used to turn each document's text into a vector.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Watch for changes
# Opens a change stream on the collection and mirrors every insert, update
# and delete event into the Pinecone index. The whole loop sits inside one
# try block, so the first exception raised anywhere in it ends the watching.
try:
  cursor = collection.watch()
  for change in cursor:
    print("Change detected:", change)
    if change['operationType'] == 'insert':
      # Insert events carry the complete new document in 'fullDocument'.
      document = change['fullDocument']
      vector = model.encode(document['content'])  # Assuming 'content' is the field
      print("Extracted Vector:", vector)

      # Extract document ID from ObjectId
      # NOTE(review): likely source of "list index out of range". str() of an
      # ObjectId is presumably the bare hex string with no quote characters,
      # so split("'") yields a single element and [1] raises IndexError.
      # The update and delete branches below use str(...) without the split.
      document_id = str(document['_id']).split("'")[1]

      # Wrap upsert call with empty vector check
      # NOTE(review): the printed vector looks like a NumPy array; truth-testing
      # a multi-element array raises ValueError -- confirm and check its
      # length/size instead.
      if vector:  # Check if vector is not empty
        # NOTE(review): passes a dict of id -> array; confirm against the
        # Pinecone upsert documentation which shapes `vectors` accepts.
        index.upsert(vectors={document_id: vector})

    elif change['operationType'] == 'update':
      document_id = str(change['documentKey']['_id'])
      # Only the fields reported as changed by this update are inspected.
      updated_fields = change['updateDescription']['updatedFields']
      if 'content' in updated_fields:
        # Re-embed only when the text itself changed.
        vector = model.encode(updated_fields['content'])
        # NOTE(review): call shape differs from the insert branch (ids in
        # `vectors`, values in a `data` keyword) -- confirm that `data` is an
        # accepted parameter of upsert.
        index.upsert(vectors=[document_id], data=vector.tolist())

    elif change['operationType'] == 'delete':
      # Delete events only carry the key, which is all that is needed here.
      document_id = str(change['documentKey']['_id'])
      index.delete(ids=[document_id])

except OperationFailure as e:
  print("Error watching collection:", e)
except Exception as e:
  # Catch-all: only the exception message is printed (no traceback), which
  # hides the line that actually failed.
  print("An error occurred:", e)

This is the log I received on my terminal:

Change detected: {'_id': {'_data': '82665622AE0000000B2B042C0100296E5A1004F1A0DC5D2C0C4EC2843048538C6B36F3463C6F7065726174696F6E54797065003C696E736572740046646F63756D656E744B65790046645F69640064665622AE13B3C25B81FE46C1000004'}, 'operationType': 'insert', 'clusterTime': Timestamp(1716921006, 11), 'wallTime': datetime.datetime(2024, 5, 28, 18, 30, 6, 916000), 'fullDocument': {'_id': ObjectId('665622ae13b3c25b81fe46c1'), 'user': ObjectId('65d8937f6408bf2c0ca8d264'), 'content': 'teste mongodb', 'createdAt': datetime.datetime(2024, 5, 28, 18, 30, 6, 908000), '__v': 0}, 'ns': {'db': 'test', 'coll': 'reflections'}, 'documentKey': {'_id': ObjectId('665622ae13b3c25b81fe46c1')}}
Extracted Vector: [ 2.64226589e-02  5.51917292e-02 -8.01229179e-02  6.16759956e-02
  2.97571346e-03 -5.25409095e-02 -2.06136722e-02  2.41196547e-02
  1.70215759e-02  4.23866622e-02  6.73603592e-03 -5.09259291e-02
 -1.48372846e-02  7.09723681e-03 -1.48236733e-02 -1.65749993e-02
  9.42820311e-03 -3.47889923e-02  4.76156734e-02 -1.14416014e-02
 -2.76810937e-02 -7.33586177e-02  2.79922988e-02  4.48221937e-02
 -3.42520475e-02 -7.56083280e-02 -1.88546516e-02  3.71571630e-02
 -3.63041870e-02 -5.30020148e-02  4.92156222e-02  3.24101970e-02
  1.43917967e-02  2.31850450e-03 -3.07541038e-03  1.03986263e-02
  6.79664016e-02 -5.86303510e-02 -1.68009251e-02 -3.78069915e-02
  2.32911427e-02 -4.27663438e-02 -2.12721266e-02 -5.84340282e-02
  1.03256971e-01 -7.78031126e-02 -4.44727167e-02  1.10542767e-01
  6.30531460e-02 -3.19500417e-02  2.60527879e-02 -1.16486132e-01
 -5.51996529e-02  4.62782234e-02  3.89385074e-02  1.58163980e-01
 -8.12400039e-03 -3.00704502e-02 -3.35364193e-02  3.37796435e-02
  5.67190908e-02 -3.78245488e-02 -3.72845195e-02  3.34226415e-02
 -2.56197937e-02 -1.38711361e-02  3.36623588e-03  3.23332138e-02
 -4.64090845e-03 -2.81529520e-02  7.84241222e-03  1.87840331e-02
 -4.04786393e-02 -9.18242242e-03  1.42984195e-02  9.59344432e-02
 -8.56031012e-03 -9.00166705e-02  6.34619594e-02  3.46942805e-02
 -1.21315375e-01 -1.27947167e-01  2.92107705e-02 -5.98839074e-02
 -6.66733552e-03  2.20386945e-02  1.06475495e-01 -5.25924191e-02
 -4.81234193e-02 -6.64262474e-03  2.43848264e-02  1.28781358e-02
 -4.63195667e-02  7.55516142e-02  1.91857126e-02  5.11478595e-02
  7.73477629e-02  5.94875030e-02  7.93703869e-02  2.19271239e-02
 -5.33815532e-04  1.04968296e-02  7.78110474e-02 -3.95663939e-02
 -7.16580264e-03  3.37898545e-02  2.74467710e-02 -8.29642192e-02
 -4.68915589e-02 -2.53224969e-02 -1.62706897e-02  2.37261020e-02
 -3.05816010e-02  6.37660455e-03 -6.75126612e-02 -4.52077389e-03
 -4.86059487e-02 -5.44997081e-02 -1.06597044e-01  9.05475393e-02
  5.58611341e-02  7.52945840e-02 -3.28133292e-02 -2.91952137e-02
  3.31597738e-02  3.51161021e-03  8.75394344e-02  1.03704995e-33
  1.38022542e-01 -9.83591303e-02  4.43550274e-02  1.05274946e-03
  2.88495906e-02  7.61957541e-02 -2.07854919e-02  7.20968395e-02
 -8.30703005e-02  1.15298852e-03 -3.55196968e-02  1.29330147e-03
  2.64357477e-02 -5.18404879e-02  6.31415769e-02  3.08009889e-02
  3.76578197e-02  3.31700668e-02  1.30407363e-02  2.17529833e-02
  2.64088046e-02 -2.77639963e-02 -5.22936359e-02 -1.95870139e-02
  6.81351684e-03  6.55588508e-02 -3.70829068e-02 -2.03726869e-02
 -1.98107120e-02  1.93433892e-02 -6.25248849e-02 -4.19677747e-03
 -8.86835158e-02  9.57719833e-02  9.21144336e-03  2.34254729e-02
 -4.18317653e-02 -1.78317651e-02 -9.96567160e-02  2.77951220e-03
  5.78196160e-02  2.66690087e-02  6.71238592e-03 -1.26469489e-02
 -5.32274581e-02  4.53201607e-02  4.15935442e-02 -7.02985674e-02
  8.65548104e-02  1.93077344e-02 -8.29852968e-02  1.86279765e-03
 -9.70464796e-02  3.69346216e-02  4.38100286e-02  1.50465965e-02
  1.20123737e-02 -1.99827086e-02  4.49663401e-02 -2.27664579e-02
 -5.99026829e-02  2.14360859e-02  5.63477119e-03  7.70357698e-02
  4.07660700e-04 -1.44859506e-02 -6.10246100e-02 -5.85204959e-02
  1.64570604e-02  6.53662756e-02  3.03732231e-02  3.93993221e-02
 -2.78256908e-02  1.81106180e-02 -9.54285823e-03 -4.35498394e-02
  1.26534468e-02  1.56740248e-02 -8.21447670e-02  6.25986466e-03
  6.70449436e-02 -8.75824168e-02 -8.16964507e-02  1.55098401e-02
  7.45111937e-03  1.05148785e-01 -7.09625939e-03  2.56238016e-03
  2.65282597e-02 -1.08919352e-01  3.68081091e-04  1.03041202e-01
 -1.69032291e-02 -9.65850055e-02  3.27670053e-02 -1.52392722e-33
  2.88561676e-02 -6.08335771e-02  2.32155789e-02  4.65114824e-02
  1.07367739e-01 -3.87591906e-02  3.08643673e-02  7.41644343e-03
 -5.42402901e-02 -1.43773090e-02  3.89164947e-02 -1.10371888e-01
  2.37809680e-03 -2.96618696e-02 -5.97673617e-02 -3.35118175e-02
 -5.04749045e-02 -1.19375162e-01 -8.40588752e-03 -8.33129417e-03
 -1.01422250e-01  1.81846786e-02  4.10847627e-02 -2.07867264e-03
 -1.45480633e-02 -9.40343514e-02 -3.80858555e-02 -9.28523913e-02
 -3.49474549e-02  3.57780121e-02  2.82644555e-02  5.27115576e-02
 -4.71878871e-02  7.05714822e-02  2.55910270e-02  9.02293995e-03
  8.85344148e-02  3.68806347e-02  7.09631816e-02  4.70345989e-02
 -1.22014368e-02  9.92123038e-02 -5.31965233e-02 -5.14485613e-02
  6.69255704e-02  4.21657562e-02  1.32231619e-02 -7.31633278e-03
  2.26458535e-02 -2.64296532e-02 -3.49785648e-02 -2.58285161e-02
  5.24073280e-02 -1.41270570e-02  3.76646109e-02 -5.85196391e-02
 -2.59447079e-02 -4.46911417e-02  5.75564057e-02  3.45758721e-02
  1.68277156e-02  3.87044102e-02 -1.67042874e-02  6.53192848e-02
 -1.53256878e-02 -3.99874747e-02 -1.04426391e-01  2.89602540e-02
  1.76746026e-02  6.27156952e-03 -4.18228246e-02 -1.63344350e-02
 -1.45597830e-02  7.30229691e-02  4.04479764e-02 -6.02601655e-02
 -4.42335121e-02 -1.17401704e-02  5.29759973e-02  1.76030397e-02
  1.29814809e-02 -1.15860929e-03  3.80812511e-02  2.16609016e-02
  1.23684702e-03 -7.47688487e-02  3.23086232e-02  1.71934050e-02
 -1.07854068e-01  4.13478501e-02 -1.69676244e-02  5.14116921e-02
 -6.50631189e-02  2.90679317e-02  2.16390658e-02 -1.40003591e-08
 -5.47276549e-02 -2.21079458e-02  1.12641910e-02  6.77396730e-02
  2.63435580e-02 -2.30627018e-03  4.84103598e-02  1.90388169e-02
  6.29420951e-02  4.62095030e-02 -2.75534745e-02  1.38814524e-02
 -1.55894198e-02  3.66799012e-02 -2.41456479e-02  8.84115696e-04
  3.62182893e-02 -5.34663617e-04  2.87991520e-02  7.80000463e-02
  6.44254833e-02 -1.21932197e-02  2.01403350e-02 -7.63562024e-02
  1.93959419e-02  6.84652850e-02  7.04346001e-02  8.58995169e-02
 -5.04256077e-02 -3.08988057e-02  1.17744971e-02  2.72314884e-02
  6.22073896e-02 -3.06474343e-02  1.02516115e-01  6.61610290e-02
  1.60890911e-02  7.22552836e-02 -5.08080684e-02  6.51256591e-02
 -3.40761431e-02 -1.58857908e-02  4.98002209e-02 -5.82708716e-02
 -3.21344063e-02 -1.43419847e-01  3.67835648e-02  4.03264500e-02
  4.75163683e-02 -1.04223825e-01  1.91467311e-02 -5.59284166e-02
  5.88361137e-02 -3.11761834e-02  4.66121845e-02  5.89613020e-02
  5.65763302e-02 -5.29688671e-02 -7.20504746e-02 -1.39309671e-02
  8.39550421e-02 -7.33920708e-02 -1.97879802e-02 -9.86750890e-03]

The problem is that I’m also receiving this error: list index out of range
I searched the internet, tried using Gemini and OpenAI for help, and searched Stack Overflow.

I haven’t yet found anyone with this particular error in this context.

3

Answers


  1. Call the function and specify the length of the series

    prediction_series = generate_color_prediction_series(10)

    Print the generated series

    print(prediction_series)

    Login or Signup to reply.
  2. import os
    from pymongo import MongoClient
    import pinecone
    from pymongo.errors import OperationFailure
    from sentence_transformers import SentenceTransformer, util
    from certifi import where  # Import certifi library

    # MongoDB setup: TLS enabled, certificates validated against certifi's CA bundle.
    client = MongoClient(
        "my-mongodb-uri",
        tls=True,  # Enable TLS encryption
        tlsAllowInvalidCertificates=False,  # Don't allow invalid certificates
        tlsCAFile=where()  # Use certifi library for CA bundle
    )
    db = client['test']
    collection = db['reflections']

    # Pinecone initialization
    # NOTE(review): pinecone.init() and the module-level helpers used here look
    # like the older client interface, while the question imports the `Pinecone`
    # class -- confirm which client version is installed before using this.
    pinecone.init(api_key='my-api-key', environment='us-west1-gcp')  # Replace with your Pinecone environment
    index_name = 'langchain-demo'
    # Create the index on first run so the upserts below have a target.
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(index_name, dimension=384)  # Adjust dimension according to your embedding model
    index = pinecone.Index(index_name)

    # Sentence Transformer model
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    # Watch for changes: mirror every insert, update and delete on the
    # collection into the Pinecone index.
    try:
        cursor = collection.watch()
        for change in cursor:
            print("Change detected:", change)
            if change['operationType'] == 'insert':
                document = change['fullDocument']
                # .tolist() converts the embedding to a plain Python list, so
                # the truthiness check below is a simple "non-empty list" test.
                vector = model.encode(document['content']).tolist()  # Assuming 'content' is the field
                print("Extracted Vector:", vector)

                # str() of the ObjectId is used directly as the vector id.
                document_id = str(document['_id'])

                # Upsert to Pinecone
                if vector:  # Check if vector is not empty
                    index.upsert([(document_id, vector)])

            elif change['operationType'] == 'update':
                document_id = str(change['documentKey']['_id'])
                updated_fields = change['updateDescription']['updatedFields']
                # Re-embed only when the text itself changed.
                if 'content' in updated_fields:
                    vector = model.encode(updated_fields['content']).tolist()
                    index.upsert([(document_id, vector)])

            elif change['operationType'] == 'delete':
                document_id = str(change['documentKey']['_id'])
                index.delete(ids=[document_id])

    except OperationFailure as e:
        print("Error watching collection:", e)
    except Exception as e:
        print("An error occurred:", e)

    Login or Signup to reply.
  3. The error is from the doc_id split line.

    Your fullDocument dict looks like this:

    {
        "_id": ObjectId("665622ae13b3c25b81fe46c1"),
        "user": ObjectId("65d8937f6408bf2c0ca8d264"),
        "content": "teste mongodb",
        "createdAt": datetime.datetime(2024, 5, 28, 18, 30, 6, 908000),
        "__v": 0,
    }
    

    So when you do

    document = change['fullDocument']
    ...
    document_id = str(document['_id']).split("'")[1]
    

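    str(document['_id']) gives just the hex string 665622ae13b3c25b81fe46c1; the ObjectId('...') wrapper with its quotes only appears in the repr that print() shows for the whole dict. So there’s no ' in the stringified _id, and str(document['_id']).split("'") returns a list with only one element: that hex string. Since there is no element at index 1, it raises IndexError (list index out of range).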

    Also, why are you splitting _id on ' ?

    It should probably be just:

    document_id = str(document['_id'])
    

    You have two other lines where this is done correctly:

    document_id = str(change['documentKey']['_id'])
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search