I’m creating a chatbot RAG using the content from my MongoDB and sending to create vectors on Pinecone.
So my user can ask stuff to my chatbot about his reflections.
Here is my code:
import os
from pymongo import MongoClient
from pinecone import Pinecone, ServerlessSpec
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where # Import certifi library
# mongodb stuff
client = MongoClient(
"my-mongodb-uri",
tls=True, # Enable TLS encryption
tlsAllowInvalidCertificates=False, # Don't allow invalid certificates
tlsCAFile=where() # Use certifi library for CA bundle
)
db = client['test']
collection = db['reflections']
# Pinecone initialization
pc = Pinecone(api_key='my-api-key')
index = pc.Index("langchain-demo")
# transformer stuff
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Watch for changes
try:
cursor = collection.watch()
for change in cursor:
print("Change detected:", change)
if change['operationType'] == 'insert':
document = change['fullDocument']
vector = model.encode(document['content']) # Assuming 'content' is the field
print("Extracted Vector:", vector)
# Extract document ID from ObjectId
document_id = str(document['_id']).split("'")[1]
# Wrap upsert call with empty vector check
if vector: # Check if vector is not empty
index.upsert(vectors={document_id: vector})
elif change['operationType'] == 'update':
document_id = str(change['documentKey']['_id'])
updated_fields = change['updateDescription']['updatedFields']
if 'content' in updated_fields:
vector = model.encode(updated_fields['content'])
index.upsert(vectors=[document_id], data=vector.tolist())
elif change['operationType'] == 'delete':
document_id = str(change['documentKey']['_id'])
index.delete(ids=[document_id])
except OperationFailure as e:
print("Error watching collection:", e)
except Exception as e:
print("An error occurred:", e)
This is the log I received on my terminal:
Change detected: {'_id': {'_data': '82665622AE0000000B2B042C0100296E5A1004F1A0DC5D2C0C4EC2843048538C6B36F3463C6F7065726174696F6E54797065003C696E736572740046646F63756D656E744B65790046645F69640064665622AE13B3C25B81FE46C1000004'}, 'operationType': 'insert', 'clusterTime': Timestamp(1716921006, 11), 'wallTime': datetime.datetime(2024, 5, 28, 18, 30, 6, 916000), 'fullDocument': {'_id': ObjectId('665622ae13b3c25b81fe46c1'), 'user': ObjectId('65d8937f6408bf2c0ca8d264'), 'content': 'teste mongodb', 'createdAt': datetime.datetime(2024, 5, 28, 18, 30, 6, 908000), '__v': 0}, 'ns': {'db': 'test', 'coll': 'reflections'}, 'documentKey': {'_id': ObjectId('665622ae13b3c25b81fe46c1')}}
Extracted Vector: [ 2.64226589e-02 5.51917292e-02 -8.01229179e-02 6.16759956e-02
2.97571346e-03 -5.25409095e-02 -2.06136722e-02 2.41196547e-02
1.70215759e-02 4.23866622e-02 6.73603592e-03 -5.09259291e-02
-1.48372846e-02 7.09723681e-03 -1.48236733e-02 -1.65749993e-02
9.42820311e-03 -3.47889923e-02 4.76156734e-02 -1.14416014e-02
-2.76810937e-02 -7.33586177e-02 2.79922988e-02 4.48221937e-02
-3.42520475e-02 -7.56083280e-02 -1.88546516e-02 3.71571630e-02
-3.63041870e-02 -5.30020148e-02 4.92156222e-02 3.24101970e-02
1.43917967e-02 2.31850450e-03 -3.07541038e-03 1.03986263e-02
6.79664016e-02 -5.86303510e-02 -1.68009251e-02 -3.78069915e-02
2.32911427e-02 -4.27663438e-02 -2.12721266e-02 -5.84340282e-02
1.03256971e-01 -7.78031126e-02 -4.44727167e-02 1.10542767e-01
6.30531460e-02 -3.19500417e-02 2.60527879e-02 -1.16486132e-01
-5.51996529e-02 4.62782234e-02 3.89385074e-02 1.58163980e-01
-8.12400039e-03 -3.00704502e-02 -3.35364193e-02 3.37796435e-02
5.67190908e-02 -3.78245488e-02 -3.72845195e-02 3.34226415e-02
-2.56197937e-02 -1.38711361e-02 3.36623588e-03 3.23332138e-02
-4.64090845e-03 -2.81529520e-02 7.84241222e-03 1.87840331e-02
-4.04786393e-02 -9.18242242e-03 1.42984195e-02 9.59344432e-02
-8.56031012e-03 -9.00166705e-02 6.34619594e-02 3.46942805e-02
-1.21315375e-01 -1.27947167e-01 2.92107705e-02 -5.98839074e-02
-6.66733552e-03 2.20386945e-02 1.06475495e-01 -5.25924191e-02
-4.81234193e-02 -6.64262474e-03 2.43848264e-02 1.28781358e-02
-4.63195667e-02 7.55516142e-02 1.91857126e-02 5.11478595e-02
7.73477629e-02 5.94875030e-02 7.93703869e-02 2.19271239e-02
-5.33815532e-04 1.04968296e-02 7.78110474e-02 -3.95663939e-02
-7.16580264e-03 3.37898545e-02 2.74467710e-02 -8.29642192e-02
-4.68915589e-02 -2.53224969e-02 -1.62706897e-02 2.37261020e-02
-3.05816010e-02 6.37660455e-03 -6.75126612e-02 -4.52077389e-03
-4.86059487e-02 -5.44997081e-02 -1.06597044e-01 9.05475393e-02
5.58611341e-02 7.52945840e-02 -3.28133292e-02 -2.91952137e-02
3.31597738e-02 3.51161021e-03 8.75394344e-02 1.03704995e-33
1.38022542e-01 -9.83591303e-02 4.43550274e-02 1.05274946e-03
2.88495906e-02 7.61957541e-02 -2.07854919e-02 7.20968395e-02
-8.30703005e-02 1.15298852e-03 -3.55196968e-02 1.29330147e-03
2.64357477e-02 -5.18404879e-02 6.31415769e-02 3.08009889e-02
3.76578197e-02 3.31700668e-02 1.30407363e-02 2.17529833e-02
2.64088046e-02 -2.77639963e-02 -5.22936359e-02 -1.95870139e-02
6.81351684e-03 6.55588508e-02 -3.70829068e-02 -2.03726869e-02
-1.98107120e-02 1.93433892e-02 -6.25248849e-02 -4.19677747e-03
-8.86835158e-02 9.57719833e-02 9.21144336e-03 2.34254729e-02
-4.18317653e-02 -1.78317651e-02 -9.96567160e-02 2.77951220e-03
5.78196160e-02 2.66690087e-02 6.71238592e-03 -1.26469489e-02
-5.32274581e-02 4.53201607e-02 4.15935442e-02 -7.02985674e-02
8.65548104e-02 1.93077344e-02 -8.29852968e-02 1.86279765e-03
-9.70464796e-02 3.69346216e-02 4.38100286e-02 1.50465965e-02
1.20123737e-02 -1.99827086e-02 4.49663401e-02 -2.27664579e-02
-5.99026829e-02 2.14360859e-02 5.63477119e-03 7.70357698e-02
4.07660700e-04 -1.44859506e-02 -6.10246100e-02 -5.85204959e-02
1.64570604e-02 6.53662756e-02 3.03732231e-02 3.93993221e-02
-2.78256908e-02 1.81106180e-02 -9.54285823e-03 -4.35498394e-02
1.26534468e-02 1.56740248e-02 -8.21447670e-02 6.25986466e-03
6.70449436e-02 -8.75824168e-02 -8.16964507e-02 1.55098401e-02
7.45111937e-03 1.05148785e-01 -7.09625939e-03 2.56238016e-03
2.65282597e-02 -1.08919352e-01 3.68081091e-04 1.03041202e-01
-1.69032291e-02 -9.65850055e-02 3.27670053e-02 -1.52392722e-33
2.88561676e-02 -6.08335771e-02 2.32155789e-02 4.65114824e-02
1.07367739e-01 -3.87591906e-02 3.08643673e-02 7.41644343e-03
-5.42402901e-02 -1.43773090e-02 3.89164947e-02 -1.10371888e-01
2.37809680e-03 -2.96618696e-02 -5.97673617e-02 -3.35118175e-02
-5.04749045e-02 -1.19375162e-01 -8.40588752e-03 -8.33129417e-03
-1.01422250e-01 1.81846786e-02 4.10847627e-02 -2.07867264e-03
-1.45480633e-02 -9.40343514e-02 -3.80858555e-02 -9.28523913e-02
-3.49474549e-02 3.57780121e-02 2.82644555e-02 5.27115576e-02
-4.71878871e-02 7.05714822e-02 2.55910270e-02 9.02293995e-03
8.85344148e-02 3.68806347e-02 7.09631816e-02 4.70345989e-02
-1.22014368e-02 9.92123038e-02 -5.31965233e-02 -5.14485613e-02
6.69255704e-02 4.21657562e-02 1.32231619e-02 -7.31633278e-03
2.26458535e-02 -2.64296532e-02 -3.49785648e-02 -2.58285161e-02
5.24073280e-02 -1.41270570e-02 3.76646109e-02 -5.85196391e-02
-2.59447079e-02 -4.46911417e-02 5.75564057e-02 3.45758721e-02
1.68277156e-02 3.87044102e-02 -1.67042874e-02 6.53192848e-02
-1.53256878e-02 -3.99874747e-02 -1.04426391e-01 2.89602540e-02
1.76746026e-02 6.27156952e-03 -4.18228246e-02 -1.63344350e-02
-1.45597830e-02 7.30229691e-02 4.04479764e-02 -6.02601655e-02
-4.42335121e-02 -1.17401704e-02 5.29759973e-02 1.76030397e-02
1.29814809e-02 -1.15860929e-03 3.80812511e-02 2.16609016e-02
1.23684702e-03 -7.47688487e-02 3.23086232e-02 1.71934050e-02
-1.07854068e-01 4.13478501e-02 -1.69676244e-02 5.14116921e-02
-6.50631189e-02 2.90679317e-02 2.16390658e-02 -1.40003591e-08
-5.47276549e-02 -2.21079458e-02 1.12641910e-02 6.77396730e-02
2.63435580e-02 -2.30627018e-03 4.84103598e-02 1.90388169e-02
6.29420951e-02 4.62095030e-02 -2.75534745e-02 1.38814524e-02
-1.55894198e-02 3.66799012e-02 -2.41456479e-02 8.84115696e-04
3.62182893e-02 -5.34663617e-04 2.87991520e-02 7.80000463e-02
6.44254833e-02 -1.21932197e-02 2.01403350e-02 -7.63562024e-02
1.93959419e-02 6.84652850e-02 7.04346001e-02 8.58995169e-02
-5.04256077e-02 -3.08988057e-02 1.17744971e-02 2.72314884e-02
6.22073896e-02 -3.06474343e-02 1.02516115e-01 6.61610290e-02
1.60890911e-02 7.22552836e-02 -5.08080684e-02 6.51256591e-02
-3.40761431e-02 -1.58857908e-02 4.98002209e-02 -5.82708716e-02
-3.21344063e-02 -1.43419847e-01 3.67835648e-02 4.03264500e-02
4.75163683e-02 -1.04223825e-01 1.91467311e-02 -5.59284166e-02
5.88361137e-02 -3.11761834e-02 4.66121845e-02 5.89613020e-02
5.65763302e-02 -5.29688671e-02 -7.20504746e-02 -1.39309671e-02
8.39550421e-02 -7.33920708e-02 -1.97879802e-02 -9.86750890e-03]
The problem is I’m receiving this error too: list index out of range
I searched on internet, try to use Gemini and OpenAI to help me and search on StackOverFlow.
Didn’t find yet someone with this particularly error in this context.
3
Answers
Call the function and specify the length of the series
prediction_series = generate_color_prediction_series(10)
Print the generated series
print(prediction_series)
import os
from pymongo import MongoClient
import pinecone
from pymongo.errors import OperationFailure
from sentence_transformers import SentenceTransformer, util
from certifi import where # Import certifi library
MongoDB setup
client = MongoClient(
"my-mongodb-uri",
tls=True, # Enable TLS encryption
tlsAllowInvalidCertificates=False, # Don’t allow invalid certificates
tlsCAFile=where() # Use certifi library for CA bundle
)
db = client[‘test’]
collection = db[‘reflections’]
Pinecone initialization
pinecone.init(api_key=’my-api-key’, environment=’us-west1-gcp’) # Replace with your Pinecone environment
index_name = ‘langchain-demo’
if index_name not in pinecone.list_indexes():
pinecone.create_index(index_name, dimension=384) # Adjust dimension according to your embedding model
index = pinecone.Index(index_name)
Sentence Transformer model
model = SentenceTransformer(‘sentence-transformers/all-MiniLM-L6-v2’)
Watch for changes
try:
cursor = collection.watch()
for change in cursor:
print("Change detected:", change)
if change[‘operationType’] == ‘insert’:
document = change[‘fullDocument’]
vector = model.encode(document[‘content’]).tolist() # Assuming ‘content’ is the field
print("Extracted Vector:", vector)
except OperationFailure as e:
print("Error watching collection:", e)
except Exception as e:
print("An error occurred:", e)
The error is from the doc_id split line.
Your
fullDocument
dict looks like this:So when you do
There’s no
'
in the_id
. Sostr(document['_id']).split("'")
returns a list with only one element, the string ObjectId. Since there is no element atindex 1
, it raisesIndexError
.Also, why are you splitting
_id
on'
?It should probably be just:
You have two other lines where this is done correctly: