skip to Main Content

I want to create an Azure AI Search index with a vector field using the latest version of azure-search-documents (currently v11.4.0).

Here is my code:

from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.text_splitter import TokenTextSplitter
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,
    SemanticField,  
    SearchField,  
    SemanticSearch,
    VectorSearch,
    VectorSearchAlgorithmConfiguration,
    HnswAlgorithmConfiguration
)  

# Connection settings; the AZURE_COGNITIVE_SEARCH_* constants are expected to
# be defined elsewhere (they are not part of this snippet).
service_endpoint = AZURE_COGNITIVE_SEARCH_ENDPOINT  # read by create_index()
index_name = AZURE_COGNITIVE_SEARCH_INDEX_NAME
key = AZURE_COGNITIVE_SEARCH_KEY
# Key-based credential passed to SearchIndexClient in create_index().
credential = AzureKeyCredential(key)


def create_index(): 
    """Build the index client and the index definition for the vector index.

    Reads the module-level names ``service_endpoint``, ``credential`` and
    ``index_name``.

    Returns:
        A ``(client, index)`` tuple: the ``SearchIndexClient`` and the
        ``SearchIndex`` definition built from the fields and vector search
        settings below.
    """

    # Client used to manage indexes for the given endpoint and credential
    client = SearchIndexClient(service_endpoint, credential)
    # Define the index fields
    fields = [
    # "chunk_id" is the document key.
    SimpleField(name="chunk_id", type=SearchFieldDataType.String, key=True, sortable=True, filterable=True, facetable=True),
    SimpleField(name="file_name", type=SearchFieldDataType.String),
    SimpleField(name="url_name", type=SearchFieldDataType.String),
    SimpleField(name="origin", type=SearchFieldDataType.String, sortable=True, filterable=True, facetable=True),
    SearchableField(name="content", type=SearchFieldDataType.String),
    # Vector field: a collection of Single values with 1536 dimensions.
    # NOTE(review): `vector_search_configuration` is not among the keyword
    # arguments SearchField reads in v11.4.0 (it reads
    # `vector_search_profile_name`), so this reference is presumably dropped --
    # consistent with the 'vectorSearchConfiguration' error reported for this code.
    SearchField(name="content_vector", type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True, vector_search_dimensions=1536, vector_search_configuration="my-vector-config"),
]
    # Vector search settings with a single HNSW algorithm configuration.
    # NOTE(review): no `profiles` are passed to VectorSearch here, and the
    # algorithm configuration reuses the name the field refers to
    # ("my-vector-config").
    vector_search=VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(
            name="my-vector-config",
            kind="hnsw",
            parameters={
                "m": 4,
                "efConstruction":400,
                "efSearch":500,
                "metric":"cosine"
            }
        )
    ]
)

    # Create the search index definition with the vector search settings
    index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
    return client, index

# Build the client and the index definition, then submit the definition via
# create_or_update_index and report the resulting index name.
# NOTE: despite its name, `search_client` holds the SearchIndexClient returned
# by create_index().
search_client, search_index = create_index()
result = search_client.create_or_update_index(search_index)  
print(f"{result.name} created") 

This gives me the following error:

Message: The request is invalid. Details: definition : The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set.
Exception Details:  (InvalidField) The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition
    Code: InvalidField
    Message: The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition

I tried to copy the exact solution provided here: https://learn.microsoft.com/en-us/answers/questions/1395031/how-to-configure-vectorsearchconfiguration-for-a-s
which gives me the same error as above.

I also tried this sample which is part of the official documentation (linked on the pypi page): https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/search/azure-search-documents/samples/sample_vector_search.py
But here I get this error:

Code: InvalidRequestParameter
Message: The request is invalid. Details: definition : The field 'contentVector' uses a vector search algorithm configuration 'my-algorithms-config' which is not defined.
Exception Details:  (UnknownVectorAlgorithmConfiguration) The field 'contentVector' uses a vector search algorithm configuration 'my-algorithms-config' which is not defined. Parameters: definition
    Code: UnknownVectorAlgorithmConfiguration
    Message: The field 'contentVector' uses a vector search algorithm configuration 'my-algorithms-config' which is not defined. Parameters: definition

And I also found this other example notebook from Microsoft about AI-Search: https://github.com/Azure/azure-search-vector-samples/blob/main/demo-python/code/azure-search-custom-vectorization-sample.ipynb
This code also gave me the exact same error as my initial code.

I’ve been trying to get this working for 2 days now and I’m about to give up. There are several different sets of documentation/examples in various places, and every piece of code looks different. Apparently Microsoft changes the function names constantly, with almost every package update, so most of the examples are probably outdated by now. I have no idea where to find the "latest" documentation that actually provides working code, as none of the examples I tested worked for me. This has to be the worst Python documentation I have ever seen in my life. Even the Langchain documentation is great compared to this…

EDIT:
I just checked the source code of the "SearchField". It takes the following arguments:

def __init__(self, **kwargs):
        """Populate the field's attributes from keyword arguments.

        ``name`` and ``type`` are looked up with ``kwargs[...]`` and are
        therefore required; every other attribute falls back to ``None`` when
        the corresponding keyword is omitted.
        """
        super(SearchField, self).__init__(**kwargs)
        # Required keywords (indexing raises KeyError if they are missing).
        self.name = kwargs["name"]
        self.type = kwargs["type"]
        # Optional keywords, defaulting to None.
        self.key = kwargs.get("key", None)
        self.hidden = kwargs.get("hidden", None)
        self.searchable = kwargs.get("searchable", None)
        self.filterable = kwargs.get("filterable", None)
        self.sortable = kwargs.get("sortable", None)
        self.facetable = kwargs.get("facetable", None)
        self.analyzer_name = kwargs.get("analyzer_name", None)
        self.search_analyzer_name = kwargs.get("search_analyzer_name", None)
        self.index_analyzer_name = kwargs.get("index_analyzer_name", None)
        self.synonym_map_names = kwargs.get("synonym_map_names", None)
        self.fields = kwargs.get("fields", None)
        # Vector-related keywords; note that no `vector_search_configuration`
        # key is read anywhere in this method.
        self.vector_search_dimensions = kwargs.get("vector_search_dimensions", None)
        self.vector_search_profile_name = kwargs.get("vector_search_profile_name", None)

You can see that there is neither a "vector_search_configuration" nor a "vectorSearchConfiguration" argument. I think they renamed it to "vector_search_profile_name" for some reason. Therefore I assume that the sample in the official documentation is the correct one and the other 2 are indeed outdated. But even so, I’m still getting an error because "my-algorithms-config" is not defined.

2

Answers


  1. Chosen as BEST ANSWER

    I finally found the answer. Turns out at this moment there is not a single correct sample from Microsoft to properly create an index with a vector field. They renamed a few function names and argument names which makes most other answers (e.g. on Microsoft support pages) outdated. The sample in the official GitHub repo generally uses the correct function and argument names but it is still wrong as they pass the wrong value. A GitHub issue was opened by someone else for this exact problem. The issue got closed after someone claimed he fixed it, even though nothing was fixed. The issue was then reopened 3 weeks ago and as of today 07/12/2023 the issue is still open and the documentation is still incorrect.

    Long story short, this is how to properly define an index with a vector field in azure-search-documents v11.4.0:

    from azure.core.credentials import AzureKeyCredential  
    from azure.search.documents import SearchClient  
    from azure.search.documents.indexes import SearchIndexClient  
    from azure.search.documents.indexes.models import (  
        SearchIndex,  
        SearchField,  
        SearchFieldDataType,  
        SimpleField,  
        SearchableField,  
        SearchIndex,
        SearchField,  
        VectorSearch,
        VectorSearchProfile,
        HnswAlgorithmConfiguration
    ) 
    
    # Connection settings; the AZURE_COGNITIVE_SEARCH_* constants are
    # presumably defined elsewhere (they are not part of this snippet).
    service_endpoint = AZURE_COGNITIVE_SEARCH_ENDPOINT
    index_name = AZURE_COGNITIVE_SEARCH_INDEX_NAME 
    key = AZURE_COGNITIVE_SEARCH_KEY
    # Key-based credential passed to SearchIndexClient in create_index().
    credential = AzureKeyCredential(key)
    
    
    
    def create_index():
        """Assemble the index client and a vector-enabled index definition.

        Uses the module-level ``service_endpoint``, ``credential`` and
        ``index_name``. This function only constructs the objects and hands
        them back to the caller.

        Returns:
            A ``(client, index)`` tuple: the ``SearchIndexClient`` and the
            ``SearchIndex`` definition.
        """
        # The vector field refers to the profile by name; the profile in turn
        # refers to the algorithm configuration by name.
        profile_name = "my-vector-config"
        algorithm_name = "my-algorithms-config"

        index_client = SearchIndexClient(service_endpoint, credential)

        # "chunk_id" is the document key.
        chunk_id_field = SimpleField(
            name="chunk_id",
            type=SearchFieldDataType.String,
            key=True,
            sortable=True,
            filterable=True,
            facetable=True,
        )
        file_name_field = SimpleField(name="file_name", type=SearchFieldDataType.String)
        url_name_field = SimpleField(name="url_name", type=SearchFieldDataType.String)
        origin_field = SimpleField(
            name="origin",
            type=SearchFieldDataType.String,
            sortable=True,
            filterable=True,
            facetable=True,
        )
        content_field = SearchableField(name="content", type=SearchFieldDataType.String)
        # Vector field: a collection of Single values with 1536 dimensions,
        # bound to the vector search profile.
        content_vector_field = SearchField(
            name="content_vector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,
            vector_search_profile_name=profile_name,
        )
        index_fields = [
            chunk_id_field,
            file_name_field,
            url_name_field,
            origin_field,
            content_field,
            content_vector_field,
        ]

        # One profile pointing at one HNSW algorithm configuration.
        vector_profile = VectorSearchProfile(
            name=profile_name,
            algorithm_configuration_name=algorithm_name,
        )
        hnsw_algorithm = HnswAlgorithmConfiguration(name=algorithm_name)
        vector_settings = VectorSearch(
            profiles=[vector_profile],
            algorithms=[hnsw_algorithm],
        )

        index_definition = SearchIndex(
            name=index_name,
            fields=index_fields,
            vector_search=vector_settings,
        )
        return index_client, index_definition
    

Please signup or login to give your own answer.
Back To Top
Search