class VectorIndexRetriever(BaseRetriever):
    """
    Vector index retriever.

    Builds a ``VectorStoreQuery`` from the incoming query bundle, runs it
    against the index's vector store and wraps the returned nodes in
    ``NodeWithScore`` objects.

    Args:
        index (VectorStoreIndex): vector store index.
        similarity_top_k (int): number of top k results to return.
        vector_store_query_mode (str): vector store query mode
            See reference for VectorStoreQueryMode for full list of supported modes.
        filters (Optional[MetadataFilters]): metadata filters, defaults to None
        alpha (float): weight for sparse/dense retrieval, only used for
            hybrid query mode.
        node_ids (Optional[List[str]]): forwarded as ``node_ids`` on the
            vector store query.
        doc_ids (Optional[List[str]]): list of documents to constrain search.
        sparse_top_k (Optional[int]): forwarded as ``sparse_top_k`` on the
            vector store query.
        hybrid_top_k (Optional[int]): forwarded as ``hybrid_top_k`` on the
            vector store query.
        callback_manager (Optional[CallbackManager]): callback manager; a new
            ``CallbackManager()`` is created when omitted.
        object_map (Optional[dict]): forwarded to the base retriever.
        embed_model (Optional[BaseEmbedding]): model used to embed queries;
            falls back to the index's embed model when omitted.
        verbose (bool): forwarded to the base retriever.
        vector_store_kwargs (dict): Additional vector store specific kwargs to pass
            through to the vector store at query time. Read from ``**kwargs``.

    """

    def __init__(
        self,
        index: VectorStoreIndex,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        vector_store_query_mode: VectorStoreQueryMode = VectorStoreQueryMode.DEFAULT,
        filters: Optional[MetadataFilters] = None,
        alpha: Optional[float] = None,
        node_ids: Optional[List[str]] = None,
        doc_ids: Optional[List[str]] = None,
        sparse_top_k: Optional[int] = None,
        hybrid_top_k: Optional[int] = None,
        callback_manager: Optional[CallbackManager] = None,
        object_map: Optional[dict] = None,
        embed_model: Optional[BaseEmbedding] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        self._index = index
        self._vector_store = self._index.vector_store
        self._embed_model = embed_model or self._index._embed_model
        self._docstore = self._index.docstore
        self._similarity_top_k = similarity_top_k
        # Coerce through the enum so an invalid mode fails at construction.
        self._vector_store_query_mode = VectorStoreQueryMode(vector_store_query_mode)
        self._alpha = alpha
        self._node_ids = node_ids
        self._doc_ids = doc_ids
        self._filters = filters
        self._sparse_top_k = sparse_top_k
        self._hybrid_top_k = hybrid_top_k
        # ``or {}`` also covers an explicit ``vector_store_kwargs=None``, which
        # would otherwise break the ``**self._kwargs`` expansion at query time.
        self._kwargs: Dict[str, Any] = kwargs.get("vector_store_kwargs") or {}

        callback_manager = callback_manager or CallbackManager()
        super().__init__(
            callback_manager=callback_manager,
            object_map=object_map,
            verbose=verbose,
        )

    @property
    def similarity_top_k(self) -> int:
        """Return similarity top k."""
        return self._similarity_top_k

    @similarity_top_k.setter
    def similarity_top_k(self, similarity_top_k: int) -> None:
        """Set similarity top k."""
        self._similarity_top_k = similarity_top_k

    @dispatcher.span
    def _retrieve(
        self,
        query_bundle: QueryBundle,
    ) -> List[NodeWithScore]:
        """
        Retrieve nodes for ``query_bundle`` synchronously.

        When the vector store is an embedding-query store and the bundle has
        no embedding yet (but does have embedding strings), the aggregated
        query embedding is computed and stored on the passed bundle in place.
        """
        if self._vector_store.is_embedding_query:
            if query_bundle.embedding is None and len(query_bundle.embedding_strs) > 0:
                query_bundle.embedding = (
                    self._embed_model.get_agg_embedding_from_queries(
                        query_bundle.embedding_strs
                    )
                )
        return self._get_nodes_with_embeddings(query_bundle)

    @dispatcher.span
    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """
        Retrieve nodes for ``query_bundle`` asynchronously.

        Unlike ``_retrieve``, the caller's bundle is not modified: a new
        ``QueryBundle`` carrying only ``query_str`` and the (possibly freshly
        computed) embedding is passed on to the vector store query.
        """
        embedding = query_bundle.embedding
        if self._vector_store.is_embedding_query:
            if query_bundle.embedding is None and len(query_bundle.embedding_strs) > 0:
                embed_model = self._embed_model
                embedding = await embed_model.aget_agg_embedding_from_queries(
                    query_bundle.embedding_strs
                )
        return await self._aget_nodes_with_embeddings(
            QueryBundle(query_str=query_bundle.query_str, embedding=embedding)
        )

    def _build_vector_store_query(
        self, query_bundle_with_embeddings: QueryBundle
    ) -> VectorStoreQuery:
        """Combine the bundle's embedding/query string with the stored settings."""
        return VectorStoreQuery(
            query_embedding=query_bundle_with_embeddings.embedding,
            similarity_top_k=self._similarity_top_k,
            node_ids=self._node_ids,
            doc_ids=self._doc_ids,
            query_str=query_bundle_with_embeddings.query_str,
            mode=self._vector_store_query_mode,
            alpha=self._alpha,
            filters=self._filters,
            sparse_top_k=self._sparse_top_k,
            hybrid_top_k=self._hybrid_top_k,
        )

    def _build_node_list_from_query_result(
        self, query_result: VectorStoreQueryResult
    ) -> List[NodeWithScore]:
        """
        Turn a vector store query result into a list of scored nodes.

        ``query_result.nodes`` is filled in or updated in place before the
        result is logged and wrapped.

        Raises:
            ValueError: if the result carries neither nodes nor ids.

        """
        if query_result.nodes is None:
            # NOTE: vector store does not keep text and returns node indices.
            # Need to recover all nodes from docstore
            if query_result.ids is None:
                raise ValueError(
                    "Vector store query result should return at "
                    "least one of nodes or ids."
                )
            assert isinstance(self._index.index_struct, IndexDict)
            node_ids = [
                self._index.index_struct.nodes_dict[idx] for idx in query_result.ids
            ]
            nodes = self._docstore.get_nodes(node_ids)
            query_result.nodes = nodes
        else:
            # NOTE: vector store keeps text, returns nodes.
            # Only need to recover image or index nodes from docstore
            for i, result_node in enumerate(query_result.nodes):
                source_node = result_node.source_node
                if (not self._vector_store.stores_text) or (
                    source_node is not None
                    and source_node.node_type != ObjectType.TEXT
                ):
                    node_id = result_node.node_id
                    # Swap in the docstore copy only when the docstore has one;
                    # otherwise keep the node the vector store returned.
                    if self._docstore.document_exists(node_id):
                        query_result.nodes[i] = self._docstore.get_node(  # type: ignore
                            node_id
                        )

        log_vector_store_query_result(query_result)

        # Scores are matched to nodes by position; without similarities every
        # node gets a score of None.
        similarities = query_result.similarities
        return [
            NodeWithScore(
                node=node,
                score=None if similarities is None else similarities[ind],
            )
            for ind, node in enumerate(query_result.nodes)
        ]

    def _get_nodes_with_embeddings(
        self, query_bundle_with_embeddings: QueryBundle
    ) -> List[NodeWithScore]:
        """Query the vector store synchronously and wrap the result."""
        query = self._build_vector_store_query(query_bundle_with_embeddings)
        query_result = self._vector_store.query(query, **self._kwargs)
        return self._build_node_list_from_query_result(query_result)

    async def _aget_nodes_with_embeddings(
        self, query_bundle_with_embeddings: QueryBundle
    ) -> List[NodeWithScore]:
        """Query the vector store asynchronously and wrap the result."""
        query = self._build_vector_store_query(query_bundle_with_embeddings)
        query_result = await self._vector_store.aquery(query, **self._kwargs)
        return self._build_node_list_from_query_result(query_result)
class VectorIndexAutoRetriever(BaseAutoRetriever):
    """
    Vector store auto retriever.

    A retriever for vector store index that uses an LLM to automatically set
    vector store query parameters.

    Args:
        index (VectorStoreIndex): vector store index
        vector_store_info (VectorStoreInfo): additional information about
            vector store content and supported metadata filters. The natural language
            description is used by an LLM to automatically set vector store query
            parameters.
        llm (Optional[LLM]): LLM used to generate the query spec. Falls back
            to ``Settings.llm`` when omitted.
        prompt_template_str: custom prompt template string for LLM.
            Uses default template string if None.
        similarity_top_k (int): number of top k results to return.
        empty_query_top_k (Optional[int]): number of top k results
            to return if the inferred query string is blank (uses metadata filters only).
            Can be set to None, which would use the similarity_top_k instead.
            By default, set to 10.
        max_top_k (int):
            the maximum top_k allowed. When the LLM-generated spec sets a
            top_k, the effective value is the minimum of that top_k,
            max_top_k and the configured top_k. It is not applied when the
            spec leaves top_k unset.
        vector_store_query_mode (str): vector store query mode
            See reference for VectorStoreQueryMode for full list of supported modes.
        default_empty_query_vector (Optional[List[float]]): default empty query vector.
            Defaults to None. If not None, then this vector will be used as the query
            vector if the query is empty.
        callback_manager (Optional[CallbackManager]): callback manager. Falls
            back to ``Settings.callback_manager`` when omitted.
        verbose (bool): verbose mode
        extra_filters (Optional[MetadataFilters]): filters appended to the
            LLM-generated ones. An OR condition is rejected with ``ValueError``.
        object_map (Optional[dict]): forwarded to the base retriever; falls
            back to the index's object map when omitted.
        objects (Optional[List[IndexNode]]): forwarded to the base retriever.
        **kwargs: forwarded to every ``VectorIndexRetriever`` this class builds.

    """

    def __init__(
        self,
        index: VectorStoreIndex,
        vector_store_info: VectorStoreInfo,
        llm: Optional[LLM] = None,
        prompt_template_str: Optional[str] = None,
        max_top_k: int = 10,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        empty_query_top_k: Optional[int] = 10,
        vector_store_query_mode: VectorStoreQueryMode = VectorStoreQueryMode.DEFAULT,
        default_empty_query_vector: Optional[List[float]] = None,
        callback_manager: Optional[CallbackManager] = None,
        verbose: bool = False,
        extra_filters: Optional[MetadataFilters] = None,
        object_map: Optional[dict] = None,
        objects: Optional[List[IndexNode]] = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize params.

        Raises:
            ValueError: if ``extra_filters`` uses the OR condition.

        """
        self._index = index
        self._vector_store_info = vector_store_info
        self._default_empty_query_vector = default_empty_query_vector
        self._llm = llm or Settings.llm
        callback_manager = callback_manager or Settings.callback_manager

        # prompt
        prompt_template_str = (
            prompt_template_str or DEFAULT_VECTOR_STORE_QUERY_PROMPT_TMPL
        )
        self._output_parser = VectorStoreQueryOutputParser()
        self._prompt: BasePromptTemplate = PromptTemplate(template=prompt_template_str)

        # additional config
        self._max_top_k = max_top_k
        self._similarity_top_k = similarity_top_k
        self._empty_query_top_k = empty_query_top_k
        self._vector_store_query_mode = vector_store_query_mode
        # if extra_filters is OR condition, we don't support that yet
        if extra_filters is not None and extra_filters.condition == FilterCondition.OR:
            raise ValueError("extra_filters cannot be OR condition")
        self._extra_filters = extra_filters or MetadataFilters(filters=[])
        self._kwargs = kwargs
        super().__init__(
            callback_manager=callback_manager,
            object_map=object_map or self._index._object_map,
            objects=objects,
            verbose=verbose,
        )

    def _get_prompts(self) -> PromptDictType:
        """Get prompts."""
        return {
            "prompt": self._prompt,
        }

    def _update_prompts(self, prompts: PromptDictType) -> None:
        """Update prompts."""
        if "prompt" in prompts:
            self._prompt = prompts["prompt"]

    def _get_query_bundle(self, query: str) -> QueryBundle:
        """
        Get query bundle.

        A blank query combined with a configured default empty-query vector
        yields a bundle with an empty query string and that vector as its
        embedding; otherwise the bundle just wraps ``query``.
        """
        if not query and self._default_empty_query_vector is not None:
            return QueryBundle(
                query_str="",
                embedding=self._default_empty_query_vector,
            )
        return QueryBundle(query_str=query)

    def _parse_generated_spec(
        self, output: str, query_bundle: QueryBundle
    ) -> BaseModel:
        """
        Parse generated spec.

        If the output parser raises ``OutputParserException``, a warning is
        logged and a default spec is returned: the original query string, no
        filters and no top_k.
        """
        try:
            structured_output = cast(
                StructuredOutput, self._output_parser.parse(output)
            )
            query_spec = cast(VectorStoreQuerySpec, structured_output.parsed_output)
        except OutputParserException:
            _logger.warning("Failed to parse query spec, using defaults as fallback.")
            query_spec = VectorStoreQuerySpec(
                query=query_bundle.query_str,
                filters=[],
                top_k=None,
            )

        return query_spec

    def _build_spec_prompt_args(self, query_bundle: QueryBundle) -> dict:
        """Build the prompt variables shared by the sync and async spec generators."""
        import json

        info_str = self._vector_store_info.model_dump_json(indent=4)
        # ``model_json_schema()`` returns a dict; serialize it so the prompt
        # receives JSON text (like ``info_str``) rather than a Python repr.
        schema_str = json.dumps(VectorStoreQuerySpec.model_json_schema(), indent=4)
        return {
            "schema_str": schema_str,
            "info_str": info_str,
            "query_str": query_bundle.query_str,
        }

    def generate_retrieval_spec(
        self, query_bundle: QueryBundle, **kwargs: Any
    ) -> BaseModel:
        """
        Ask the LLM for a query spec and parse its output.

        ``**kwargs`` is accepted for interface compatibility and is not used.
        """
        # call LLM
        output = self._llm.predict(
            self._prompt,
            **self._build_spec_prompt_args(query_bundle),
        )

        # parse output
        return self._parse_generated_spec(output, query_bundle)

    async def agenerate_retrieval_spec(
        self, query_bundle: QueryBundle, **kwargs: Any
    ) -> BaseModel:
        """
        Async variant of ``generate_retrieval_spec``.

        ``**kwargs`` is accepted for interface compatibility and is not used.
        """
        # call LLM
        output = await self._llm.apredict(
            self._prompt,
            **self._build_spec_prompt_args(query_bundle),
        )

        # parse output
        return self._parse_generated_spec(output, query_bundle)

    def _build_retriever_from_spec(  # type: ignore
        self, spec: VectorStoreQuerySpec
    ) -> Tuple[BaseRetriever, QueryBundle]:
        """
        Build a ``VectorIndexRetriever`` and query bundle from a query spec.

        Returns:
            A ``(retriever, query_bundle)`` tuple; the retriever carries the
            resolved top_k and the spec's filters merged with the extra filters.

        """
        # construct new query bundle from query_spec
        # insert 0 vector if query is empty and default_empty_query_vector is not None
        new_query_bundle = self._get_query_bundle(spec.query)

        _logger.info(f"Using query str: {spec.query}")
        filter_list = [
            (spec_filter.key, spec_filter.operator.value, spec_filter.value)
            for spec_filter in spec.filters
        ]
        _logger.info(f"Using filters: {filter_list}")
        if self._verbose:
            print(f"Using query str: {spec.query}")
            print(f"Using filters: {filter_list}")

        # define similarity_top_k
        # if query is specified, then use similarity_top_k
        # if query is blank, then use empty_query_top_k
        if spec.query or self._empty_query_top_k is None:
            similarity_top_k = self._similarity_top_k
        else:
            similarity_top_k = self._empty_query_top_k

        # if query_spec.top_k is specified, then use it
        # as long as below max_top_k and similarity_top_k
        # NOTE(review): max_top_k is only enforced in this branch, so a
        # configured top_k above max_top_k passes through when the spec has no
        # top_k - confirm whether that is intended.
        if spec.top_k is not None:
            similarity_top_k = min(spec.top_k, self._max_top_k, similarity_top_k)

        _logger.info(f"Using top_k: {similarity_top_k}")

        # avoid passing empty filters to retriever
        combined_filters = [*spec.filters, *self._extra_filters.filters]
        filters = (
            MetadataFilters(filters=combined_filters) if combined_filters else None
        )

        return (
            VectorIndexRetriever(
                self._index,
                filters=filters,
                similarity_top_k=similarity_top_k,
                vector_store_query_mode=self._vector_store_query_mode,
                object_map=self.object_map,
                verbose=self._verbose,
                **self._kwargs,
            ),
            new_query_bundle,
        )