class InMemoryDBDistanceMetric(str, Enum):
    """Distance metrics for Redis vector fields.

    The enum mixes in ``str``, so every member compares equal to its
    upper-case string value and can be looked up from that value.
    """

    # Member names are lower-case; the values are the upper-case spellings.
    l2 = "L2"
    cosine = "COSINE"
    ip = "IP"
class InMemoryDBField(BaseModel):
    """Base class for Redis fields.

    Every field schema in this module derives from this model and therefore
    carries a ``name``.
    """

    # Required: an annotation without a default must be supplied by the caller.
    name: str
class TextFieldSchema(InMemoryDBField):
    """Schema for text fields in Redis.

    All settings are optional; only ``name`` (inherited) is required.
    NOTE(review): the option names look like they mirror the search engine's
    TEXT field options -- confirm against the code that consumes this schema.
    """

    weight: float = Field(default=1)
    no_stem: bool = Field(default=False)
    phonetic_matcher: Optional[str] = Field(default=None)
    withsuffixtrie: bool = Field(default=False)
    no_index: bool = Field(default=False)
    sortable: Optional[bool] = Field(default=False)
class TagFieldSchema(InMemoryDBField):
    """Schema for tag fields in Redis.

    All settings are optional; only ``name`` (inherited) is required.
    """

    # Defaults to a comma.
    separator: str = Field(default=",")
    case_sensitive: bool = Field(default=False)
    no_index: bool = Field(default=False)
    sortable: Optional[bool] = Field(default=False)
class InMemoryDBVectorField(InMemoryDBField):
    """Base class for Redis vector fields.

    Holds the settings shared by every vector index algorithm. String values
    given for ``algorithm``, ``datatype`` and ``distance_metric`` are
    upper-cased before validation, so lower-case spellings are accepted.
    """

    dims: int = Field(...)
    algorithm: object = Field(...)
    datatype: str = Field(default="FLOAT32")
    distance_metric: InMemoryDBDistanceMetric = Field(default="COSINE")  # type: ignore
    initial_cap: Optional[int] = None

    @field_validator("algorithm", "datatype", "distance_metric", mode="before")
    @classmethod
    def uppercase_strings(cls, v: Any) -> Any:
        """Upper-case string inputs; pass anything else through unchanged.

        Non-string values are left alone so that the field's own type
        validation (or ``uppercase_and_check_dtype``) rejects them with a
        validation error, rather than ``.upper()`` raising ``AttributeError``.
        """
        return v.upper() if isinstance(v, str) else v

    @field_validator("datatype", mode="before")
    @classmethod
    def uppercase_and_check_dtype(cls, v: Any) -> str:
        """Return the upper-cased datatype after checking it is supported.

        Raises:
            ValueError: if ``v`` is not a string or is not a key of
                ``INMEMORYDB_VECTOR_DTYPE_MAP``.
        """
        if not isinstance(v, str) or v.upper() not in INMEMORYDB_VECTOR_DTYPE_MAP:
            raise ValueError(
                f"datatype must be one of {list(INMEMORYDB_VECTOR_DTYPE_MAP)}. "
                f"Got {v}"
            )
        return v.upper()

    def _fields(self) -> Dict[str, Any]:
        """Build the attribute mapping shared by all vector field types.

        Returns:
            A dict with ``TYPE``, ``DIM`` and ``DISTANCE_METRIC``, plus
            ``INITIAL_CAP`` when ``initial_cap`` has been set.
        """
        field_data = {
            "TYPE": self.datatype,
            "DIM": self.dims,
            "DISTANCE_METRIC": self.distance_metric,
        }
        if self.initial_cap is not None:  # Only include it if it's set
            field_data["INITIAL_CAP"] = self.initial_cap
        return field_data
class FlatVectorField(InMemoryDBVectorField):
    """Schema for flat vector fields in Redis.

    Narrows ``algorithm`` to the single literal ``"FLAT"`` and adds an
    optional ``block_size`` setting.
    """

    algorithm: Literal["FLAT"] = "FLAT"
    # Left as None unless the caller provides a value.
    block_size: Optional[int] = Field(default=None)
class HNSWVectorField(InMemoryDBVectorField):
    """Schema for HNSW vector fields in Redis.

    Narrows ``algorithm`` to the single literal ``"HNSW"`` and adds the
    HNSW tuning settings below, each with a default.
    """

    algorithm: Literal["HNSW"] = "HNSW"
    m: int = 16
    ef_construction: int = 200
    ef_runtime: int = 10
    epsilon: float = 0.01
class InMemoryDBModel(BaseModel):
    """Schema for MemoryDB index.

    Groups the field schemas of an index by category (text, tag, numeric,
    extra, vector) and records which field names hold the document content
    and its embedding vector.
    """

    # always have a content field for text
    text: List[TextFieldSchema] = [TextFieldSchema(name="content")]
    tag: Optional[List[TagFieldSchema]] = None
    numeric: Optional[List[NumericFieldSchema]] = None
    extra: Optional[List[InMemoryDBField]] = None

    # filled by default_vector_schema
    vector: Optional[List[Union[FlatVectorField, HNSWVectorField]]] = None
    content_key: str = "content"
    content_vector_key: str = "content_vector"

    def add_vector_field(self, vector_field: Dict[str, Any]) -> None:
        """Validate ``vector_field`` and append it to ``self.vector``.

        Args:
            vector_field: keyword arguments for a vector field model. Must
                contain an ``"algorithm"`` key naming ``FLAT`` or ``HNSW``
                (matched case-insensitively, consistent with the field
                validators, which upper-case the value).

        Raises:
            KeyError: if ``vector_field`` has no ``"algorithm"`` key.
            ValueError: if the algorithm is neither ``FLAT`` nor ``HNSW``.
        """
        # catch case where user inputted no vector field spec
        # in the index schema
        if self.vector is None:
            self.vector = []

        algorithm = str(vector_field["algorithm"]).upper()

        # ignore types as pydantic is handling type validation and conversion
        if algorithm == "FLAT":
            self.vector.append(FlatVectorField(**vector_field))  # type: ignore
        elif algorithm == "HNSW":
            self.vector.append(HNSWVectorField(**vector_field))  # type: ignore
        else:
            raise ValueError(
                f"algorithm must be either FLAT or HNSW. Got "
                f"{vector_field['algorithm']}"
            )

    def as_dict(self) -> Dict[str, List[Any]]:
        """Serialize every non-empty field group into plain dictionaries.

        Returns:
            A mapping from attribute name (``"text"``, ``"tag"``, ...) to a
            list with one dict of settings per field. Attributes that are
            not non-empty lists are left out, settings equal to ``None`` are
            dropped, and enum settings are replaced by their ``.value``.
        """
        schema: Dict[str, List[Any]] = {}
        # iter over all instance attributes; only non-empty lists are written
        for attr, group in self.__dict__.items():
            if not isinstance(group, list) or not group:
                continue
            schema[attr] = [
                {
                    # make enums into strings
                    key: (setting.value if isinstance(setting, Enum) else setting)
                    for key, setting in item.__dict__.items()
                    # don't write null values
                    if setting is not None
                }
                for item in group
            ]
        return schema

    @property
    def content_vector(self) -> Union[FlatVectorField, HNSWVectorField]:
        """Return the vector field whose name equals ``content_vector_key``.

        Raises:
            ValueError: if there are no vector fields, or none of them is
                named ``content_vector_key``.
        """
        if not self.vector:
            raise ValueError("No vector fields found")
        for field in self.vector:
            if field.name == self.content_vector_key:
                return field
        raise ValueError("No content_vector field found")

    @property
    def vector_dtype(self) -> np.dtype:
        """Look up the content vector's datatype in the dtype map."""
        # should only ever be called after pydantic has validated the schema
        return INMEMORYDB_VECTOR_DTYPE_MAP[self.content_vector.datatype]

    @property
    def is_empty(self) -> bool:
        """Return True only when tag, text, numeric and vector are all None.

        Note that ``text`` has a non-None default, so this is False unless
        ``text`` has been explicitly set to None.
        """
        return all(
            field is None for field in [self.tag, self.text, self.numeric, self.vector]
        )

    @property
    def metadata_keys(self) -> List[str]:
        """Return the names of all fields other than the content fields.

        Walks every list-valued model field and collects each entry's
        ``name``, skipping ``content_key`` and ``content_vector_key``.
        Returns an empty list when the schema is empty.
        """
        keys: List[str] = []
        if self.is_empty:
            return keys

        reserved = (self.content_key, self.content_vector_key)
        for field_name in type(self).model_fields:
            field_group = getattr(self, field_name)
            # Skip unset groups and the plain-string settings (content_key,
            # content_vector_key); only lists hold field schemas.
            if not isinstance(field_group, list):
                continue
            # check if it's a metadata field. exclude vector and content key
            keys.extend(
                field.name
                for field in field_group
                if not isinstance(field, str) and field.name not in reserved
            )
        return keys
def read_schema(
    index_schema: Optional[Union[Dict[str, List[Any]], str, os.PathLike]],
) -> Dict[str, Any]:
    """Read in the index schema from a dict or a YAML file.

    Args:
        index_schema: either the schema itself as a dict, or the location of
            a YAML file given as a ``str`` or any ``os.PathLike`` object.

    Returns:
        The dict that was passed in (returned as-is, not copied), or the
        result of ``yaml.safe_load`` on the file's contents.

    Raises:
        FileNotFoundError: if a ``str`` path does not point to an existing
            file. Path-like objects are opened directly, so a missing file
            surfaces as the error raised by ``open``.
        TypeError: if ``index_schema`` is none of the supported types
            (including ``None``).
    """
    if isinstance(index_schema, dict):
        return index_schema

    if isinstance(index_schema, str):
        # String paths are checked up front to give a clearer error message.
        if not Path(index_schema).resolve().is_file():
            raise FileNotFoundError(f"index_schema file {index_schema} does not exist")
        with open(index_schema, "rb") as f:
            return yaml.safe_load(f)

    # Accept every os.PathLike, as the annotation promises, not only
    # pathlib.Path instances.
    if isinstance(index_schema, os.PathLike):
        with open(index_schema, "rb") as f:
            return yaml.safe_load(f)

    raise TypeError(
        f"index_schema must be a dict, or path to a yaml file "
        f"Got {type(index_schema)}"
    )