Skip to content

Nomic

NomicEmbedding #

Bases: BaseEmbedding

NomicEmbedding使用Nomic API生成嵌入。

Source code in llama_index/embeddings/nomic/base.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class NomicEmbedding(BaseEmbedding):
    """NomicEmbedding使用Nomic API生成嵌入。"""

    query_task_type: Optional[NomicTaskType] = Field(
        description="Task type for queries",
    )
    document_task_type: Optional[NomicTaskType] = Field(
        description="Task type for documents",
    )
    dimensionality: Optional[int] = Field(
        description="Embedding dimension, for use with Matryoshka-capable models",
    )
    model_name: str = Field(description="Embedding model name")
    inference_mode: NomicInferenceMode = Field(
        description="Whether to generate embeddings locally",
    )
    device: Optional[str] = Field(description="Device to use for local embeddings")

    def __init__(
        self,
        model_name: str = "nomic-embed-text-v1",
        embed_batch_size: int = 32,
        api_key: Optional[str] = None,
        callback_manager: Optional[CallbackManager] = None,
        query_task_type: Optional[str] = "search_query",
        document_task_type: Optional[str] = "search_document",
        dimensionality: Optional[int] = 768,
        inference_mode: str = "remote",
        device: Optional[str] = None,
    ):
        if api_key is not None:
            nomic.login(api_key)

        super().__init__(
            model_name=model_name,
            embed_batch_size=embed_batch_size,
            callback_manager=callback_manager,
            query_task_type=query_task_type,
            document_task_type=document_task_type,
            dimensionality=dimensionality,
            inference_mode=inference_mode,
            device=device,
        )

    @classmethod
    def class_name(cls) -> str:
        return "NomicEmbedding"

    def _embed(
        self, texts: List[str], task_type: Optional[str] = None
    ) -> List[List[float]]:
        result = nomic.embed.text(
            texts,
            model=self.model_name,
            task_type=task_type,
            dimensionality=self.dimensionality,
            inference_mode=self.inference_mode,
            device=self.device,
        )
        return result["embeddings"]

    def _get_query_embedding(self, query: str) -> List[float]:
        return self._embed([query], task_type=self.query_task_type)[0]

    async def _aget_query_embedding(self, query: str) -> List[float]:
        self._warn_async()
        return self._get_query_embedding(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        return self._embed([text], task_type=self.document_task_type)[0]

    async def _aget_text_embedding(self, text: str) -> List[float]:
        self._warn_async()
        return self._get_text_embedding(text)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        return self._embed(texts, task_type=self.document_task_type)

    def _warn_async() -> None:
        warnings.warn(
            f"{self.class_name()} does not implement async embeddings, falling back to sync method.",
        )