Skip to content

Llamafile

LlamafileEmbedding #

Bases: BaseEmbedding

用于llamafile嵌入的类。

llamafile允许您使用单个文件分发和运行大型语言模型。

要开始,请参阅:https://github.com/Mozilla-Ocho/llamafile

要使用此类,您首先需要:

  1. 下载一个llamafile。
  2. 使下载的文件可执行:chmod +x path/to/model.llamafile
  3. 以启用嵌入的服务器模式启动llamafile:

    ./path/to/model.llamafile --server --nobrowser --embedding

Source code in llama_index/embeddings/llamafile/base.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class LlamafileEmbedding(BaseEmbedding):
    """Embedding model backed by a running llamafile server.

    llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you first need to:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode with embeddings enabled:

        `./path/to/model.llamafile --server --nobrowser --embedding`

    Every embedding call sends an HTTP POST with a JSON body of the form
    ``{"content": ...}`` to ``{base_url}/embedding``.
    """

    base_url: str = Field(
        description="base url of the llamafile server", default="http://localhost:8080"
    )

    request_timeout: float = Field(
        default=DEFAULT_REQUEST_TIMEOUT,
        description="The timeout for making http request to llamafile API server",
    )

    def __init__(
        self,
        base_url: str = "http://localhost:8080",
        callback_manager: Optional[CallbackManager] = None,
        **kwargs,
    ) -> None:
        """Create the embedding client.

        Args:
            base_url: Base URL of the llamafile server.
            callback_manager: Optional callback manager forwarded to the
                base class.
            **kwargs: Any further fields (for example ``request_timeout``),
                forwarded unchanged to the base class constructor.
        """
        super().__init__(base_url=base_url, callback_manager=callback_manager, **kwargs)

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this class."""
        return "LlamafileEmbedding"

    def _post_embedding_request(self, request_body: dict) -> dict:
        """Synchronously POST ``request_body`` to the embedding endpoint.

        A new ``httpx.Client`` is opened for the call and closed on exit.

        Args:
            request_body: JSON-serializable payload to send.

        Returns:
            The decoded JSON body of the response.

        Raises:
            Whatever ``response.raise_for_status()`` raises when the server
            answers with an error status.
        """
        with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
            response = client.post(
                url=f"{self.base_url}/embedding",
                headers={"Content-Type": "application/json"},
                json=request_body,
            )
            # Force UTF-8 decoding instead of relying on charset detection.
            response.encoding = "utf-8"
            response.raise_for_status()

            return response.json()

    async def _apost_embedding_request(self, request_body: dict) -> dict:
        """Asynchronously POST ``request_body`` to the embedding endpoint.

        A new ``httpx.AsyncClient`` is opened for the call and closed on exit.

        Args:
            request_body: JSON-serializable payload to send.

        Returns:
            The decoded JSON body of the response.

        Raises:
            Whatever ``response.raise_for_status()`` raises when the server
            answers with an error status.
        """
        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
            response = await client.post(
                url=f"{self.base_url}/embedding",
                headers={"Content-Type": "application/json"},
                json=request_body,
            )
            # Force UTF-8 decoding instead of relying on charset detection.
            response.encoding = "utf-8"
            response.raise_for_status()

            return response.json()

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed a query string; queries are embedded exactly like texts."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Asynchronously embed a query string, exactly like a text."""
        return await self._aget_text_embedding(query)

    def _get_text_embedding(self, text: str) -> Embedding:
        """Synchronously embed a single input text.

        Returns:
            The value stored under the ``"embedding"`` key of the response.
        """
        return self._post_embedding_request({"content": text})["embedding"]

    async def _aget_text_embedding(self, text: str) -> Embedding:
        """Asynchronously embed a single input text.

        Returns:
            The value stored under the ``"embedding"`` key of the response.
        """
        payload = await self._apost_embedding_request({"content": text})
        return payload["embedding"]

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        """Synchronously embed a batch of input texts in one request.

        Returns:
            One embedding per entry of the response's ``"results"`` list,
            in the order the server returned them.
        """
        payload = self._post_embedding_request({"content": texts})
        return [output["embedding"] for output in payload["results"]]

    async def _aget_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        """Asynchronously embed a batch of input texts in one request.

        Returns:
            One embedding per entry of the response's ``"results"`` list,
            in the order the server returned them.
        """
        payload = await self._apost_embedding_request({"content": texts})
        return [output["embedding"] for output in payload["results"]]