Skip to content

Ragatouille retriever

RAGatouilleRetrieverPack #

Bases: BaseLlamaPack

RAGatouille Retriever 包。

Source code in llama_index/packs/ragatouille_retriever/base.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class RAGatouilleRetrieverPack(BaseLlamaPack):
    """RAGatouille retriever pack.

    Builds a RAGatouille index over the supplied documents (or loads an
    existing one from ``index_path``) and exposes it through a
    ``CustomRetriever`` and a ``RetrieverQueryEngine``.
    """

    def __init__(
        self,
        documents: List[Document],
        model_name: str = "colbert-ir/colbertv2.0",
        index_name: str = "my_index",
        llm: Optional[LLM] = None,
        index_path: Optional[str] = None,
        top_k: int = 10,
    ) -> None:
        """Initialize the pack.

        Args:
            documents: Documents to index. Only used for indexing when
                ``index_path`` is ``None``; always kept on ``self.documents``.
            model_name: Pretrained checkpoint loaded when a new index is
                built. Not used when an existing index is loaded.
            index_name: Name given to a newly built index and forwarded to
                the retriever.
            llm: LLM used by the query engine. Falls back to
                ``OpenAI(model="gpt-3.5-turbo")`` when omitted.
            index_path: Path of an existing index to load. When ``None`` a
                new index is built and its path is stored instead.
            top_k: Forwarded to ``CustomRetriever``.

        Raises:
            ValueError: If the ``ragatouille`` package cannot be imported.
        """
        self._model_name = model_name
        try:
            from ragatouille import RAGPretrainedModel
        except ImportError as err:
            # ValueError is kept for backward compatibility with existing
            # callers; the original ImportError is chained as the cause.
            raise ValueError(
                "RAGatouille is not installed. Please install it with `pip install ragatouille`."
            ) from err

        if index_path is None:
            # Build a fresh index. Honour the requested checkpoint instead of
            # always loading the default one.
            RAG = RAGPretrainedModel.from_pretrained(model_name)
            index_path = RAG.index(
                index_name=index_name,
                collection=[doc.get_content() for doc in documents],
                document_ids=[doc.doc_id for doc in documents],
                document_metadatas=[doc.metadata for doc in documents],
            )
        else:
            # Reuse an index that already exists on disk.
            RAG = RAGPretrainedModel.from_index(index_path)

        self.index_path = index_path

        self.custom_retriever = CustomRetriever(RAG, index_name=index_name, top_k=top_k)

        self.RAG = RAG
        self.documents = documents

        self.llm = llm or OpenAI(model="gpt-3.5-turbo")
        # Pass the resolved LLM (not the raw, possibly-None argument) so the
        # query engine uses the same model that get_modules() reports.
        self.query_engine = RetrieverQueryEngine.from_args(
            self.custom_retriever,
            service_context=ServiceContext.from_defaults(llm=self.llm),
        )

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to the index.

        The content, ``doc_id`` and metadata of every document are passed to
        ``self.RAG.add_to_index`` as three parallel lists.
        """
        doc_txts = [doc.get_content() for doc in documents]
        doc_ids = [doc.doc_id for doc in documents]
        doc_metadatas = [doc.metadata for doc in documents]

        self.RAG.add_to_index(
            new_collection=doc_txts,
            new_document_ids=doc_ids,
            new_document_metadatas=doc_metadatas,
        )

    def delete_documents(self, documents: List[Document]) -> None:
        """Delete documents from the index, identified by their ``doc_id``."""
        doc_ids = [doc.doc_id for doc in documents]

        self.RAG.delete_from_index(document_ids=doc_ids)

    def get_modules(self) -> Dict[str, Any]:
        """Return the pack's components keyed by name."""
        return {
            "RAG": self.RAG,
            "documents": self.documents,
            "retriever": self.custom_retriever,
            "llm": self.llm,
            "query_engine": self.query_engine,
            "index_path": self.index_path,
        }

    def run(self, *args: Any, **kwargs: Any) -> Any:
        """Run the pipeline by forwarding all arguments to the query engine."""
        return self.query_engine.query(*args, **kwargs)

add_documents #

add_documents(documents: List[Document]) -> None

添加文档。

Source code in llama_index/packs/ragatouille_retriever/base.py
 97
 98
 99
100
101
102
103
104
105
106
107
def add_documents(self, documents: List[Document]) -> None:
    """Add documents to the index.

    Collects the content, ``doc_id`` and metadata of each document into
    three parallel lists and passes them to ``self.RAG.add_to_index``.
    """
    # Assemble the keyword arguments first, then hand them over in one call.
    payload = {
        "new_collection": [item.get_content() for item in documents],
        "new_document_ids": [item.doc_id for item in documents],
        "new_document_metadatas": [item.metadata for item in documents],
    }
    self.RAG.add_to_index(**payload)

delete_documents #

delete_documents(documents: List[Document]) -> None

删除文档。

Source code in llama_index/packs/ragatouille_retriever/base.py
109
110
111
112
113
def delete_documents(self, documents: List[Document]) -> None:
    """Delete documents from the index.

    Documents are identified by their ``doc_id``; the collected ids are
    passed to ``self.RAG.delete_from_index``.
    """
    ids_to_remove = [item.doc_id for item in documents]
    rag_model = self.RAG
    rag_model.delete_from_index(document_ids=ids_to_remove)

get_modules #

get_modules() -> Dict[str, Any]

获取模块。

Source code in llama_index/packs/ragatouille_retriever/base.py
115
116
117
118
119
120
121
122
123
124
def get_modules(self) -> Dict[str, Any]:
    """Return the pack's components keyed by name.

    The mapping exposes the RAG model, the documents, the retriever, the
    LLM, the query engine and the index path held on this instance.
    """
    # (public key, attribute name) pairs, in the order they are exposed.
    exposed = (
        ("RAG", "RAG"),
        ("documents", "documents"),
        ("retriever", "custom_retriever"),
        ("llm", "llm"),
        ("query_engine", "query_engine"),
        ("index_path", "index_path"),
    )
    return {key: getattr(self, attr) for key, attr in exposed}

run #

run(*args: Any, **kwargs: Any) -> Any

运行流水线。

Source code in llama_index/packs/ragatouille_retriever/base.py
126
127
128
def run(self, *args: Any, **kwargs: Any) -> Any:
    """Run the pipeline.

    All positional and keyword arguments are forwarded unchanged to
    ``self.query_engine.query`` and its result is returned.
    """
    engine = self.query_engine
    response = engine.query(*args, **kwargs)
    return response