Pandas AI

PandasAIReader #

基类: BaseReader

Pandas AI 读取器。

对 https://github.com/gventuri/pandas-ai 的轻量级封装。

参数:

名称	类型	描述	默认值
`pandas_llm`	`Optional[PandasLLM]`	使用的LLM。默认为None，此时会使用`OpenAI()`。	`None`
`concat_rows`	`bool`	是否将所有行合并为一个文档。如果设为False，将为每一行创建一个文档。默认为True。	`True`
`col_joiner`	`str`	用于每行列连接的分隔符。默认设置为", "。	`', '`
`row_joiner`	`str`	用于连接每行的分隔符。仅在`concat_rows=True`时使用。默认为"\n"。	`'\n'`
`pandas_config`	`dict`	传递给`pandas.read_csv`函数调用的选项。更多信息请参阅 https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html 。默认为空字典，这意味着pandas将尝试自行推断分隔符、表头等。	`{}`

Source code in llama-index-integrations/readers/llama-index-readers-pandas-ai/llama_index/readers/pandas_ai/base.py

class PandasAIReader(BaseReader):
    r"""
    Pandas AI reader.

    Light wrapper around https://github.com/gventuri/pandas-ai.

    Args:
        pandas_llm (Optional[PandasLLM]): LLM handed to PandasAI.
            Defaults to None, in which case ``OpenAI()`` is instantiated.

        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

        col_joiner (str): Separator to use for joining cols per row.
            Set to ", " by default.

        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.

        pandas_config (Optional[dict]): Options for the `pandas.read_csv`
            function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
            for more information.
            Defaults to None, which is treated as an empty dict; this means
            pandas will try to figure out the separators, table head, etc.
            on its own.

    """

    def __init__(
        self,
        pandas_llm: Optional[PandasLLM] = None,
        concat_rows: bool = True,
        col_joiner: str = ", ",
        row_joiner: str = "\n",
        pandas_config: Optional[dict] = None,
    ) -> None:
        """Store the LLM and the options later passed to PandasCSVReader."""
        self._llm = pandas_llm or OpenAI()
        self._pandasai_config = {"llm": self._llm}

        self._concat_rows = concat_rows
        self._col_joiner = col_joiner
        self._row_joiner = row_joiner
        # Build the empty dict per instance: a `{}` literal in the signature
        # would be one object shared (and mutable) across every reader.
        self._pandas_config = {} if pandas_config is None else pandas_config

    def run_pandas_ai(
        self,
        initial_df: pd.DataFrame,
        query: str,
        is_conversational_answer: bool = False,
    ) -> Any:
        """
        Run a PandasAI query against a dataframe.

        Args:
            initial_df (pd.DataFrame): dataframe wrapped in a SmartDataframe.
            query (str): query passed to ``SmartDataframe.chat``.
            is_conversational_answer (bool): currently unused by this method;
                it is not forwarded to PandasAI.

        Returns:
            Any: whatever ``SmartDataframe.chat`` returns for the query.

        """
        smart_df = SmartDataframe(initial_df, config=self._pandasai_config)
        return smart_df.chat(query=query)

    def load_data(
        self,
        initial_df: pd.DataFrame,
        query: str,
        is_conversational_answer: bool = False,
    ) -> List[Document]:
        """
        Run a PandasAI query and convert its result into Documents.

        Args:
            initial_df (pd.DataFrame): dataframe to query.
            query (str): query passed on to ``run_pandas_ai``.
            is_conversational_answer (bool): if True, the raw result is
                wrapped in a single Document. If False, the result is written
                to a temporary CSV file and loaded with PandasCSVReader.

        Returns:
            List[Document]: documents built from the query result.

        Raises:
            ValueError: if the non-conversational result is not a numpy
                scalar, a pandas Series or a pandas DataFrame.

        """
        result = self.run_pandas_ai(
            initial_df, query, is_conversational_answer=is_conversational_answer
        )
        if is_conversational_answer:
            return [Document(text=result)]

        # Normalise the result to something that has `to_csv`.
        if isinstance(result, np.generic):
            result = pd.Series(result)
        elif not isinstance(result, (pd.Series, pd.DataFrame)):
            raise ValueError(f"Unexpected type for result: {type(result)}")

        # if not conversational answer, use Pandas CSV Reader
        reader = PandasCSVReader(
            concat_rows=self._concat_rows,
            col_joiner=self._col_joiner,
            row_joiner=self._row_joiner,
            pandas_config=self._pandas_config,
        )

        with TemporaryDirectory() as tmpdir:
            outpath = Path(tmpdir) / "out.csv"
            # Hand pandas the path rather than a text-mode handle: pandas then
            # controls newline handling and writes UTF-8, instead of relying
            # on the platform's newline translation and locale encoding.
            # The index is never written (index=False).
            result.to_csv(outpath, index=False)

            # Load inside the `with` block, before the directory is removed.
            return reader.load_data(outpath)

run_pandas_ai #

run_pandas_ai(initial_df: DataFrame, query: str, is_conversational_answer: bool = False) -> Any

对数据框运行 PandasAI 查询并返回结果。

Source code in llama-index-integrations/readers/llama-index-readers-pandas-ai/llama_index/readers/pandas_ai/base.py

def run_pandas_ai(
    self,
    initial_df: pd.DataFrame,
    query: str,
    is_conversational_answer: bool = False,
) -> Any:
    """
    Run a PandasAI query against a dataframe.

    Args:
        initial_df (pd.DataFrame): dataframe wrapped in a SmartDataframe.
        query (str): query passed to ``SmartDataframe.chat``.
        is_conversational_answer (bool): currently unused by this method;
            it is not forwarded to PandasAI.

    Returns:
        Any: whatever ``SmartDataframe.chat`` returns for the query.

    """
    # The stored config carries the LLM chosen at construction time.
    return SmartDataframe(initial_df, config=self._pandasai_config).chat(
        query=query
    )

load_data #

load_data(initial_df: DataFrame, query: str, is_conversational_answer: bool = False) -> List[Document]

运行 PandasAI 查询，并将结果转换为文档列表。

Source code in llama-index-integrations/readers/llama-index-readers-pandas-ai/llama_index/readers/pandas_ai/base.py

def load_data(
    self,
    initial_df: pd.DataFrame,
    query: str,
    is_conversational_answer: bool = False,
) -> List[Document]:
    """
    Run a PandasAI query and convert its result into Documents.

    Args:
        initial_df (pd.DataFrame): dataframe to query.
        query (str): query passed on to ``run_pandas_ai``.
        is_conversational_answer (bool): if True, the raw result is wrapped
            in a single Document. If False, the result is written to a
            temporary CSV file and loaded with PandasCSVReader.

    Returns:
        List[Document]: documents built from the query result.

    Raises:
        ValueError: if the non-conversational result is not a numpy scalar,
            a pandas Series or a pandas DataFrame.

    """
    result = self.run_pandas_ai(
        initial_df, query, is_conversational_answer=is_conversational_answer
    )
    if is_conversational_answer:
        return [Document(text=result)]

    # Normalise the result to something that has `to_csv`.
    if isinstance(result, np.generic):
        result = pd.Series(result)
    elif not isinstance(result, (pd.Series, pd.DataFrame)):
        raise ValueError(f"Unexpected type for result: {type(result)}")

    # if not conversational answer, use Pandas CSV Reader
    reader = PandasCSVReader(
        concat_rows=self._concat_rows,
        col_joiner=self._col_joiner,
        row_joiner=self._row_joiner,
        pandas_config=self._pandas_config,
    )

    with TemporaryDirectory() as tmpdir:
        outpath = Path(tmpdir) / "out.csv"
        # Hand pandas the path rather than a text-mode handle: pandas then
        # controls newline handling and writes UTF-8, instead of relying on
        # the platform's newline translation and locale encoding.
        # The index is never written (index=False).
        result.to_csv(outpath, index=False)

        # Load inside the `with` block, before the directory is removed.
        return reader.load_data(outpath)