# Usage example for PandasQueryEngine.
# Requires the experimental package: `pip install llama-index-experimental`.
#
# WARNING: PandasQueryEngine evaluates LLM-generated pandas code via `eval`;
# arbitrary code execution is possible, so do not run it on untrusted input
# or in production without heavy sandboxing.
import pandas as pd
from llama_index.experimental.query_engine.pandas import PandasQueryEngine

df = pd.DataFrame(
    {
        "city": ["Toronto", "Tokyo", "Berlin"],
        "population": [2930000, 13960000, 3645000],
    }
)

# No `llm` is passed, so the engine falls back to `Settings.llm`.
# verbose=True prints the generated pandas instructions and their output.
query_engine = PandasQueryEngine(df=df, verbose=True)
response = query_engine.query("What is the population of Tokyo?")
Source code in llama-index-experimental/llama_index/experimental/query_engine/pandas/pandas_query_engine.py
class PandasQueryEngine(BaseQueryEngine):
    """
    Pandas query engine.

    Convert natural language to Pandas python code.

    WARNING: This tool provides the Agent access to the `eval` function.
    Arbitrary code execution is possible on the machine running this tool.
    This tool is not recommended to be used in a production setting, and would
    require heavy sandboxing or virtual machines.

    Args:
        df (pd.DataFrame): Pandas dataframe to use.
        instruction_str (Optional[str]): Instruction string to use.
        instruction_parser (Optional[PandasInstructionParser]): The output parser
            that takes the pandas query output string and returns a string.
            It defaults to PandasInstructionParser and takes pandas DataFrame,
            and any output kwargs as parameters.
            eg.kwargs["max_colwidth"] = [int] is used to set the length of text
            that each column can display during str(df). Set it to a higher number
            if there is possibly long text in the dataframe.
        pandas_prompt (Optional[BasePromptTemplate]): Pandas prompt to use.
        output_kwargs (dict): Additional output processor kwargs for the
            PandasInstructionParser.
        head (int): Number of rows to show in the table context.
        verbose (bool): Whether to print verbose output.
        llm (Optional[LLM]): Language model to use.
        synthesize_response (bool): Whether to synthesize a response from the
            query results. Defaults to False.
        response_synthesis_prompt (Optional[BasePromptTemplate]): A
            Response Synthesis BasePromptTemplate to use for the query. Defaults to
            DEFAULT_RESPONSE_SYNTHESIS_PROMPT.

    Examples:
        `pip install llama-index-experimental`

        ```python
        import pandas as pd
        from llama_index.experimental.query_engine.pandas import PandasQueryEngine

        df = pd.DataFrame(
            {
                "city": ["Toronto", "Tokyo", "Berlin"],
                "population": [2930000, 13960000, 3645000]
            }
        )

        query_engine = PandasQueryEngine(df=df, verbose=True)

        response = query_engine.query("What is the population of Tokyo?")
        ```

    """

    def __init__(
        self,
        df: pd.DataFrame,
        instruction_str: Optional[str] = None,
        instruction_parser: Optional[PandasInstructionParser] = None,
        pandas_prompt: Optional[BasePromptTemplate] = None,
        output_kwargs: Optional[dict] = None,
        head: int = 5,
        verbose: bool = False,
        llm: Optional[LLM] = None,
        synthesize_response: bool = False,
        response_synthesis_prompt: Optional[BasePromptTemplate] = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize params.

        Every optional argument falls back to a module/settings default when
        it is falsy. ``**kwargs`` is accepted for call-site compatibility but
        is not used by this constructor.
        """
        self._df = df
        self._head = head
        self._pandas_prompt = pandas_prompt or DEFAULT_PANDAS_PROMPT
        self._instruction_str = instruction_str or DEFAULT_INSTRUCTION_STR
        # The default parser is bound to the same dataframe and receives the
        # caller's output kwargs (or an empty dict).
        self._instruction_parser = instruction_parser or PandasInstructionParser(
            df, output_kwargs or {}
        )
        self._verbose = verbose

        self._llm = llm or Settings.llm
        self._synthesize_response = synthesize_response
        self._response_synthesis_prompt = (
            response_synthesis_prompt or DEFAULT_RESPONSE_SYNTHESIS_PROMPT
        )

        super().__init__(callback_manager=Settings.callback_manager)

    def _get_prompt_modules(self) -> PromptMixinType:
        """Get prompt sub-modules (this engine has none)."""
        return {}

    def _get_prompts(self) -> Dict[str, Any]:
        """Get prompts."""
        return {
            "pandas_prompt": self._pandas_prompt,
            "response_synthesis_prompt": self._response_synthesis_prompt,
        }

    def _update_prompts(self, prompts: PromptDictType) -> None:
        """Update prompts; keys other than the two known ones are ignored."""
        if "pandas_prompt" in prompts:
            self._pandas_prompt = prompts["pandas_prompt"]
        if "response_synthesis_prompt" in prompts:
            self._response_synthesis_prompt = prompts["response_synthesis_prompt"]

    @classmethod
    def from_index(cls, index: PandasIndex, **kwargs: Any) -> "PandasQueryEngine":
        """
        Build an engine from a (deprecated) PandasIndex.

        Logs a deprecation warning and forwards ``index.df`` plus ``kwargs``
        to the regular constructor.
        """
        logger.warning(
            "PandasIndex is deprecated. "
            "Directly construct PandasQueryEngine with df instead."
        )
        return cls(df=index.df, **kwargs)

    def _get_table_context(self) -> str:
        """Get table context: the string form of the first ``head`` rows."""
        return str(self._df.head(self._head))

    def _run_pandas_instructions(self, pandas_response_str: str) -> Any:
        """Run the instruction parser on the LLM output, echoing when verbose."""
        if self._verbose:
            print_text(f"> Pandas Instructions:\n```\n{pandas_response_str}\n```\n")
        pandas_output = self._instruction_parser.parse(pandas_response_str)
        if self._verbose:
            print_text(f"> Pandas Output: {pandas_output}\n")
        return pandas_output

    def _build_response(
        self, response_str: str, pandas_response_str: str, pandas_output: Any
    ) -> Response:
        """Wrap the final text with the instruction/raw-output metadata."""
        response_metadata = {
            "pandas_instruction_str": pandas_response_str,
            "raw_pandas_output": pandas_output,
        }
        return Response(response=response_str, metadata=response_metadata)

    def _query(self, query_bundle: QueryBundle) -> Response:
        """
        Answer a query.

        Asks the LLM for pandas instructions given the table context, runs
        them through the instruction parser, and returns either the
        stringified parser output or, when ``synthesize_response`` is set, an
        LLM-synthesized answer. The metadata always carries the generated
        instructions and the raw parser output.
        """
        context = self._get_table_context()

        pandas_response_str = self._llm.predict(
            self._pandas_prompt,
            df_str=context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        pandas_output = self._run_pandas_instructions(pandas_response_str)

        if self._synthesize_response:
            response_str = str(
                self._llm.predict(
                    self._response_synthesis_prompt,
                    query_str=query_bundle.query_str,
                    pandas_instructions=pandas_response_str,
                    pandas_output=pandas_output,
                )
            )
        else:
            response_str = str(pandas_output)

        return self._build_response(response_str, pandas_response_str, pandas_output)

    async def _aquery(self, query_bundle: QueryBundle) -> Response:
        """
        Answer a query asynchronously.

        Same steps and result as ``_query``, but the LLM round-trips are
        awaited via ``apredict`` so they do not block the event loop. The
        instruction parser itself still runs synchronously.
        """
        context = self._get_table_context()

        pandas_response_str = await self._llm.apredict(
            self._pandas_prompt,
            df_str=context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        pandas_output = self._run_pandas_instructions(pandas_response_str)

        if self._synthesize_response:
            response_str = str(
                await self._llm.apredict(
                    self._response_synthesis_prompt,
                    query_str=query_bundle.query_str,
                    pandas_instructions=pandas_response_str,
                    pandas_output=pandas_output,
                )
            )
        else:
            response_str = str(pandas_output)

        return self._build_response(response_str, pandas_response_str, pandas_output)