from llama_index.core.query_pipeline import (
QueryPipeline as QP,
Link,
InputComponent,
)
from llama_index.experimental.query_engine.pandas import (
PandasInstructionParser,
)
from llama_index.llms.groq import Groq
from llama_index.core import PromptTemplate
import os
import pandas as pd
def query_search_csv(query, file):
    """Answer a natural-language question about a CSV file with an LLM.

    The CSV is loaded into a pandas DataFrame, the LLM is asked to turn the
    question into a single pandas expression, that expression is handed to
    ``PandasInstructionParser`` to be run against the DataFrame, and the LLM
    is then asked to phrase the result as a final answer.

    Args:
        query: The question to answer about the data.
        file: Anything ``pandas.read_csv`` accepts (here: a path to a CSV).

    Returns:
        The text of the LLM's synthesized answer. The same text is also
        printed to stdout.
    """
    # Step 1: Load the CSV file
    df = pd.read_csv(file)

    instruction_str = (
        "1. Convert the query to executable Python code using Pandas.\n"
        "2. The final line of code should be a Python expression that can be "
        "called with the `eval()` function.\n"
        "3. The code should represent a solution to the query.\n"
        "4. PRINT ONLY THE EXPRESSION.\n"
        "5. Do not quote the expression.\n"
    )
    pandas_prompt_str = (
        "You are working with a pandas dataframe in Python.\n"
        "The name of the dataframe is `df`.\n"
        "This is the result of `print(df.head())`:\n"
        "{df_str}\n\n"
        "Follow these instructions:\n"
        "{instruction_str}\n"
        "Query: {query_str}\n\n"
        "Expression:"
    )
    response_synthesis_prompt_str = (
        "Given an input question, synthesize a response from the query results.\n"
        "Query: {query_str}\n\n"
        "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
        "Pandas Output: {pandas_output}\n\n"
        "Response: "
    )

    # Step 2: Generate the Pandas expression
    # Only the first five rows are shown to the model, so it sees the column
    # layout without the whole file being placed in the prompt.
    pandas_prompt = PromptTemplate(pandas_prompt_str).partial_format(
        instruction_str=instruction_str,
        df_str=df.head(5),
    )
    # NOTE(review): "API_KEY" is a placeholder; supply a real key (preferably
    # from configuration rather than source code) before running.
    llm = Groq(api_key="API_KEY", model='llama3-70b-8192', temperature=0)
    pandas_expression = llm.complete(pandas_prompt.format(query_str=query)).text

    # Step 3: Execute the Pandas expression
    # NOTE(review): this runs model-generated code against `df`; only use it
    # with trusted queries and data.
    pandas_output_parser = PandasInstructionParser(df)
    pandas_output = pandas_output_parser.parse(pandas_expression)

    # Step 4: Generate the final response
    response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str).format(
        query_str=query,
        pandas_instructions=pandas_expression,
        pandas_output=pandas_output,
    )
    final_response = llm.complete(response_synthesis_prompt).text
    print(final_response)
    return final_response
# Run the same question against every CSV file in ./structured.
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# to make the order of the printed answers the same on every machine.
for csv_file in sorted(os.listdir("./structured")):
    if not csv_file.endswith(".csv"):
        continue
    csv_path = os.path.join("./structured", csv_file)
    try:
        query_search_csv("What is the lowest rate?and for which product?", csv_path)
    except Exception as e:
        # Best-effort: keep going with the remaining files, but report which
        # file failed and why instead of hiding the cause.
        print(f"No data ({csv_file}): {e!r}")
This code works fine, but my question is why I'm getting different output for the same files in Google Colab and on my laptop.
Here is the output from Google Colab:
Based on the query results, the lowest rate is associated with the CHIP product. The lowest rate is 0.00312 for the product "QC ISOPROPYL ALCOHOL 91%". Based on the query results, the lowest rate is 0, and it is for the Medicaid product in Massachusetts.
Here is the output from my laptop:
Based on the query results, the lowest rate is for the CHIP (Children's Health Insurance Program) in Colorado, with a rate of True (which likely represents a Boolean. value indicating availability or coverage, rather than a numerical rate). There is no Medicaid rate available for this state. The lowest rate is for the product AIRDUO RESPICLICK 232-14 MCG. Based on the query results, the lowest rate is 0, and it is for the product Medicaid in the state of Massachusetts.
The Colab output is accurate, but as far as I can tell my laptop's output is not, and I don't know why.
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1745093414a4610828.html
评论列表(0条)