def standardize_column_names(df, target_columns):
"""
Uses a language model to standardize column names dynamically based on semantic similarity.
Args:
df (pd.DataFrame): The DataFrame whose columns need standardization.
target_columns (list): A list of target column names to map the DataFrame's columns to.
Returns:
pd.DataFrame: DataFrame with standardized column names.
"""
raw_columns = list(df.columns) # Extract the raw column names
raw_columns_str = ", ".join(raw_columns) # Convert to a comma-separated string
target_columns_str = ", ".join(target_columns) # Convert target columns to a string
# Define the LLM prompt
prompt = PromptTemplate(
input_variables=["raw_columns", "target_columns"], # Match keys exactly with dictionary passed to `invoke`
template=(
"You are tasked with standardizing column names. Here are the raw column names:\n"
"{raw_columns}\n"
"And here is the list of target column names to map to:\n"
"{target_columns}\n"
"Provide a mapping of raw column names to target column names as a dictionary in this format:\n"
"{'raw_column': 'target_column', ...}"
),
)
# Initialize LLMChain
chain = LLMChain(llm=llm, prompt=prompt)
try:
# Use `invoke` with correctly matched keys
response = chain.invoke({"raw_columns": raw_columns_str, "target_columns": target_columns_str})
mapping_result = response["text"] # Extract the LLM's generated text
column_mapping = eval(mapping_result) # Convert the string response into a Python dictionary
except Exception as e:
raise ValueError(f"Error in LLM-based column mapping: {e}")
# Apply the generated mapping to rename columns
df.rename(columns=column_mapping, inplace=True)
return df
The above code is projecting an error:
Exception has occurred: ValueError
Error in LLM-based column mapping: Missing some input keys: {"'raw_column'"}
File "/Users/pro/Desktop/Technology/Bicycle AI/Data_analysis_AI.py", line 57, in standardize_column_names
response = chain.invoke({"raw_columns": raw_columns_str, "target_columns": target_columns_str})
I don't know why it is mapping like that one.
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1745637697a4637496.html
评论列表(0条)