NER Results Empty in Query Processing Despite Parallel Intent and Entity Extraction

This code is part of an API endpoint that processes a user's query in a conversational system. It tracks the execution time for each step, including fetching user data, refining the query based on previous interactions, and extracting intents and entities. The NER (Named Entity Recognition) step uses an XLM-RoBERTa model to extract entities from the query, and this process is performed in parallel with intent analysis via OpenAI's service. The results from both steps are combined and used to filter intents and entities with a confidence score greater than 0.6. It then generates embeddings for the query, performs a search in a vector database, and processes the search results. The response is then generated, and the chat history is saved for future use.
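In condensed form, the parallel extraction and the 0.6 confidence filtering described above follow the usual asyncio.gather / asyncio.to_thread pattern. A self-contained toy version (with dummy stand-ins for the intent and NER services, just to show the shape of the call, not the real implementations) looks like this:

    import asyncio

    def analyze_intents(query: str) -> dict:
        # Dummy stand-in for the blocking OpenAI intent call
        return {"intents": [{"name": "Search", "score": 0.9}, {"name": "Filter", "score": 0.5}], "keywords": []}

    async def get_ner_results(query: str) -> list:
        # Dummy stand-in for the XLM-RoBERTa token-classification step
        return [{"entity": "People", "text": "people", "confidence": 0.99}]

    async def main():
        refined_query = "Show me people working on data science?"
        intent_analysis, ner_entities = await asyncio.gather(
            asyncio.to_thread(analyze_intents, refined_query),  # blocking call runs in a worker thread
            get_ner_results(refined_query),                      # coroutine runs on the event loop
        )
        intent_analysis["entities"] = ner_entities
        # Keep only intents with a confidence score of at least 0.6
        filtered_intents = [i for i in intent_analysis["intents"] if i["score"] >= 0.6]
        print(intent_analysis)
        print(filtered_intents)

    asyncio.run(main())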

The output I am currently getting is:

Intent Analysis: {'intents': [{'name': 'Search', 'score': 0.9}, {'name': 'Filter', 'score': 0.7}], 'entities': [{'name': 'People', 'score': 0.9}], 'keywords': ['data science', 'people', 'working', 'data'], 'person': None}
NER Results: []

The code is:

@router.post("/v2", response_model=QueryResponse)
async def query_handler(payload: QueryRequest, request: Request):
    start_time = time.time()  # Track overall execution time
    logger.info(f"Received query: {payload.query} from user {payload.user_id}")

try:
    func_start = time.time()
    user_data = request.state.user_data
    upn = user_data.get('upn')
    query = payload.query
    logger.info(f"Fetched user data in {time.time() - func_start:.4f} seconds")
    
    # Step 1 : Get Chat data from redis
    func_start = time.time()
    user_chats = redis_client.get_last_chats(upn)
    logger.info(f"Fetched user chat data in {time.time() - func_start:.4f} seconds")
    
    # Step 2 : Refine the query using last chat of users
    refined_query = query
    if user_chats:
        func_start = time.time()
        refined_query = query_refinement_service.refine_query(query, user_chats)
        logger.info(f"Refined query: {refined_query} in {time.time() - func_start:.4f} seconds")
    else:
        logger.info("No user chats available, using the original query.")

    refined_query = clean_text(refined_query)
    print(f"Cleaned Query: {refined_query}")  # Debugging print

    # Step 3 : Get intents & entities in parallel
    func_start = time.time()
    ner_entities = []  # Initialize to avoid "not associated with a value" error
    modelpath = r"**<Blurred Path>**"  # Blurred model path
    model = XLMRobertaForTokenClassification.from_pretrained(modelpath)
    tokenizer = XLMRobertaTokenizerFast.from_pretrained(modelpath)
    id2label = model.config.id2label

    async def get_ner_results(text):
        print(f"NER Model received query: {text}")  
        tokens = tokenizer(text, return_tensors="pt", return_offsets_mapping=True, truncation=True)
        offset_mapping = tokens.pop("offset_mapping") 

        with torch.no_grad():
            outputs = model(**tokens)
            logits = outputs.logits  
            predictions = torch.argmax(logits, dim=2).squeeze().tolist()
            confidences = torch.nn.functional.softmax(logits, dim=2)

            entities = []
            for i, (token_id, label_id) in enumerate(zip(tokens.input_ids.squeeze().tolist(), predictions)):
                if label_id != 0:
                    entity_text = tokenizer.decode([token_id], skip_special_tokens=True)  
                    confidence_score = confidences[0, i, label_id].item()
                    entity_label = id2label.get(label_id, "Unknown")

                    if entity_label.startswith('B-') or entity_label.startswith('I-'):
                        entity_label = entity_label[2:]  
                    entities.append({"entity": entity_label, "text": entity_text, "confidence": round(confidence_score, 4)})

        print(f"NER Results: {entities}")  # Debugging print
        return entities
    
    if not Config.IS_DATABRICKS_NER_ENABLED:
        intent_analysis = intent_entity_analyzer.analyze_query(refined_query)  # Get list of entities & intents from OpenAI
    else:
        intent_analysis, ner_entities = await asyncio.gather(
            asyncio.to_thread(intent_entity_analyzer.analyze_query_v2, refined_query),  # Get intents from OpenAI
            get_ner_results(refined_query)  # Extract entities using XLM-RoBERTa
        )
        intent_analysis["entities"] = ner_entities  # Attach extracted entities

    # Print the results to debug
    print("Intent Analysis:", intent_analysis)
    print("NER Results:", ner_entities)
    logger.info(f"Intent & Entity analysis completed in {time.time() - func_start:.4f} seconds")

    # Step 5: Build Azure Search filters (no external call to Azure here)
    func_start = time.time()
    acr_filters = ner_analyzer.build_azure_search_filter(intent_analysis) if intent_analysis.get("intents") else None
    logger.info(f"Built Azure search filters in {time.time() - func_start:.4f} seconds")
    logger.info(f"Azure search filters is : ${acr_filters}")

    intents = intent_analysis.get('intents', [])
    entities = intent_analysis.get('entities', [])
    keywords = intent_analysis.get('keywords', [])

    # Filtering intents & entities
    func_start = time.time()
    filtered_intents = [intent for intent in intents if intent["score"] >= 0.6]
    filtered_entities = [entity for entity in entities if entity["score"] >= 0.6]
    logger.info(f"Filtered intents/entities in {time.time() - func_start:.4f} seconds")

    # Generate Embeddings
    func_start = time.time()
    logger.info("Generating embeddings for the refined query")
    query_embedding = embedding_generator.generate_embedding(refined_query)["embedding"]
    logger.info(f"Generated embeddings in {time.time() - func_start:.4f} seconds")

    # Perform Search
    func_start = time.time()
    search_results = search_vector_documents(query=refined_query, embedding=query_embedding, filter_expression=acr_filters)
    processed_results = url_generator.process_results(search_results)
    logger.info(f"Search & processing completed in {time.time() - func_start:.4f} seconds")

    if not processed_results:
        logger.warning(f"No results found for query: {query}")
        raise HTTPException(status_code=404, detail="No search results found.")

    top_intent = filtered_intents[0]["name"] if filtered_intents else "Search"

    # Intent Handling
    func_start = time.time()
    response = intent_agent.handle_query(top_intent, query, processed_results)
    logger.info(f"Handled intent {top_intent} in {time.time() - func_start:.4f} seconds")

    # Save & Retrieve Chat History
    func_start = time.time()
    chat_data = {
        "query": query,
        "intents": intents,
        "entities": entities,
        "keywords": keywords,
        "response": response["results"]
    }

    save_chat_task = asyncio.to_thread(redis_client.save_chat, upn, chat_data)
    generate_response_task = asyncio.to_thread(web_response_agent.generate_response, query, user_chats, processed_results)
    saved_chat, final_response = await asyncio.gather(save_chat_task, generate_response_task)
    logger.info(f"Stored chat history and generated web response in {time.time() - func_start:.4f} seconds")

    if isinstance(final_response, str):
        func_start = time.time()
        final_response = sanitize_json_response(final_response)
        final_response["response"]["summary"] = generate_summary(final_response, filtered_intents, filtered_entities)
        logger.info(f"Generated summary in {time.time() - func_start:.4f} seconds")
        logger.info(f"Total execution time: {time.time() - start_time:.4f} seconds")
        return QueryResponse(response=final_response["response"], intent=filtered_intents, entity=filtered_entities, search_results=processed_results)

    logger.info(f"Total execution time: {time.time() - start_time:.4f} seconds")
    return {"intent": "Unknown", "message": "No suitable handler for this intent."}

except json.JSONDecodeError as e:
    logger.error(f"JSON decoding failed: {e}")
    raise HTTPException(status_code=500, detail="Invalid JSON response from web_response_agent")
except Exception as e:
    logger.error(f"Unexpected error occurred in query_handler: {e}")
    if isinstance(e, HTTPException):
        raise e 
    else:
        raise HTTPException(status_code=500, detail=str(e))

Let's say refined_query is 'Show me people working on data science in XYZ Company?'. I want the output to be like:

Intent Analysis: {'intents': [{'name': 'Search', 'score': 0.9}, {'name': 'Filter', 'score': 0.7}], 'entities': [{'name': 'People', 'score': 0.9}], 'keywords': ['data science', 'XYZ', 'people', 'working', 'data'], 'person': None}
NER Results: [{'entity': 'People', 'score': 0.9999713}]
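For reference, a minimal standalone check of the same checkpoint on that sentence, using the Hugging Face token-classification pipeline (MODEL_PATH below is just a placeholder for the blurred path), would be something like:

    from transformers import pipeline

    MODEL_PATH = "<blurred model path>"  # placeholder for the same checkpoint used in Step 3

    ner = pipeline(
        "token-classification",
        model=MODEL_PATH,
        tokenizer=MODEL_PATH,
        aggregation_strategy="simple",  # merges B-/I- word pieces into whole entities
    )
    print(ner("Show me people working on data science in XYZ Company?"))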

Can anybody help me figure out what the issue is and why I am getting empty NER results? I think some change needs to be made in Step 3.
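If it helps, the raw per-token predictions can also be inspected outside the endpoint like this (again, MODEL_PATH is a placeholder); if every token comes back as label 0 / 'O', the model itself is finding no entities in the cleaned query:

    import torch
    from transformers import XLMRobertaForTokenClassification, XLMRobertaTokenizerFast

    MODEL_PATH = "<blurred model path>"  # placeholder for the same checkpoint used in Step 3
    model = XLMRobertaForTokenClassification.from_pretrained(MODEL_PATH)
    tokenizer = XLMRobertaTokenizerFast.from_pretrained(MODEL_PATH)

    text = "Show me people working on data science in XYZ Company?"
    tokens = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**tokens).logits
    pred_ids = torch.argmax(logits, dim=2).squeeze().tolist()

    print(model.config.id2label)                                          # label mapping of the checkpoint
    print(tokenizer.convert_ids_to_tokens(tokens.input_ids[0].tolist()))  # sub-word tokens
    print([model.config.id2label[i] for i in pred_ids])                   # predicted label per token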
