Skip to content

Commit 6acc475

Browse files
Metric table issues (#921)
* fixed the table issues * hiding context recall check * eval error with gemini resolved * context recall metric fix --------- Co-authored-by: kaustubh-darekar <[email protected]>
1 parent 8c1348a commit 6acc475

File tree

5 files changed

+45
-55
lines changed

5 files changed

+45
-55
lines changed

backend/src/ragas_eval.py

Lines changed: 33 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from datasets import Dataset
66
from dotenv import load_dotenv
77
from ragas import evaluate
8-
from ragas.metrics import answer_relevancy, faithfulness
8+
from ragas.metrics import answer_relevancy, faithfulness,context_entity_recall
99
from src.shared.common_fn import load_embedding_model
1010
from ragas.dataset_schema import SingleTurnSample
1111
from ragas.metrics import RougeScore, SemanticSimilarity, ContextEntityRecall
@@ -24,25 +24,29 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str):
2424
try:
2525
start_time = time.time()
2626
dataset = Dataset.from_dict(
27-
{"question": [question] * len(answer), "answer": answer, "contexts": [[ctx] for ctx in context]}
27+
{"question": [question] * len(answer),"reference": answer, "answer": answer, "contexts": [[ctx] for ctx in context]}
2828
)
2929
logging.info("Evaluation dataset created successfully.")
3030
if ("diffbot" in model) or ("ollama" in model):
3131
raise ValueError(f"Unsupported model for evaluation: {model}")
32+
elif ("gemini" in model):
33+
llm, model_name = get_llm(model=model)
34+
llm = LangchainLLMWrapper(llm,is_finished_parser=custom_is_finished_parser)
3235
else:
3336
llm, model_name = get_llm(model=model)
37+
llm = LangchainLLMWrapper(llm)
3438

3539
logging.info(f"Evaluating with model: {model_name}")
3640

3741
score = evaluate(
3842
dataset=dataset,
39-
metrics=[faithfulness, answer_relevancy],
43+
metrics=[faithfulness, answer_relevancy,context_entity_recall],
4044
llm=llm,
4145
embeddings=EMBEDDING_FUNCTION,
4246
)
4347

4448
score_dict = (
45-
score.to_pandas()[["faithfulness", "answer_relevancy"]]
49+
score.to_pandas()[["faithfulness", "answer_relevancy","context_entity_recall"]]
4650
.fillna(0)
4751
.round(4)
4852
.to_dict(orient="list")
@@ -67,13 +71,10 @@ async def get_additional_metrics(question: str, contexts: list, answers: list, r
6771
if ("diffbot" in model_name) or ("ollama" in model_name):
6872
raise ValueError(f"Unsupported model for evaluation: {model_name}")
6973
llm, model_name = get_llm(model=model_name)
70-
ragas_llm = LangchainLLMWrapper(llm)
7174
embeddings = EMBEDDING_FUNCTION
7275
embedding_model = LangchainEmbeddingsWrapper(embeddings=embeddings)
7376
rouge_scorer = RougeScore()
7477
semantic_scorer = SemanticSimilarity()
75-
entity_recall_scorer = ContextEntityRecall()
76-
entity_recall_scorer.llm = ragas_llm
7778
semantic_scorer.embeddings = embedding_model
7879
metrics = []
7980
for response, context in zip(answers, contexts):
@@ -82,18 +83,35 @@ async def get_additional_metrics(question: str, contexts: list, answers: list, r
8283
rouge_score = round(rouge_score,4)
8384
semantic_score = await semantic_scorer.single_turn_ascore(sample)
8485
semantic_score = round(semantic_score, 4)
85-
if "gemini" in model_name:
86-
entity_recall_score = "Not Available"
87-
else:
88-
entity_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context])
89-
entity_recall_score = await entity_recall_scorer.single_turn_ascore(entity_sample)
90-
entity_recall_score = round(entity_recall_score, 4)
9186
metrics.append({
9287
"rouge_score": rouge_score,
9388
"semantic_score": semantic_score,
94-
"context_entity_recall_score": entity_recall_score
9589
})
9690
return metrics
9791
except Exception as e:
9892
logging.exception("Error in get_additional_metrics")
99-
return {"error": str(e)}
93+
return {"error": str(e)}
94+
95+
96+
def custom_is_finished_parser(response):
97+
is_finished_list = []
98+
for g in response.flatten():
99+
resp = g.generations[0][0]
100+
if resp.generation_info is not None:
101+
if resp.generation_info.get("finish_reason") is not None:
102+
is_finished_list.append(
103+
resp.generation_info.get("finish_reason") == "STOP"
104+
)
105+
106+
elif (
107+
isinstance(resp, ChatGeneration)
108+
and t.cast(ChatGeneration, resp).message is not None
109+
):
110+
resp_message: BaseMessage = t.cast(ChatGeneration, resp).message
111+
if resp_message.response_metadata.get("finish_reason") is not None:
112+
is_finished_list.append(
113+
resp_message.response_metadata.get("finish_reason") == "STOP"
114+
)
115+
else:
116+
is_finished_list.append(True)
117+
return all(is_finished_list)

frontend/src/components/ChatBot/ChatModesSwitch.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ export default function ChatModesSwitch({
2929
onClick={() => switchToOtherMode(currentModeIndex - 1)}
3030
ariaLabel='left'
3131
>
32-
<ChevronLeftIconSolid className='n-size-token-7' />
32+
<ChevronLeftIconSolid className='n-size-token-4' />
3333
</IconButton>
3434
<TipWrapper tooltip={chatmodetoshow} placement='top'>
3535
<div
@@ -45,7 +45,7 @@ export default function ChatModesSwitch({
4545
onClick={() => switchToOtherMode(currentModeIndex + 1)}
4646
ariaLabel='right'
4747
>
48-
<ChevronRightIconSolid className='n-size-token-7' />
48+
<ChevronRightIconSolid className='n-size-token-4' />
4949
</IconButton>
5050
</Flex>
5151
);

frontend/src/components/ChatBot/CommonChatActions.tsx

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ export default function CommonActions({
4343
disabled={chat.isTyping || chat.isLoading}
4444
aria-label='copy text'
4545
>
46-
<ClipboardDocumentIconOutline />
46+
<ClipboardDocumentIconOutline className='n-size-token-4' />
4747
</IconButtonWithToolTip>
4848
<IconButtonWithToolTip
4949
placement='top'
@@ -54,7 +54,11 @@ export default function CommonActions({
5454
label={chat.speaking ? 'stop speaking' : 'text to speech'}
5555
aria-label='speech'
5656
>
57-
{chat.speaking ? <SpeakerXMarkIconOutline /> : <SpeakerWaveIconOutline />}
57+
{chat.speaking ? (
58+
<SpeakerXMarkIconOutline className='n-size-token-4' />
59+
) : (
60+
<SpeakerWaveIconOutline className='n-size-token-4' />
61+
)}
5862
</IconButtonWithToolTip>
5963
</>
6064
);

frontend/src/components/ChatBot/MetricsTab.tsx

Lines changed: 1 addition & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -119,23 +119,7 @@ function MetricsTab({
119119
}}
120120
/>
121121
),
122-
PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => {
123-
return (
124-
<DataGridComponents.PaginationNumericButton
125-
{...restProps}
126-
isSelected={isSelected}
127-
innerProps={{
128-
...innerProps,
129-
style: {
130-
...(isSelected && {
131-
backgroundSize: '200% auto',
132-
borderRadius: '10px',
133-
}),
134-
},
135-
}}
136-
/>
137-
);
138-
},
122+
Navigation: null,
139123
}}
140124
isKeyboardNavigable={false}
141125
/>

frontend/src/components/ChatBot/MultiModeMetrics.tsx

Lines changed: 3 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ export default function MultiModeMetrics({
102102
</Flex>
103103
),
104104
}),
105-
columnHelper.accessor((row) => row.context_entity_recall_score as number, {
105+
columnHelper.accessor((row) => row.context_entity_recall as number, {
106106
id: 'Entity Recall Score',
107107
cell: (info) => {
108108
const value = isNaN(info.getValue()) ? 'N.A' : info.getValue()?.toFixed(2);
@@ -201,7 +201,7 @@ export default function MultiModeMetrics({
201201
});
202202
useEffect(() => {
203203
if (isWithAdditionalMetrics === false) {
204-
table.setColumnVisibility({ 'Recall Score': false, 'Semantic Score': false, 'Rouge Score': false });
204+
table.setColumnVisibility({ 'Semantic Score': false, 'Rouge Score': false });
205205
} else {
206206
table.resetColumnVisibility(true);
207207
}
@@ -235,23 +235,7 @@ export default function MultiModeMetrics({
235235
}}
236236
/>
237237
),
238-
PaginationNumericButton: ({ isSelected, innerProps, ...restProps }) => {
239-
return (
240-
<DataGridComponents.PaginationNumericButton
241-
{...restProps}
242-
isSelected={isSelected}
243-
innerProps={{
244-
...innerProps,
245-
style: {
246-
...(isSelected && {
247-
backgroundSize: '200% auto',
248-
borderRadius: '10px',
249-
}),
250-
},
251-
}}
252-
/>
253-
);
254-
},
238+
Navigation: null,
255239
}}
256240
isKeyboardNavigable={false}
257241
/>

0 commit comments

Comments
 (0)