@@ -60,8 +60,8 @@ class PairwiseStringEvalChain(LLMChain):
         >>> chain = PairwiseStringEvalChain.from_llm(llm=llm)
         >>> result = chain.evaluate_string_pairs(
         ...     input = "What is the chemical formula for water?",
-        ...     output_a = "H2O",
-        ...     output_b = (
+        ...     prediction = "H2O",
+        ...     prediction_b = (
         ...         "The chemical formula for water is H2O, which means"
         ...         " there are two hydrogen atoms and one oxygen atom."
         ...     reference = "The chemical formula for water is H2O.",
@@ -101,7 +101,7 @@ def from_llm(
         Returns:
             PairwiseStringEvalChain: The initialized PairwiseStringEvalChain.
         """
-        expected_input_vars = {"output_a", "output_b", "input"}
+        expected_input_vars = {"prediction", "prediction_b", "input"}
         if prompt is None:
             if require_reference:
                 expected_input_vars.add("reference")
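With this rename, a custom prompt passed to `from_llm` has to declare the new variable names. A minimal sketch of the mismatch check, assuming the elided tail of `from_llm` raises `ValueError`; the `prompt_` variable and the exact message are assumptions, not shown in this hunk:

# Sketch (assumed shape of the elided lines): a custom prompt must
# expose exactly the expected variables, or from_llm rejects it.
expected_input_vars = {"prediction", "prediction_b", "input"}
if require_reference:
    expected_input_vars.add("reference")
if expected_input_vars != set(prompt_.input_variables):
    raise ValueError(
        f"Input variables should be {expected_input_vars}, "
        f"but got {prompt_.input_variables}"
    )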
@@ -121,11 +121,11 @@ def from_llm(
         return cls(llm=llm, prompt=prompt_, **kwargs)
 
     def _prepare_input(
-        self, output_a: str, output_b: str, input: str, reference: Optional[str]
+        self, prediction: str, prediction_b: str, input: str, reference: Optional[str]
     ) -> dict:
         input_ = {
-            "output_a": output_a,
-            "output_b": output_b,
+            "prediction": prediction,
+            "prediction_b": prediction_b,
             "input": input,
         }
         if reference is not None and "reference" in self.prompt.input_variables:
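The mapping is small enough to restate as a self-contained sketch; the free function and the `prompt_vars` parameter here are illustrative stand-ins for `self` and `self.prompt.input_variables`:

from typing import Optional

def prepare_input(
    prediction: str,
    prediction_b: str,
    input: str,
    reference: Optional[str],
    prompt_vars: list,
) -> dict:
    # Map the public keyword arguments onto the prompt's variable names.
    input_ = {
        "prediction": prediction,
        "prediction_b": prediction_b,
        "input": input,
    }
    # Forward the reference only when it is supplied AND the prompt declares it.
    if reference is not None and "reference" in prompt_vars:
        input_["reference"] = reference
    return input_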
@@ -135,8 +135,8 @@ def _prepare_input(
     def evaluate_string_pairs(
         self,
         *,
-        output_a: str,
-        output_b: str,
+        prediction: str,
+        prediction_b: str,
         input: str,
         reference: Optional[str] = None,
         callbacks: Callbacks = None,
@@ -145,8 +145,8 @@ def evaluate_string_pairs(
145145 """Evaluate whether output A is preferred to output B.
146146
147147 Args:
148- output_a (str): The output string from the first model.
149- output_b (str): The output string from the second model.
148+ prediction (str): The output string from the first model.
149+ prediction_b (str): The output string from the second model.
150150 input (str): The input or task string.
151151 callbacks (Callbacks, optional): The callbacks to use.
152152 reference (str, optional): The reference string, if any.
@@ -160,7 +160,7 @@ def evaluate_string_pairs(
                 - score: The preference score, which is 1 for 'A', 0 for 'B',
                     and 0.5 for None.
         """
-        input_ = self._prepare_input(output_a, output_b, input, reference)
+        input_ = self._prepare_input(prediction, prediction_b, input, reference)
         result = self(
             inputs=input_,
             callbacks=callbacks,
@@ -171,8 +171,8 @@ def evaluate_string_pairs(
     async def aevaluate_string_pairs(
         self,
         *,
-        output_a: str,
-        output_b: str,
+        prediction: str,
+        prediction_b: str,
         input: str,
         reference: Optional[str] = None,
         callbacks: Callbacks = None,
@@ -181,8 +181,8 @@ async def aevaluate_string_pairs(
181181 """Asynchronously evaluate whether output A is preferred to output B.
182182
183183 Args:
184- output_a (str): The output string from the first model.
185- output_b (str): The output string from the second model.
184+ prediction (str): The output string from the first model.
185+ prediction_b (str): The output string from the second model.
186186 input (str): The input or task string.
187187 callbacks (Callbacks, optional): The callbacks to use.
188188 reference (str, optional): The reference string, if any.
@@ -196,7 +196,7 @@ async def aevaluate_string_pairs(
                 - score: The preference score, which is 1 for 'A', 0 for 'B',
                     and 0.5 for None.
         """
-        input_ = self._prepare_input(output_a, output_b, input, reference)
+        input_ = self._prepare_input(prediction, prediction_b, input, reference)
         result = await self.acall(
             inputs=input_,
             callbacks=callbacks,
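End to end, callers now pass `prediction` and `prediction_b` in place of `output_a` and `output_b`. A usage sketch assembled from the docstring example above; it assumes `OPENAI_API_KEY` is set in the environment, and the result keys follow the Returns section of the docstring:

from langchain.chat_models import ChatOpenAI
from langchain.evaluation.comparison import PairwiseStringEvalChain

llm = ChatOpenAI(temperature=0)  # assumes OPENAI_API_KEY in the environment
chain = PairwiseStringEvalChain.from_llm(llm=llm)

result = chain.evaluate_string_pairs(
    input="What is the chemical formula for water?",
    prediction="H2O",
    prediction_b=(
        "The chemical formula for water is H2O, which means"
        " there are two hydrogen atoms and one oxygen atom."
    ),
    reference="The chemical formula for water is H2O.",
)
# Per the docstring: score is 1 if A is preferred, 0 for B, 0.5 for no preference.
print(result["score"])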