|
| 1 | +# Copyright (c) Meta Platforms, Inc. and affiliates. |
| 2 | +# All rights reserved. |
| 3 | +# |
| 4 | +# This source code is licensed under the BSD-style license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. |
| 6 | + |
| 7 | +import pytest |
| 8 | +from torchtune.data import ChatMLFormat, Llama2ChatFormat, Message, MistralChatFormat |
| 9 | + |
# Shared three-turn fixture (system, user, assistant) used by every format test
# in this module. The text comes from Open-Orca/SlimOrca-Dedup on HuggingFace:
# https://huggingface.co/datasets/Open-Orca/SlimOrca-Dedup
# The wording is reproduced verbatim from that sample, so do not "fix" it.
CHAT_SAMPLE = [
    Message(
        role="system",
        content=(
            "You are an AI assistant. User will you give you a task. Your goal is to "
            "complete the task as faithfully as you can. While performing the task "
            "think step-by-step and justify your steps."
        ),
    ),
    Message(
        role="user",
        content=(
            "Please briefly summarize this news article:\n\n"
            "AOL.com Video - Father Lets 8-Year-Old Drive On Icy Road\n\n"
            "Description:Would you let your 8-year-old drive your car? "
            "How about on an icy road? "
            "Well one father in Russia did just that, and recorded the entire thing. "
            "To her credit, the child seemed to be doing a great job. (0:44)\n\n"
            "Tags: 8-year-old driver , caught on camera , child driver , pix11\n\n"
            "Summary:"
        ),
    ),
    Message(
        role="assistant",
        content=(
            "A father in Russia allowed his 8-year-old child to drive his car on an icy "
            "road and recorded the event. "
            "The child appeared to be handling the situation well, showcasing their "
            "driving skills despite the challenging conditions."
        ),
    ),
]
| 35 | + |
| 36 | + |
def _assert_dialogue_equal(actual, expected):
    """Assert that two dialogues match message-for-message.

    Both arguments are sized sequences of objects exposing ``role`` and
    ``content`` attributes. The dialogues must have the same length, and each
    pair of messages at the same position must have equal ``role`` and equal
    ``content``.

    Args:
        actual: The dialogue produced by the code under test.
        expected: The reference dialogue to compare against.

    Raises:
        AssertionError: If the lengths differ, or if any pair of messages
            differs in ``role`` or ``content``. The message names the index
            of the first offending message.
    """
    assert len(actual) == len(
        expected
    ), f"dialogue length mismatch: got {len(actual)}, expected {len(expected)}"
    # Lengths are known to be equal here, so zip() cannot silently drop messages.
    for idx, (got, want) in enumerate(zip(actual, expected)):
        assert got.role == want.role, f"message {idx}: role differs"
        assert got.content == want.content, f"message {idx}: content differs"
| 42 | + |
| 43 | + |
class TestLlama2ChatFormat:
    """Golden-output test for ``Llama2ChatFormat.format``."""

    # Expected result for CHAT_SAMPLE: the system prompt is folded into the
    # user turn between <<SYS>> tags, the whole user turn is wrapped in
    # [INST] ... [/INST], and the assistant turn is left as-is.
    expected_dialogue = [
        Message(
            role="user",
            content=(
                "[INST] <<SYS>>\n"
                "You are an AI assistant. User will you give you a task. Your goal is to "
                "complete the task as faithfully as you can. While performing the task "
                "think step-by-step and justify your steps."
                "\n<</SYS>>\n\n"
                "Please briefly summarize this news article:\n\n"
                "AOL.com Video - Father Lets 8-Year-Old Drive On Icy Road\n\n"
                "Description:Would you let your 8-year-old drive your car? "
                "How about on an icy road? "
                "Well one father in Russia did just that, and recorded the entire thing. "
                "To her credit, the child seemed to be doing a great job. (0:44)\n\n"
                "Tags: 8-year-old driver , caught on camera , child driver , pix11\n\n"
                "Summary: [/INST] "
            ),
        ),
        Message(
            role="assistant",
            content=(
                "A father in Russia allowed his 8-year-old child to drive his car on an icy "
                "road and recorded the event. "
                "The child appeared to be handling the situation well, showcasing their "
                "driving skills despite the challenging conditions."
            ),
        ),
    ]

    def test_format(self):
        """Formatting CHAT_SAMPLE must reproduce ``expected_dialogue`` exactly."""
        formatted = Llama2ChatFormat.format(CHAT_SAMPLE)
        _assert_dialogue_equal(formatted, self.expected_dialogue)
| 69 | + |
| 70 | + |
class TestMistralChatFormat:
    """Tests for ``MistralChatFormat.format``: golden output and system-prompt rejection."""

    # Expected result for CHAT_SAMPLE without its system message: the user
    # turn is wrapped in [INST] ... [/INST] and the assistant turn is unchanged.
    expected_dialogue = [
        Message(
            role="user",
            content=(
                "[INST] "
                "Please briefly summarize this news article:\n\n"
                "AOL.com Video - Father Lets 8-Year-Old Drive On Icy Road\n\n"
                "Description:Would you let your 8-year-old drive your car? "
                "How about on an icy road? "
                "Well one father in Russia did just that, and recorded the entire thing. "
                "To her credit, the child seemed to be doing a great job. (0:44)\n\n"
                "Tags: 8-year-old driver , caught on camera , child driver , pix11\n\n"
                "Summary: [/INST] "
            ),
        ),
        Message(
            role="assistant",
            content=(
                "A father in Russia allowed his 8-year-old child to drive his car on an icy "
                "road and recorded the event. "
                "The child appeared to be handling the situation well, showcasing their "
                "driving skills despite the challenging conditions."
            ),
        ),
    ]

    def test_format(self):
        """Formatting the user/assistant turns must reproduce ``expected_dialogue``."""
        # Skip the leading system message; the test below expects it to be rejected.
        formatted = MistralChatFormat.format(CHAT_SAMPLE[1:])
        _assert_dialogue_equal(formatted, self.expected_dialogue)

    def test_format_with_system_prompt_raises(self):
        """A dialogue containing a system message must raise ``ValueError``."""
        expected_error = "System prompts are not supported in MistralChatFormat"
        with pytest.raises(ValueError, match=expected_error):
            MistralChatFormat.format(CHAT_SAMPLE)
| 100 | + |
| 101 | + |
class TestChatMLFormat:
    """Golden-output test for ``ChatMLFormat.format``."""

    # Expected result for CHAT_SAMPLE: every turn keeps its role and is wrapped
    # as "<|im_start|>{role}\n ... <|im_end|>". The system and user turns end
    # with a trailing newline; the final assistant turn does not.
    expected_dialogue = [
        Message(
            role="system",
            content=(
                "<|im_start|>system\n"
                "You are an AI assistant. User will you give you a task. Your goal is to "
                "complete the task as faithfully as you can. While performing the task "
                "think step-by-step and justify your steps."
                "<|im_end|>\n"
            ),
        ),
        Message(
            role="user",
            content=(
                "<|im_start|>user\n"
                "Please briefly summarize this news article:\n\n"
                "AOL.com Video - Father Lets 8-Year-Old Drive On Icy Road\n\n"
                "Description:Would you let your 8-year-old drive your car? "
                "How about on an icy road? "
                "Well one father in Russia did just that, and recorded the entire thing. "
                "To her credit, the child seemed to be doing a great job. (0:44)\n\n"
                "Tags: 8-year-old driver , caught on camera , child driver , pix11\n\n"
                "Summary:"
                "<|im_end|>\n"
            ),
        ),
        Message(
            role="assistant",
            content=(
                "<|im_start|>assistant\n"
                "A father in Russia allowed his 8-year-old child to drive his car on an icy "
                "road and recorded the event. "
                "The child appeared to be handling the situation well, showcasing their "
                "driving skills despite the challenging conditions."
                "<|im_end|>"
            ),
        ),
    ]

    def test_format(self):
        """Formatting CHAT_SAMPLE must reproduce ``expected_dialogue`` exactly."""
        formatted = ChatMLFormat.format(CHAT_SAMPLE)
        _assert_dialogue_equal(formatted, self.expected_dialogue)
0 commit comments