Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@

import re
from datasets import Dataset
from rich.console import Console
from rich.layout import Layout
from rich.panel import Panel
Comment on lines -9 to -11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we removing these imports in this PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are unused imports, but I agree this should be a separate PR.


from src.data.ingestor import Ingestor, get_ingestor
from llmtune.data.ingestor import Ingestor, get_ingestor


class DatasetGenerator:
Expand Down
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion src/finetune/finetune.py → llmtune/finetune/generics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from abc import ABC, abstractmethod
from typing import Union, List, Tuple, Dict


class Finetune(ABC):
Expand Down
9 changes: 4 additions & 5 deletions src/finetune/lora.py → llmtune/finetune/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import bitsandbytes as bnb
from datasets import Dataset
from accelerate import Accelerator
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
Expand All @@ -23,10 +22,10 @@
from rich.console import Console


from src.pydantic_models.config_model import Config
from src.utils.save_utils import DirectoryHelper
from src.finetune.finetune import Finetune
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are unused imports, but I agree this should be a separate PR.

from src.ui.rich_ui import RichUI
from llmtune.pydantic_models.config_model import Config
from llmtune.utils.save_utils import DirectoryHelper
from llmtune.finetune.generics import Finetune
from llmtune.ui.rich_ui import RichUI


class LoRAFinetune(Finetune):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from abc import ABC, abstractmethod
from typing import Union, List, Tuple, Dict


class Inference(ABC):
Expand Down
11 changes: 4 additions & 7 deletions src/inference/lora.py → llmtune/inference/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,17 @@
import csv

from transformers import TextIteratorStreamer
from rich.console import Console
from rich.table import Table
from rich.live import Live
Comment on lines -7 to -9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question: I would keep the renaming as one change and fix the rich imports in another PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed

from rich.text import Text
from datasets import Dataset
from transformers import AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM
import torch


from src.pydantic_models.config_model import Config
from src.utils.save_utils import DirectoryHelper
from src.inference.inference import Inference
from src.ui.rich_ui import RichUI
from llmtune.pydantic_models.config_model import Config
from llmtune.utils.save_utils import DirectoryHelper
from llmtune.inference.generics import Inference
from llmtune.ui.rich_ui import RichUI


# TODO: Add type hints please!
Expand Down
File renamed without changes.
File renamed without changes.
23 changes: 14 additions & 9 deletions src/qa/qa.py → llmtune/qa/generics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod
from typing import Union, List, Tuple, Dict
import pandas as pd
from src.ui.rich_ui import RichUI
from llmtune.ui.rich_ui import RichUI
import statistics
from src.qa.qa_tests import *
from llmtune.qa.qa_tests import *
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please do not do this. Import only what you need.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • This has been resolved with Vivek
  • We restructured the design so that nothing needs to be imported here at all



class LLMQaTest(ABC):
Expand All @@ -18,6 +18,7 @@ def get_metric(
) -> Union[float, int, bool]:
pass


class QaTestRegistry:
registry = {}

Expand All @@ -27,18 +28,22 @@ def inner_wrapper(wrapped_class):
for name in names:
cls.registry[name] = wrapped_class
return wrapped_class

return inner_wrapper

@classmethod
@classmethod
def create_tests_from_list(cls, test_names: List[str]) -> List[LLMQaTest]:
    """Build one test object for every name in ``test_names``.

    The parameter was previously spelled ``test_name`` (annotated ``str``)
    while the body iterated ``test_names``, so every call raised
    ``NameError``. The signature now matches what the body consumes.

    Args:
        test_names: Names of the tests to create, in the order the
            resulting objects should appear.

    Returns:
        A list with the result of ``cls.create_test(name)`` for each name,
        in input order. An empty input yields an empty list.
    """
    # NOTE(review): ``create_test`` is not visible in this hunk; it is
    # presumed to be a classmethod on the registry -- confirm it exists.
    return [cls.create_test(name) for name in test_names]

class LLMTestSuite():
def __init__(self,
tests:List[LLMQaTest],
prompts:List[str],
ground_truths:List[str],
model_preds:List[str]) -> None:

class LLMTestSuite:
def __init__(
self,
tests: List[LLMQaTest],
prompts: List[str],
ground_truths: List[str],
model_preds: List[str],
) -> None:

self.tests = tests
self.prompts = prompts
Expand Down
12 changes: 10 additions & 2 deletions src/qa/qa_tests.py → llmtune/qa/qa_tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.qa.qa import LLMQaTest
from llmtune.qa.generics import LLMQaTest
from typing import Union, List, Tuple, Dict
import torch
from transformers import DistilBertModel, DistilBertTokenizer
Expand All @@ -8,7 +8,7 @@
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from src.qa.qa import TestRegistry
from llmtune.qa.generics import TestRegistry

model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
Expand All @@ -18,6 +18,7 @@
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")


@TestRegistry.register("summary_length")
class LengthTest(LLMQaTest):
@property
Expand All @@ -29,6 +30,7 @@ def get_metric(
) -> Union[float, int, bool]:
return abs(len(ground_truth) - len(model_prediction))


@TestRegistry.register("jaccard_similarity")
class JaccardSimilarityTest(LLMQaTest):
@property
Expand All @@ -47,6 +49,7 @@ def get_metric(
similarity = intersection_size / union_size if union_size != 0 else 0
return similarity


@TestRegistry.register("dot_product")
class DotProductSimilarityTest(LLMQaTest):
@property
Expand All @@ -69,6 +72,7 @@ def get_metric(
)
return dot_product_similarity


@TestRegistry.register("rouge_score")
class RougeScoreTest(LLMQaTest):
@property
Expand All @@ -82,6 +86,7 @@ def get_metric(
scores = scorer.score(model_prediction, ground_truth)
return float(scores["rouge1"].precision)


@TestRegistry.register("word_overlap")
class WordOverlapTest(LLMQaTest):
@property
Expand Down Expand Up @@ -116,6 +121,7 @@ def _get_pos_percent(self, text: str, pos_tags: List[str]) -> float:
total_words = len(text.split(" "))
return round(len(pos_words) / total_words, 2)


@TestRegistry.register("verb_percent")
class VerbPercent(PosCompositionTest):
@property
Expand All @@ -129,6 +135,7 @@ def get_metric(
model_prediction, ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
)


@TestRegistry.register("adjective_percent")
class AdjectivePercent(PosCompositionTest):
@property
Expand All @@ -140,6 +147,7 @@ def get_metric(
) -> float:
return self._get_pos_percent(model_prediction, ["JJ", "JJR", "JJS"])


@TestRegistry.register("noun_percent")
class NounPercent(PosCompositionTest):
@property
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions src/ui/rich_ui.py → llmtune/ui/rich_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from rich.live import Live
from rich.text import Text

from src.ui.ui import UI
from src.utils.rich_print_utils import inject_example_to_rich_layout
from llmtune.ui.generics import UI
from llmtune.utils.rich_print_utils import inject_example_to_rich_layout

console = Console()

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion src/utils/save_utils.py → llmtune/utils/save_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from sqids import Sqids

from src.pydantic_models.config_model import Config
from llmtune.pydantic_models.config_model import Config

NUM_MD5_DIGITS_FOR_SQIDS = 5 # TODO: maybe move consts to a dedicated folder

Expand Down
14 changes: 7 additions & 7 deletions toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
import torch
import typer

from src.pydantic_models.config_model import Config
from src.data.dataset_generator import DatasetGenerator
from src.utils.save_utils import DirectoryHelper
from src.utils.ablation_utils import generate_permutations
from src.finetune.lora import LoRAFinetune
from src.inference.lora import LoRAInference
from src.ui.rich_ui import RichUI
from llmtune.pydantic_models.config_model import Config
from llmtune.data.dataset_generator import DatasetGenerator
from llmtune.utils.save_utils import DirectoryHelper
from llmtune.utils.ablation_utils import generate_permutations
from llmtune.finetune.lora import LoRAFinetune
from llmtune.inference.lora import LoRAInference
from llmtune.ui.rich_ui import RichUI

hf_utils.logging.set_verbosity_error()
torch._logging.set_logs(all=logging.CRITICAL)
Expand Down