diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..4f1d51575a04862a685fc3f545959f310f8cbfc5
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,9 @@
+turing/reporting.py
+turing/plots.py
+turing/features.py
+turing/evaluate_model.py
+turing/data_validation.py
+
+turing/CLI_runner
+turing/modeling/train.py
+turing/tests
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ca207ff05c0348a3c30a1a278dfd6b8cdc0618a1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,14 @@
+services:
+ api:
+ build: .
+ container_name: turing_app
+ image: turing_api
+ ports:
+ - "7860:7860"
+
+ environment:
+ - MLFLOW_TRACKING_USERNAME=${MLFLOW_USER}
+ - MLFLOW_TRACKING_PASSWORD=${MLFLOW_PWD}
+ - DAGSHUB_USER_TOKEN=${DAGSHUB_TOKEN}
+
+ command: uvicorn turing.api.app:app --host 0.0.0.0 --port 7860 --reload
\ No newline at end of file
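For context, a minimal sketch (not part of this diff) of a start-up check for the credentials that `docker-compose.yml` forwards into the container; the variable names come from the `environment` block above, everything else is illustrative:

```python
# Illustrative pre-flight check: fail fast if the credentials injected by
# docker-compose.yml are missing from the container environment.
import os

REQUIRED_VARS = [
    "MLFLOW_TRACKING_USERNAME",
    "MLFLOW_TRACKING_PASSWORD",
    "DAGSHUB_USER_TOKEN",
]

missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
```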
diff --git a/dockerfile b/dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..42dd62de776aa03b1c0310c0423270758894307e
--- /dev/null
+++ b/dockerfile
@@ -0,0 +1,31 @@
+FROM python:3.12
+
+# Create a non-root user to run the application and set permissions
+RUN useradd -m -u 1000 turinguser
+RUN mkdir -p /app/models && chown -R turinguser:turinguser /app /app/models
+USER turinguser
+
+# Set environment variables
+# PATH to include local user binaries and project root
+ENV PATH="/home/turinguser/.local/bin:$PATH"
+ENV PROJ_ROOT=/app
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy essential files to install dependencies
+COPY --chown=turinguser requirements.txt .
+
+# Install Python dependencies
+RUN pip install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --upgrade --default-timeout=1000 --no-cache-dir -r requirements.txt
+
+# Copy remaining project files
+COPY --chown=turinguser turing ./turing
+COPY --chown=turinguser reports ./reports
+
+# Expose port 7860 for the FastAPI application
+EXPOSE 7860
+
+# Default command to run the FastAPI application on port 7860
+CMD ["uvicorn", "turing.api.app:app", "--host", "0.0.0.0", "--port", "7860"]
\ No newline at end of file
diff --git a/reports/.gitkeep b/reports/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/reports/feedback/feedback_data.csv b/reports/feedback/feedback_data.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c77afd3d1f0fb53b55bd2cb285f2ce199583eddd
--- /dev/null
+++ b/reports/feedback/feedback_data.csv
@@ -0,0 +1,3 @@
+Timestamp,Input_Text,Language,Model_Prediction,User_Correction
+2025-12-11 22:41:05,# Create output directory,python,Usage,DevelopmentNotes
+2025-12-11 23:05:24,# Entry point for running the API directly with python,python,Usage,DevelopmentNotes
diff --git a/reports/figures/.gitkeep b/reports/figures/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/reports/figures/logo_header.svg b/reports/figures/logo_header.svg
new file mode 100644
index 0000000000000000000000000000000000000000..fde0102644902834fbc91f670843f2180619562b
--- /dev/null
+++ b/reports/figures/logo_header.svg
@@ -0,0 +1,38 @@
+
diff --git a/reports/unit_and_behavioral_tests/report.md b/reports/unit_and_behavioral_tests/report.md
new file mode 100644
index 0000000000000000000000000000000000000000..fdb1a1f776bc054e2cfb61b53b1e172e358fefcf
--- /dev/null
+++ b/reports/unit_and_behavioral_tests/report.md
@@ -0,0 +1,108 @@
+
+# Test Execution Report
+
+
+### Environment
+
+
+```text
+ Parameter Value
+ Timestamp 2025-11-27 15:44:47
+ Context turing
+Python Version 3.12.12
+ Platform Windows-11-10.0.26100-SP0
+```
+
+
+### Executive Summary
+
+
+```text
+ Total Passed Failed Success Rate
+ 66 35 31 53.0%
+```
+
+
+Detailed Breakdown:
+
+
+### BEHAVIORAL Tests
+
+
+```text
+ Module Test Case Result Time Message
+ test_directional.py test_java_directional_add_deprecation [ FAILED ] 0.30s turing\tests\behavioral\test_directional.py:16: Assertion...
+ test_directional.py test_python_directional_remove_todo [ FAILED ] 0.15s turing\tests\behavioral\test_directional.py:31: Assertion...
+ test_directional.py test_pharo_directional_add_responsibility [ FAILED ] 0.13s turing\tests\behavioral\test_directional.py:49: Assertion...
+ test_directional.py test_java_directional_contrast_rational [ FAILED ] 0.12s turing\tests\behavioral\test_directional.py:70: Assertion...
+ test_directional.py test_python_directional_contrast_todo [ FAILED ] 0.12s turing\tests\behavioral\test_directional.py:87: Assertion...
+ test_directional.py test_pharo_directional_contrast_collaborators [ FAILED ] 0.13s turing\tests\behavioral\test_directional.py:112: Assertio...
+ test_directional.py test_java_directional_shift_summary_to_expand [ FAILED ] 0.12s turing\tests\behavioral\test_directional.py:132: Assertio...
+ test_directional.py test_python_directional_shift_summary_to_devnotes [ FAILED ] 0.12s turing\tests\behavioral\test_directional.py:152: Assertio...
+ test_directional.py test_pharo_directional_shift_to_example [ FAILED ] 0.12s turing\tests\behavioral\test_directional.py:173: Assertio...
+ test_invariance.py test_python_invariance_parameters[:param user_i... [ FAILED ] 0.22s turing\tests\behavioral\test_invariance.py:15: AssertionE...
+ test_invariance.py test_python_invariance_parameters[:PARAM USER_I... [ FAILED ] 0.07s turing\tests\behavioral\test_invariance.py:15: AssertionE...
+ test_invariance.py test_python_invariance_parameters[ :param user... [ FAILED ] 0.06s turing\tests\behavioral\test_invariance.py:15: AssertionE...
+ test_invariance.py test_python_invariance_parameters[:param user_i... [ FAILED ] 0.06s turing\tests\behavioral\test_invariance.py:15: AssertionE...
+ test_invariance.py test_java_invariance_deprecation [ FAILED ] 0.13s turing\tests\behavioral\test_invariance.py:26: AssertionE...
+ test_invariance.py test_python_invariance_summary [ FAILED ] 0.13s turing\tests\behavioral\test_invariance.py:45: AssertionE...
+ test_invariance.py test_pharo_invariance_intent [ FAILED ] 0.13s turing\tests\behavioral\test_invariance.py:64: AssertionE...
+ test_invariance.py test_python_invariance_typos_parameters [ FAILED ] 0.07s turing\tests\behavioral\test_invariance.py:85: AssertionE...
+ test_invariance.py test_java_invariance_semantic_summary [ PASS ] 0.32s
+test_minimum_functionality.py test_java_mft[test getfilestatus and related li... [ PASS ] 0.06s
+test_minimum_functionality.py test_java_mft[/* @deprecated Use something else... [ FAILED ] 0.06s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_java_mft[code source of this file http gre... [ FAILED ] 0.06s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_java_mft[this is balanced if each pool is ... [ FAILED ] 0.06s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_java_mft[// For internal use only.-expecte... [ FAILED ] 0.06s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_java_mft[this impl delegates to the old fi... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_java_mft[/** Usage: new MyClass(arg1). */-... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:17:...
+test_minimum_functionality.py test_python_mft[a service specific account of t... [ PASS ] 0.06s
+test_minimum_functionality.py test_python_mft[:param user_id: The ID of the u... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:29:...
+test_minimum_functionality.py test_python_mft[# TODO: Refactor this entire bl... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:29:...
+test_minimum_functionality.py test_python_mft[use this class if you want acce... [ PASS ] 0.06s
+test_minimum_functionality.py test_python_mft[# create a new list by filterin... [ FAILED ] 0.08s turing\tests\behavioral\test_minimum_functionality.py:29:...
+test_minimum_functionality.py test_pharo_mft[i am a simple arrow like arrowhe... [ PASS ] 0.07s
+test_minimum_functionality.py test_pharo_mft[the example below shows how to c... [ PASS ] 0.07s
+test_minimum_functionality.py test_pharo_mft[i provide a data structure indep... [ FAILED ] 0.06s turing\tests\behavioral\test_minimum_functionality.py:43:...
+test_minimum_functionality.py test_pharo_mft[the cache is cleared after each ... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:43:...
+test_minimum_functionality.py test_pharo_mft[it is possible hovewer to custom... [ PASS ] 0.07s
+test_minimum_functionality.py test_pharo_mft[collaborators: BlElement, BlSpac... [ FAILED ] 0.07s turing\tests\behavioral\test_minimum_functionality.py:43:...
+```
+
+
+### UNIT Tests
+
+
+```text
+ Module Test Case Result Time Message
+ test_config.py test_proj_root_is_correctly_identified [ PASS ] 0.00s
+ test_config.py test_directory_paths_are_correctly_structured [ PASS ] 0.00s
+ test_config.py test_dataset_constants_are_valid [ PASS ] 0.00s
+ test_config.py test_labels_map_and_total_categories_are_correct [ PASS ] 0.00s
+ test_config.py test_numeric_parameters_are_positive [ PASS ] 0.00s
+ test_config.py test_load_dotenv_is_called_on_module_load [ PASS ] 0.00s
+ test_dataset.py test_initialization_paths_are_correct [ FAILED ] 0.00s turing\tests\unit\test_dataset.py:24: AssertionError
+ test_dataset.py test_format_labels_for_csv[input_labels0-[1, 0,... [ PASS ] 0.00s
+ test_dataset.py test_format_labels_for_csv[[1, 0, 1]-[1, 0, 1]] [ PASS ] 0.00s
+ test_dataset.py test_format_labels_for_csv[input_labels2-[]] [ PASS ] 0.00s
+ test_dataset.py test_format_labels_for_csv[None-None] [ PASS ] 0.00s
+ test_dataset.py test_get_dataset_raises_file_not_found [ PASS ] 0.00s
+ test_dataset.py test_get_dataset_success_and_label_parsing [ PASS ] 0.48s
+test_features.py test_config_id_generation [ PASS ] 0.00s
+test_features.py test_config_attributes [ PASS ] 0.00s
+test_features.py test_clean_text_basic [ PASS ] 0.00s
+test_features.py test_clean_text_stopwords [ PASS ] 2.39s
+test_features.py test_clean_text_lemmatization [ PASS ] 0.00s
+test_features.py test_clean_text_handles_none [ PASS ] 0.00s
+test_features.py test_extract_numeric_features [ PASS ] 0.00s
+ test_model.py test_model_initialization[randomForestTfIdf] [ PASS ] 0.00s
+ test_model.py test_model_initialization[codeBerta] [ PASS ] 0.00s
+ test_model.py test_model_setup[randomForestTfIdf] [ PASS ] 0.00s
+ test_model.py test_model_setup[codeBerta] [ PASS ] 1.39s
+ test_model.py test_model_train[randomForestTfIdf] [ PASS ] 3.06s
+ test_model.py test_model_train[codeBerta] [ PASS ] 4.90s
+ test_model.py test_model_evaluate[randomForestTfIdf] [ PASS ] 1.39s
+ test_model.py test_model_evaluate[codeBerta] [ FAILED ] 6.36s turing\tests\unit\test_model.py:101: AssertionError
+ test_model.py test_model_predict[randomForestTfIdf] [ PASS ] 1.36s
+ test_model.py test_model_predict[codeBerta] [ PASS ] 5.26s
+```
diff --git a/reports/unit_tests/report.md b/reports/unit_tests/report.md
new file mode 100644
index 0000000000000000000000000000000000000000..9ebe350e94f19f83541c3a75f87c163a6baf5b3d
--- /dev/null
+++ b/reports/unit_tests/report.md
@@ -0,0 +1,122 @@
+
+# Turing Test Execution Report
+
+
+
+---
+
+
+
+## Environment Information
+
+
+| Parameter | Value |
+|:---------------|:---------------------------|
+| Timestamp | 2025-12-04 18:14:18 |
+| Context | TURING |
+| Python Version | 3.12.12 |
+| Platform | macOS-15.6-arm64-arm-64bit |
+| Architecture | arm64 |
+
+
+---
+
+
+## Executive Summary
+
+
+**Overall Status:** MOSTLY PASSED
+
+
+**Success Rate:** 91.2%
+
+
+| Metric | Count |
+|:-------------|--------:|
+| Total Tests | 34 |
+| Passed | 31 |
+| Failed | 3 |
+| Success Rate | 91.2% |
+
+
+**Visual Progress:**
+
+
+```
+Progress: [█████████████████████████████████████████████░░░░░] 91.2%
+Passed: 31/34 tests
+```
+
+
+---
+
+
+## UNIT Tests
+
+
+### Statistics
+
+
+| Status | Count |
+|:---------|-----------:|
+| Total | 34 |
+| Passed | 31 (91.2%) |
+| Failed | 3 (8.8%) |
+
+
+### Test Results
+
+
+| Module | Test Case | Result | Time | Message |
+|:----------------|:---------------------------------------------------|:---------|:-------|:-----------------------------------------------------|
+| test_api.py | test_health_check_returns_ok | PASS | 0.01s | |
+| test_api.py | test_predict_success_java | PASS | 0.02s | |
+| test_api.py | test_predict_success_python | PASS | 0.00s | |
+| test_api.py | test_predict_success_pharo | PASS | 0.00s | |
+| test_api.py | test_predict_missing_texts | PASS | 0.00s | |
+| test_api.py | test_predict_missing_language | PASS | 0.00s | |
+| test_api.py | test_predict_empty_texts | PASS | 0.00s | |
+| test_api.py | test_predict_error_handling | PASS | 0.00s | |
+| test_api.py | test_predict_invalid_language | PASS | 0.00s | |
+| test_api.py | test_prediction_request_valid | PASS | 0.00s | |
+| test_api.py | test_prediction_response_valid | PASS | 0.00s | |
+| test_config.py | test_proj_root_is_correctly_identified | PASS | 0.00s | |
+| test_config.py | test_directory_paths_are_correctly_structured | PASS | 0.00s | |
+| test_config.py | test_dataset_constants_are_valid | PASS | 0.00s | |
+| test_config.py | test_labels_map_and_total_categories_are_correct | PASS | 0.00s | |
+| test_config.py | test_numeric_parameters_are_positive | PASS | 0.00s | |
+| test_config.py | test_load_dotenv_is_called_on_module_load | PASS | 0.00s | |
+| test_dataset.py | test_initialization_paths_are_correct | FAIL | 0.00s | turing/tests/unit/test_dataset.py:25: AssertionError |
+| test_dataset.py | test_format_labels_for_csv[input_labels0-[1, 0,... | PASS | 0.00s | |
+| test_dataset.py | test_format_labels_for_csv[[1, 0, 1]-[1, 0, 1]] | PASS | 0.00s | |
+| test_dataset.py | test_format_labels_for_csv[input_labels2-[]] | PASS | 0.00s | |
+| test_dataset.py | test_format_labels_for_csv[None-None] | PASS | 0.00s | |
+| test_dataset.py | test_get_dataset_raises_file_not_found | PASS | 0.00s | |
+| test_dataset.py | test_get_dataset_success_and_label_parsing | FAIL | 0.00s | turing/dataset.py:128: FileNotFoundError |
+| test_model.py | test_model_initialization[randomForestTfIdf] | PASS | 0.00s | |
+| test_model.py | test_model_initialization[codeBerta] | PASS | 0.00s | |
+| test_model.py | test_model_setup[randomForestTfIdf] | PASS | 0.00s | |
+| test_model.py | test_model_setup[codeBerta] | PASS | 0.93s | |
+| test_model.py | test_model_train[randomForestTfIdf] | PASS | 2.66s | |
+| test_model.py | test_model_train[codeBerta] | PASS | 7.22s | |
+| test_model.py | test_model_evaluate[randomForestTfIdf] | PASS | 1.31s | |
+| test_model.py | test_model_evaluate[codeBerta] | FAIL | 8.83s | turing/tests/unit/test_model.py:101: AssertionError |
+| test_model.py | test_model_predict[randomForestTfIdf] | PASS | 1.21s | |
+| test_model.py | test_model_predict[codeBerta] | PASS | 5.98s | |
+
+
+---
+
+
+> **ERROR**: 3 test(s) failed. Please review the error messages above.
+
+
+
+---
+
+
+
+*Report generated on 2025-12-04 at 18:14:18*
+
+
+*Powered by Turing Test Suite*
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f024c882d66251a4a53553b2fedcf33a587a77c4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+fastapi
+uvicorn[standard]
+loguru
+pydantic
+python-dotenv
+mlflow
+numpy
+transformers
+dagshub
+datasets
+accelerate
+scikit-learn
+gradio
\ No newline at end of file
diff --git a/turing/CLI_runner/run_dataset.py b/turing/CLI_runner/run_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..b02c26e3acbdfa3f035ab2c1883108feac2c70cf
--- /dev/null
+++ b/turing/CLI_runner/run_dataset.py
@@ -0,0 +1,105 @@
+import os
+from pathlib import Path
+import sys
+
+from loguru import logger
+import typer
+from typing_extensions import Annotated
+
+# Ensure the project root is on sys.path before importing project modules
+script_dir = os.path.dirname(os.path.abspath(__file__))
+proj_root = os.path.dirname(os.path.dirname(script_dir))
+sys.path.append(proj_root)
+
+try:
+    from turing.config import INTERIM_DATA_DIR, RAW_DATA_DIR
+    from turing.dataset import DatasetManager
+except ImportError:
+    logger.error("Error: Could not import DatasetManager. Check sys.path configuration.")
+    logger.error(f"Current sys.path: {sys.path}")
+    sys.exit(1)
+
+app = typer.Typer(help="CLI for dataset management (Download, Conversion, and Search).")
+
+
+@app.command()
+def download():
+ """
+ Loads the dataset from Hugging Face and saves it into the "raw" folder.
+ """
+ logger.info("Starting dataset download...")
+ manager = DatasetManager()
+ manager.download_dataset()
+ logger.success("Download complete.")
+
+
+@app.command(name="parquet-to-csv")
+def parquet_to_csv():
+ """
+ Converts all parquet files in the raw data directory
+ to CSV format in the interim data directory.
+ """
+ logger.info("Starting Parquet -> CSV conversion...")
+ manager = DatasetManager()
+ manager.parquet_to_csv()
+ logger.success("Conversion complete.")
+
+
+@app.command()
+def search(
+ filename: Annotated[
+ str, typer.Argument(help="The exact filename to search for (e.g., 'java_train.parquet')")
+ ],
+ directory: Annotated[
+ str,
+ typer.Option(
+ "--directory",
+ "-d",
+ help="Directory to search in. Keywords 'raw' or 'interim' can be used.",
+ ),
+ ] = "raw",
+):
+ """
+ Searches for a file by name in the data directories.
+ """
+ logger.info(f"Initializing search for '{filename}'...")
+ manager = DatasetManager()
+
+ search_path = None
+ if directory.lower() == "raw":
+ search_path = RAW_DATA_DIR
+ logger.info("Searching in 'raw' data directory.")
+ elif directory.lower() == "interim":
+ search_path = INTERIM_DATA_DIR
+ logger.info("Searching in 'interim' data directory.")
+ else:
+ search_path = Path(directory)
+ logger.info(f"Searching in custom path: {search_path}")
+
+ results = manager.search_file(filename, search_directory=search_path)
+
+ if results:
+ logger.success(f"Found {len(results)} file(s):")
+ for res in results:
+ print(f"-> {res}")
+ else:
+ logger.warning(f"File '{filename}' not found in {search_path}.")
+
+
+@app.command(name="show-raw-hf")
+def show_raw_hf():
+ """
+ Loads and displays info about the raw dataset from Hugging Face.
+ """
+ logger.info("Loading raw dataset info from Hugging Face...")
+ manager = DatasetManager()
+ dataset = manager.get_raw_dataset_from_hf()
+ if dataset:
+ logger.info("Dataset info:")
+ print(dataset)
+ else:
+ logger.error("Could not retrieve dataset.")
+
+
+if __name__ == "__main__":
+ app()
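As a quick illustration of how this Typer app can be exercised without a shell (assuming `turing.CLI_runner.run_dataset` is importable; the filename below is just an example):

```python
# Invoke the `search` command programmatically via Typer's test runner.
from typer.testing import CliRunner

from turing.CLI_runner.run_dataset import app

runner = CliRunner()
result = runner.invoke(app, ["search", "java_train.parquet", "--directory", "raw"])
print(result.exit_code, result.output)
```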
diff --git a/turing/CLI_runner/run_prediction.py b/turing/CLI_runner/run_prediction.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6f104daa4c0aae5a493fd4d5fc3f6a412b94cc3
--- /dev/null
+++ b/turing/CLI_runner/run_prediction.py
@@ -0,0 +1,57 @@
+from pathlib import Path
+import sys
+
+from loguru import logger
+import typer
+
+# Add the project root to sys.path before importing project modules
+current_dir = Path(__file__).resolve().parent
+project_root = current_dir.parents[1]
+if str(project_root) not in sys.path:
+    sys.path.append(str(project_root))
+
+from turing.modeling.models.randomForestTfIdf import RandomForestTfIdf
+from turing.modeling.predict import ModelInference
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ mlflow_run_id: str = typer.Option(
+ "af1fa5959dc14fa9a29a0a19c11f1b08", help="The MLflow Run ID"
+ ),
+ artifact_name: str = typer.Option(
+ "RandomForestTfIdf_java", help="The name of the model artifact"
+ ),
+ language: str = typer.Option("java", help="The target programming language"),
+):
+ """
+ Run inference using the dataset stored on disk (Standard CML/DVC workflow).
+ """
+ logger.info("Starting CLI inference process...")
+
+ try:
+ # Initialize inference engine
+ inference_engine = ModelInference()
+
+ # Run prediction on the test dataset
+ results = inference_engine.predict_from_mlflow(
+ mlflow_run_id=mlflow_run_id,
+ artifact_name=artifact_name,
+ language=language,
+ model_class=RandomForestTfIdf,
+ )
+
+ # Output results
+ print("\n--- Prediction Results ---")
+ print(results)
+ print("--------------------------")
+
+ except Exception as e:
+ logger.error(f"CLI Prediction failed: {e}")
+ raise typer.Exit(code=1)
+
+
+if __name__ == "__main__":
+ app()
diff --git a/turing/__init__.py b/turing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..18ae806ff73753d6560266c7fb68c2bd51971a7b
--- /dev/null
+++ b/turing/__init__.py
@@ -0,0 +1 @@
+from turing import config # noqa: F401
diff --git a/turing/__pycache__/__init__.cpython-312.pyc b/turing/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b29672cc77222a9cc6febdd7fbd5f3fa8bed3541
Binary files /dev/null and b/turing/__pycache__/__init__.cpython-312.pyc differ
diff --git a/turing/__pycache__/config.cpython-312.pyc b/turing/__pycache__/config.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..54a78bb57637c5c4af860f7cba23a1187954768d
Binary files /dev/null and b/turing/__pycache__/config.cpython-312.pyc differ
diff --git a/turing/__pycache__/dataset.cpython-312.pyc b/turing/__pycache__/dataset.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..67213b96474be9aadf50425b2a70e088497b677e
Binary files /dev/null and b/turing/__pycache__/dataset.cpython-312.pyc differ
diff --git a/turing/__pycache__/evaluate_model.cpython-312.pyc b/turing/__pycache__/evaluate_model.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..683e5fae32d19b99f5035f0716ffa3f44b40e6c4
Binary files /dev/null and b/turing/__pycache__/evaluate_model.cpython-312.pyc differ
diff --git a/turing/api/__init__.py b/turing/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/turing/api/app.py b/turing/api/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb0b3dbd841736ee37c70e299763510aada87342
--- /dev/null
+++ b/turing/api/app.py
@@ -0,0 +1,115 @@
+import base64
+import os
+
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse
+import gradio as gr
+from loguru import logger
+
+from turing.api.demo import create_demo
+from turing.api.schemas import PredictionRequest, PredictionResponse
+from turing.modeling.predict import ModelInference
+
+
+def get_logo_b64_src(filename="logo_header.svg"):
+ """read SVG and convert it into a string Base64 for HTML."""
+ try:
+ base_path = os.path.dirname(os.path.abspath(__file__))
+ target_path = os.path.join(base_path, "..", "..", "reports", "figures", filename)
+ target_path = os.path.normpath(target_path)
+
+ with open(target_path, "rb") as f:
+ encoded = base64.b64encode(f.read()).decode("utf-8")
+ return f"data:image/svg+xml;base64,{encoded}"
+ except Exception as e:
+ print(f"Unable to load logo for API: {e}")
+ return ""
+
+
+# Load the logo as a Base64 data URI
+logo_src = get_logo_b64_src()
+
+# Clickable logo that links to the Gradio UI mounted at /gradio
+logo_html_big = f"""
+<a href="/gradio" target="_blank">
+    <img src="{logo_src}" alt="Open the Turing classification UI" style="height: 120px;">
+</a>
+"""
+
+# API description shown in the OpenAPI docs
+description_md = f"""
+API for classifying code comments.
+
+You can interact with the model directly through the visual interface.
+Click the logo below to open it:
+
+{logo_html_big}
+
+"""
+
+app = FastAPI(
+ title="Turing Team Code Classification API",
+ description=description_md,
+ version="1.0.0"
+)
+
+@app.get("/manifest.json")
+def get_manifest():
+ return JSONResponse(content={
+ "name": "Turing App",
+ "short_name": "Turing",
+ "start_url": "/gradio",
+ "display": "standalone",
+ "background_color": "#ffffff",
+ "theme_color": "#000000",
+ "icons": []
+ })
+
+# Global inference engine instance
+inference_engine = ModelInference()
+
+demo = create_demo(inference_engine)
+app = gr.mount_gradio_app(app, demo, path="/gradio")
+
+@app.get("/")
+def health_check():
+ """
+ Root endpoint to verify API status.
+ """
+ return {"status": "ok", "message": "Turing Code Classification API is ready.", "ui_url": "/gradio"}
+
+
+@app.post("/predict", response_model=PredictionResponse)
+def predict(request: PredictionRequest):
+ """
+ Endpoint to classify a list of code comments.
+ Dynamically loads the model from MLflow based on the request parameters.
+ """
+ try:
+ logger.info(f"Received prediction request for language: {request.language}")
+
+ # Perform prediction using the inference engine
+ raw, predictions, run_id, artifact = inference_engine.predict_payload(
+ texts=request.texts, language=request.language
+ )
+
+        # Ensure outputs are JSON-serializable (convert numpy arrays to lists)
+        if hasattr(raw, "tolist"):
+            raw = raw.tolist()
+        if hasattr(predictions, "tolist"):
+            predictions = predictions.tolist()
+
+        return PredictionResponse(
+            predictions=raw,
+            labels=predictions,
+            model_info={"artifact": artifact, "language": request.language},
+        )
+
+ except Exception as e:
+ logger.error(f"Prediction failed: {str(e)}")
+ raise HTTPException(status_code=500, detail=str(e))
+
+
+# Entry point for running the API directly with python
+if __name__ == "__main__":
+ import uvicorn
+
+ uvicorn.run(app, host="127.0.0.1", port=7860)
diff --git a/turing/api/demo.py b/turing/api/demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b9e0a57c23895dd4bbf649df7fb9ec44fdfb3bf
--- /dev/null
+++ b/turing/api/demo.py
@@ -0,0 +1,302 @@
+import csv
+from datetime import datetime
+import os
+
+import gradio as gr
+
+# ---IMPORTS ---
+try:
+ from turing.modeling.models.codeBerta import CodeBERTa
+ from turing.modeling.predict import ModelInference
+except ImportError as e:
+ print(f"WARNING: Error importing real modules: {e}")
+ class CodeBERTa:
+ pass
+ class ModelInference:
+ pass
+
+# --- CONFIGURATION ---
+FEEDBACK_FILE = "reports/feedback/feedback_data.csv"
+
+LABELS_MAP = {
+ "java": ["summary", "Ownership", "Expand", "usage", "Pointer", "deprecation", "rational"],
+ "python": ["Usage", "Parameters", "DevelopmentNotes", "Expand", "Summary"],
+ "pharo": ["Keyimplementationpoints", "Example", "Responsibilities", "Intent", "Keymessages", "Collaborators"],
+}
+
+# --- CSS ---
+CSS = """
+:root {
+ --bg-primary: #fafaf9; --bg-secondary: #ffffff; --border-color: #e5e7eb;
+ --text-primary: #1f2937; --text-secondary: #6b7280; --accent-bg: #f3f4f6;
+ --primary-btn: #ea580c; --primary-btn-hover: #c2410c;
+}
+.dark, body.dark, .gradio-container.dark {
+ --bg-primary: #0f172a; --bg-secondary: #1e293b; --border-color: #374151;
+ --text-primary: #f3f4f6; --text-secondary: #9ca3af; --accent-bg: #334155;
+}
+body, .gradio-container {
+ background-color: var(--bg-primary) !important; color: var(--text-primary) !important;
+ font-family: 'Segoe UI', system-ui, sans-serif; transition: background 0.3s, color 0.3s;
+}
+.compact-header {
+ display: flex; align-items: center; justify-content: space-between; padding: 1.5rem 2rem;
+ border-bottom: 1px solid var(--border-color); margin-bottom: 2rem;
+ background-color: var(--bg-secondary); flex-wrap: wrap; gap: 1rem; border-radius: 0 0 12px 12px;
+}
+.input-card, .output-card {
+ background-color: var(--bg-secondary); border: 1px solid var(--border-color);
+ border-radius: 12px; padding: 1.5rem; margin-bottom: 1rem; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1);
+}
+.header-left { display: flex; align-items: center; gap: 1.5rem; }
+.logo-icon {
+ height: 55px; width: auto; padding: 0; background-color: transparent;
+ border: none; box-shadow: none; display: flex; align-items: center; justify-content: center; flex-shrink: 0;
+}
+.logo-icon svg { height: 100%; width: auto; fill: var(--primary-btn); }
+.title-group { display: flex; flex-direction: column; }
+.main-title { font-size: 1.6rem; font-weight: 800; margin: 0; line-height: 1.1; color: var(--text-primary); letter-spacing: -0.5px; }
+.subtitle { font-size: 0.95rem; color: var(--text-secondary); margin: 0; font-weight: 400; }
+.section-title { font-weight: 600; color: var(--text-primary); margin-bottom: 1rem; }
+.header-right { flex: 1; display: flex; justify-content: flex-end; align-items: center; min-width: 250px; }
+.dev-note-container {
+ background-color: var(--accent-bg); border: 1px solid var(--border-color); border-radius: 16px;
+ width: 520px; height: 64px; display: flex; align-items: center; justify-content: flex-start; padding: 0 24px; gap: 1rem;
+}
+.dev-note-container:hover { border-color: var(--primary-btn); }
+.dev-icon { font-size: 1.4rem; background: transparent !important; border: none !important; display: flex; align-items: center; flex-shrink: 0; }
+.dev-text {
+ font-family: 'Courier New', monospace; font-size: 0.95rem; color: var(--text-secondary);
+ transition: opacity 1.5s ease; white-space: normal; line-height: 1.2; text-align: left;
+ display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden;
+}
+.dev-text.hidden { opacity: 0; }
+.feedback-section { margin-top: 2rem; padding-top: 1.5rem; border-top: 1px dashed var(--border-color); }
+.feedback-title { font-size: 0.8rem; font-weight: 700; color: var(--text-secondary); text-transform: uppercase; margin-bottom: 0.8rem; }
+.gr-button-primary { background: var(--primary-btn) !important; border: none !important; color: white !important; }
+.gr-button-primary:hover { background: var(--primary-btn-hover) !important; }
+.gr-button-secondary { background: var(--bg-primary) !important; border: 1px solid var(--border-color) !important; color: var(--text-primary) !important; }
+.gr-box, .gr-input, .gr-dropdown { background: var(--bg-primary) !important; border-color: var(--border-color) !important; }
+#result-box textarea {
+ font-size: 1.25rem; font-weight: 700; text-align: center; color: var(--primary-btn);
+ background-color: transparent; border: none; overflow: hidden !important; resize: none; white-space: normal; line-height: 1.4;
+}
+"""
+
+# --- JAVASCRIPT ---
+JS_LOADER = """
+() => {
+ const notes = [
+ "Yes, even Pharo. Don’t ask why.",
+ "Is ‘deprecated’ significant? Asking for a friend.",
+ "Technical debt is just future-me's problem.",
+ "Comment first, code later. Obviously.",
+ "If it works, don't touch it.",
+ "Fixing bugs created by previous-me.",
+ "Legacy code: don't breathe on it.",
+ "Documentation is a love letter to your future self.",
+ "It works on my machine!",
+ "404: Motivation not found.",
+ "Compiling... please hold."
+ ];
+ let idx = 0;
+ function rotateNotes() {
+ const textEl = document.getElementById('dev-note-text');
+ if (!textEl) { setTimeout(rotateNotes, 500); return; }
+ textEl.classList.add('hidden');
+ setTimeout(() => {
+ idx = (idx + 1) % notes.length;
+ textEl.innerText = notes[idx];
+ textEl.classList.remove('hidden');
+ }, 1500);
+ }
+ setInterval(rotateNotes, 10000);
+}
+"""
+
+# --- UTILITIES ---
+def load_svg_content(filename="logo_header.svg"):
+ base_path = os.path.dirname(os.path.abspath(__file__))
+ target_path = os.path.join(base_path, "..", "..", "reports", "figures", filename)
+ target_path = os.path.normpath(target_path)
+
+ if os.path.exists(target_path):
+ with open(target_path, "r", encoding="utf-8") as f:
+ return f.read()
+ else:
+ print(f"[WARNING] Logo not found in: {target_path}")
+ return "CCC"
+
+def save_feedback_to_csv(text, language, predicted, suggested):
+ if not text:
+ return "No data."
+ try:
+ os.makedirs(os.path.dirname(FEEDBACK_FILE), exist_ok=True)
+ file_exists = os.path.isfile(FEEDBACK_FILE)
+ with open(FEEDBACK_FILE, mode='a', newline='', encoding='utf-8') as f:
+ writer = csv.writer(f)
+ if not file_exists:
+ writer.writerow(["Timestamp", "Input_Text", "Language", "Model_Prediction", "User_Correction"])
+
+ pred_label = predicted
+ if isinstance(predicted, dict):
+ pred_label = max(predicted, key=predicted.get) if predicted else "Unknown"
+
+ writer.writerow([
+ datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ text.strip(),
+ language,
+ pred_label,
+ suggested
+ ])
+ return "Feedback saved successfully!"
+ except Exception as e:
+ return f"Error saving feedback: {str(e)}"
+
+# --- SYNTAX VALIDATION LOGIC ---
+def is_valid_syntax(text: str, language: str) -> bool:
+ """
+ Validates if the text follows the basic comment syntax for the given language.
+ """
+ text = text.strip()
+ if not text:
+ return False
+
+ if language == "java":
+ # Supports: // comment OR /* comment */
+ return text.startswith("//") or (text.startswith("/*") and text.endswith("*/"))
+
+ elif language == "python":
+ # Supports: # comment OR """ docstring """ OR ''' docstring '''
+ return text.startswith("#") or \
+ (text.startswith('"""') and text.endswith('"""')) or \
+ (text.startswith("'''") and text.endswith("'''"))
+
+ elif language == "pharo":
+ # Supports: " comment "
+ return text.startswith('"') and text.endswith('"')
+
+ return True
+
+# --- MAIN DEMO ---
+def create_demo(inference_engine: ModelInference):
+
+ def classify_comment(text: str, language: str):
+ """
+ Calls the inference engine only if syntax is valid.
+ """
+ if not text:
+ return None
+
+ # SYNTAX CHECK
+ if not is_valid_syntax(text, language):
+ error_msg = "Error: Invalid Syntax."
+ if language == "java":
+ error_msg += " Java comments must start with '//' or be enclosed in '/* ... */'."
+ elif language == "python":
+ error_msg += " Python comments must start with '#' or use docstrings ('\"\"\"' / \"'''\")."
+ elif language == "pharo":
+ error_msg += " Pharo comments must be enclosed in double quotes (e.g., \"comment\")."
+ return error_msg
+
+ # INFERENCE
+ try:
+ _, labels, _, _ = inference_engine.predict_payload(
+ texts=[text],
+ language=language
+ )
+
+ if labels and len(labels) > 0:
+ first_prediction = labels[0][0]
+ if isinstance(first_prediction, (list, tuple)):
+ return first_prediction[0]
+ else:
+ return str(first_prediction)
+
+ return "Unknown: Low confidence."
+
+ except Exception as e:
+ print(f"Prediction Error: {e}")
+ return f"System Error: Failed to process request for '{language}'."
+
+ def update_dropdown(language):
+ choices = LABELS_MAP.get(language, [])
+ return gr.Dropdown(choices=choices, value=None, interactive=True)
+
+ def clear_all():
+ return (None, "java", "", gr.Dropdown(choices=LABELS_MAP["java"], value=None, interactive=True), "")
+
+ logo_svg = load_svg_content("logo_header.svg")
+
+ with gr.Blocks(title="Code Comment Classifier") as demo:
+ gr.HTML(f"")
+
+ # --- HEADER ---
+ gr.HTML(f"""
+
+ """)
+
+ with gr.Row():
+ with gr.Column():
+ gr.HTML('')
+ input_text = gr.Textbox(label="Code Comment", lines=8, show_label=False, placeholder="Enter code comment here...")
+ with gr.Row():
+ input_lang = gr.Dropdown(["java", "python", "pharo"], label="Language", value="java", scale=2)
+ submit_btn = gr.Button("⚡ Classify", variant="primary", scale=1)
+ clear_btn = gr.Button("🗑️ Clear All", variant="secondary", size="sm")
+
+ with gr.Column():
+ gr.HTML('')
+ output_tags = gr.Textbox(
+ label="Predicted Category",
+ show_label=False,
+ elem_id="result-box",
+ interactive=False,
+ lines=2
+ )
+
+            gr.HTML('<div class="feedback-title">🛠️ Help Improve the Model</div>')
+ with gr.Row():
+ correction_dropdown = gr.Dropdown(
+ choices=LABELS_MAP["java"],
+ label="Correct Label",
+ show_label=False,
+ container=False,
+ scale=3,
+ interactive=True
+ )
+ feedback_btn = gr.Button("📤 Save Feedback", variant="secondary", scale=1)
+ feedback_msg = gr.Markdown("", show_label=False)
+
+ gr.Examples(
+ examples=[
+ ["/** Validates the user session token. */", "java"],
+ ["# Retry logic for DB connection.", "python"],
+ ['"Manages the network connection lifecycle."', "pharo"]
+ ],
+ inputs=[input_text, input_lang],
+ label="Quick Examples"
+ )
+
+ input_lang.change(fn=update_dropdown, inputs=input_lang, outputs=correction_dropdown)
+ submit_btn.click(fn=classify_comment, inputs=[input_text, input_lang], outputs=[output_tags])
+ feedback_btn.click(fn=save_feedback_to_csv, inputs=[input_text, input_lang, output_tags, correction_dropdown], outputs=[feedback_msg])
+ clear_btn.click(fn=clear_all, inputs=None, outputs=[input_text, input_lang, output_tags, correction_dropdown, feedback_msg])
+
+ demo.load(None, js=JS_LOADER)
+
+ return demo
\ No newline at end of file
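A few sanity checks for the `is_valid_syntax` helper above (the comment strings are made-up examples; this assumes the package and its dependencies are importable):

```python
from turing.api.demo import is_valid_syntax

assert is_valid_syntax("// returns the user id", "java")
assert is_valid_syntax("/* legacy formatter, do not touch */", "java")
assert is_valid_syntax('"""Fetch rows from the cache."""', "python")
assert is_valid_syntax('"I represent a layout constraint."', "pharo")
assert not is_valid_syntax("plain text without comment markers", "java")
```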
diff --git a/turing/api/schemas.py b/turing/api/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..eff7e9def4c6b4233624f8e81cd5a29a3e71898e
--- /dev/null
+++ b/turing/api/schemas.py
@@ -0,0 +1,22 @@
+from typing import Any, List
+
+from pydantic import BaseModel, Field
+
+
+# Input Schema
+class PredictionRequest(BaseModel):
+ texts: List[str] = Field(
+ ...,
+ description="List of code comments to classify",
+ example=["public void main", "def init self"],
+ )
+ language: str = Field(
+ ..., description="Programming language (java, python, pharo)", example="java"
+ )
+
+
+# Output Schema
+class PredictionResponse(BaseModel):
+ predictions: List[Any] = Field(..., description="List of predicted labels")
+ labels: List[Any] = Field(..., description="List of human-readable labels")
+ model_info: dict = Field(..., description="Metadata about the model used")
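To make the request/response contract concrete, here is an illustrative client call against the `/predict` endpoint mounted in `turing/api/app.py`; the URL, the sample texts, and the `requests` dependency are assumptions, not part of this diff:

```python
import requests

payload = {
    "texts": ["// For internal use only.", "/** Validates the user session token. */"],
    "language": "java",
}

# POST to the FastAPI service exposed on port 7860 by the dockerfile/docker-compose setup.
response = requests.post("http://localhost:7860/predict", json=payload, timeout=60)
response.raise_for_status()

body = response.json()
print(body["predictions"])   # raw model outputs
print(body["labels"])        # human-readable labels
print(body["model_info"])    # {"artifact": ..., "language": "java"}
```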
diff --git a/turing/config.py b/turing/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4dd4e2177fa9e11404e349039535bdd9fd11a7a
--- /dev/null
+++ b/turing/config.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+
+from dotenv import load_dotenv
+from loguru import logger
+
+# Load environment variables from .env file if it exists
+load_dotenv()
+
+# Paths
+PROJ_ROOT = Path(__file__).resolve().parents[1]
+logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
+
+DATA_DIR = PROJ_ROOT / "data"
+RAW_DATA_DIR = DATA_DIR / "raw"
+INTERIM_DATA_DIR = DATA_DIR / "interim"
+PROCESSED_DATA_DIR = DATA_DIR / "processed"
+EXTERNAL_DATA_DIR = DATA_DIR / "external"
+
+MODELS_DIR = PROJ_ROOT / "models"
+
+REPORTS_DIR = PROJ_ROOT / "reports"
+FIGURES_DIR = REPORTS_DIR / "figures"
+
+# Dataset
+DATASET_HF_ID = "NLBSE/nlbse26-code-comment-classification"
+LANGS = ["java", "python", "pharo"]
+INPUT_COLUMN = "combo"
+LABEL_COLUMN = "labels"
+
+LABELS_MAP = {
+ "java": ["summary", "Ownership", "Expand", "usage", "Pointer", "deprecation", "rational"],
+ "python": ["Usage", "Parameters", "DevelopmentNotes", "Expand", "Summary"],
+ "pharo": [
+ "Keyimplementationpoints",
+ "Example",
+ "Responsibilities",
+ "Intent",
+ "Keymessages",
+ "Collaborators",
+ ],
+}
+
+TOTAL_CATEGORIES = sum(len(v) for v in LABELS_MAP.values())
+
+# Score parameters
+MAX_AVG_RUNTIME = 5.0 # seconds
+MAX_AVG_FLOPS = 5000.0 # GFLOPS
+
+# Training parameters
+DEFAULT_BATCH_SIZE = 32
+
+# Model configuration mapping
+MODEL_CONFIG = {
+ "codeberta": {
+ "model_name": "fine-tuned-CodeBERTa",
+ "exp_name": "fine-tuned-CodeBERTa",
+ "model_class_module": "turing.modeling.models.codeBerta",
+ "model_class_name": "CodeBERTa",
+ },
+ "graphcodebert": {
+ "model_name": "GraphCodeBERT",
+ "exp_name": "fine-tuned-GraphCodeBERT",
+ "model_class_module": "turing.modeling.models.graphCodeBert",
+ "model_class_name": "GraphCodeBERTClassifier",
+ },
+ "tinybert": {
+ "model_name": "TinyBERT",
+ "exp_name": "fine-tuned-TinyBERT",
+ "model_class_module": "turing.modeling.models.tinyBert",
+ "model_class_name": "TinyBERTClassifier",
+ },
+ "randomforest": {
+ "model_name": "RandomForest-TfIdf",
+ "exp_name": "RandomForest-TfIdf",
+ "model_class_module": "turing.modeling.models.randomForestTfIdf",
+ "model_class_name": "RandomForestTfIdf",
+ },
+}
+DEFAULT_NUM_ITERATIONS = 20
+
+# Existing model modules
+EXISTING_MODELS = [
+ "randomForestTfIdf",
+ "codeBerta",
+]
+
+# If tqdm is installed, configure loguru with tqdm.write
+# https://github.com/Delgan/loguru/issues/135
+try:
+ from tqdm import tqdm
+
+ logger.remove(0)
+ logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+except (ModuleNotFoundError, ValueError):
+ pass
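A sketch of how the `MODEL_CONFIG` mapping can be consumed, resolving a registered model class by key via dynamic import. The helper name is hypothetical; only the dictionary fields come from `turing/config.py`:

```python
import importlib

from turing.config import MODEL_CONFIG


def resolve_model_class(key: str):
    """Return the class referenced by a MODEL_CONFIG entry (hypothetical helper)."""
    entry = MODEL_CONFIG[key.lower()]
    module = importlib.import_module(entry["model_class_module"])
    return getattr(module, entry["model_class_name"])


# e.g. look up the TF-IDF baseline registered under "randomforest"
model_cls = resolve_model_class("randomforest")
print(model_cls.__name__)  # RandomForestTfIdf
```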
diff --git a/turing/data_validation.py b/turing/data_validation.py
new file mode 100644
index 0000000000000000000000000000000000000000..538296889a77a0e771149f8993b738ae90c05ae0
--- /dev/null
+++ b/turing/data_validation.py
@@ -0,0 +1,271 @@
+from pathlib import Path
+import traceback
+from typing import List
+
+from deepchecks.tabular import Dataset, Suite
+from deepchecks.tabular.checks import (
+ ConflictingLabels,
+ DataDuplicates,
+ LabelDrift,
+ OutlierSampleDetection,
+ TrainTestSamplesMix,
+)
+import numpy as np
+import pandas as pd
+
+from turing.config import LABEL_COLUMN, LABELS_MAP
+
+try:
+ from deepchecks.nlp import TextData
+ from deepchecks.nlp.checks import (
+ PropertyDrift,
+ TextEmbeddingsDrift,
+ )
+
+ NLP_AVAILABLE = True
+except ImportError:
+ NLP_AVAILABLE = False
+
+
+def _encode_labels_for_validation(
+ series: pd.Series, class_names: List[str]
+) -> pd.Series:
+ def encode(lbl):
+ active_labels = []
+ for idx, is_active in enumerate(lbl):
+ if is_active:
+ if idx < len(class_names):
+ active_labels.append(class_names[idx])
+ else:
+ active_labels.append(f"Class_{idx}")
+ if not active_labels:
+ return "No_Label"
+ return " & ".join(active_labels)
+
+ return series.apply(encode)
+
+
+def _calculate_code_specific_properties(text_series: List[str]) -> pd.DataFrame:
+ props = []
+ for text in text_series:
+ s = str(text)
+ length = len(s)
+ non_alnum = sum(1 for c in s if not c.isalnum() and not c.isspace())
+ props.append(
+ {
+ "Text_Length": length,
+ "Symbol_Ratio": non_alnum / length if length > 0 else 0.0,
+ }
+ )
+ return pd.DataFrame(props)
+
+
+def _nuke_rogue_files():
+    """
+    Delete stray .npy files (e.g., cached embeddings) that the built-in
+    embedding calculation may leave in the working directory.
+    """
+    rogue_filenames = [
+        "embeddings.npy",
+    ]
+    for fname in rogue_filenames:
+        p = Path(fname)
+        if p.exists():
+            try:
+                p.unlink()
+            except Exception:
+                pass
+
+
+def run_custom_deepchecks(
+ df_train: pd.DataFrame,
+ df_test: pd.DataFrame,
+ output_dir: Path,
+ stage: str,
+ language: str,
+):
+ print(f" [Deepchecks] Running Integrity Suite ({stage})...")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ class_names = LABELS_MAP.get(language, [])
+ cols = ["f_length", "f_word_count", "f_starts_verb", "text_hash"]
+
+ for c in cols:
+ if c not in df_train.columns:
+ df_train[c] = 0
+ if c not in df_test.columns:
+ df_test[c] = 0
+
+ train_ds_df = df_train[cols].copy()
+ train_ds_df["target"] = _encode_labels_for_validation(
+ df_train[LABEL_COLUMN], class_names
+ )
+ test_ds_df = df_test[cols].copy()
+ test_ds_df["target"] = _encode_labels_for_validation(
+ df_test[LABEL_COLUMN], class_names
+ )
+
+ cat_features = ["text_hash", "f_starts_verb"]
+ train_ds = Dataset(train_ds_df, label="target", cat_features=cat_features)
+ test_ds = Dataset(test_ds_df, label="target", cat_features=cat_features)
+
+ check_conflicts = ConflictingLabels(columns=["text_hash"])
+ if hasattr(check_conflicts, "add_condition_ratio_of_conflicting_labels_not_greater_than"):
+ check_conflicts.add_condition_ratio_of_conflicting_labels_not_greater_than(0)
+ else:
+ check_conflicts.add_condition_ratio_of_conflicting_labels_less_or_equal(0)
+
+ check_duplicates = DataDuplicates()
+ if hasattr(check_duplicates, "add_condition_ratio_not_greater_than"):
+ check_duplicates.add_condition_ratio_not_greater_than(0.05)
+ else:
+ check_duplicates.add_condition_ratio_less_or_equal(0.05)
+
+ check_leakage = TrainTestSamplesMix(columns=["text_hash"])
+ try:
+ if hasattr(check_leakage, "add_condition_ratio_not_greater_than"):
+ check_leakage.add_condition_ratio_not_greater_than(0)
+ except Exception:
+ pass
+
+ check_outliers = OutlierSampleDetection()
+ try:
+ if hasattr(check_outliers, "add_condition_outlier_ratio_less_or_equal"):
+ check_outliers.add_condition_outlier_ratio_less_or_equal(0.05)
+ except Exception:
+ pass
+
+ custom_suite = Suite(
+ "Code Quality & Integrity",
+ check_conflicts,
+ check_duplicates,
+ check_leakage,
+ LabelDrift(),
+ check_outliers,
+ )
+
+ try:
+ result = custom_suite.run(train_dataset=train_ds, test_dataset=test_ds)
+ report_path = output_dir / f"1_Integrity_{stage}.html"
+ result.save_as_html(str(report_path), as_widget=False)
+ print(f" [Deepchecks] Report Saved: {report_path}")
+ except Exception as e:
+ print(f" [Deepchecks] Error: {e}")
+ traceback.print_exc()
+
+
+def run_targeted_nlp_checks(
+ df_train: pd.DataFrame,
+ df_test: pd.DataFrame,
+ output_dir: Path,
+ stage: str,
+ language: str = "english",
+):
+ if not NLP_AVAILABLE:
+ print(" [Skip] NLP Suite skipped (libs not installed).")
+ return
+
+ from deepchecks.nlp import Suite as NLPSuite
+
+ print(f" [NLP Check] Running Semantic Analysis ({stage})...")
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Clean up any existing garbage before starting
+ _nuke_rogue_files()
+
+ DRIFT_THRESHOLD = 0.20
+ PROP_THRESHOLD = 0.35
+ SAMPLE_SIZE = 2000
+ df_tr = (
+ df_train.sample(n=SAMPLE_SIZE, random_state=42)
+ if len(df_train) > SAMPLE_SIZE
+ else df_train
+ )
+ df_te = (
+ df_test.sample(n=SAMPLE_SIZE, random_state=42)
+ if len(df_test) > SAMPLE_SIZE
+ else df_test
+ )
+
+ try: # START MAIN TRY BLOCK
+ y_tr = np.vstack(df_tr[LABEL_COLUMN].tolist())
+ y_te = np.vstack(df_te[LABEL_COLUMN].tolist())
+
+ train_ds = TextData(
+ df_tr["comment_sentence"].tolist(),
+ label=y_tr,
+ task_type="text_classification",
+ )
+ test_ds = TextData(
+ df_te["comment_sentence"].tolist(),
+ label=y_te,
+ task_type="text_classification",
+ )
+
+ print(" [NLP Check] Calculating custom code properties...")
+ train_props = _calculate_code_specific_properties(
+ df_tr["comment_sentence"].tolist()
+ )
+ test_props = _calculate_code_specific_properties(
+ df_te["comment_sentence"].tolist()
+ )
+
+ train_ds.set_properties(train_props)
+ test_ds.set_properties(test_props)
+
+ # In-memory calculation only.
+ train_ds.calculate_builtin_embeddings()
+ test_ds.calculate_builtin_embeddings()
+
+ check_embeddings = TextEmbeddingsDrift()
+ if hasattr(check_embeddings, "add_condition_drift_score_not_greater_than"):
+ check_embeddings.add_condition_drift_score_not_greater_than(DRIFT_THRESHOLD)
+ elif hasattr(check_embeddings, "add_condition_drift_score_less_than"):
+ check_embeddings.add_condition_drift_score_less_than(DRIFT_THRESHOLD)
+
+ check_len = PropertyDrift(custom_property_name="Text_Length")
+ if hasattr(check_len, "add_condition_drift_score_not_greater_than"):
+ check_len.add_condition_drift_score_not_greater_than(PROP_THRESHOLD)
+ elif hasattr(check_len, "add_condition_drift_score_less_than"):
+ check_len.add_condition_drift_score_less_than(PROP_THRESHOLD)
+
+ check_sym = PropertyDrift(custom_property_name="Symbol_Ratio")
+ if hasattr(check_sym, "add_condition_drift_score_not_greater_than"):
+ check_sym.add_condition_drift_score_not_greater_than(PROP_THRESHOLD)
+ elif hasattr(check_sym, "add_condition_drift_score_less_than"):
+ check_sym.add_condition_drift_score_less_than(PROP_THRESHOLD)
+
+ suite = NLPSuite(
+ "Code Comment Semantic Analysis",
+ check_embeddings,
+ check_len,
+ check_sym
+ )
+
+ res = suite.run(train_ds, test_ds)
+
+ report_path = output_dir / f"2_Semantic_{stage}.html"
+ res.save_as_html(str(report_path), as_widget=False)
+ print(f" [NLP Check] Report saved: {report_path}")
+
+ try:
+ passed = res.get_passed_checks()
+ n_passed = len(passed)
+ n_total = len(res.results)
+ print(f" [NLP Result] {n_passed}/{n_total} checks passed.")
+
+ if n_passed < n_total:
+ print(" [NLP Warning] Failed Checks details:")
+ for result in res.results:
+ if not result.passed_conditions():
+ print(f" - {result.check.name}: {result.conditions_results[0].details}")
+ except Exception:
+ pass
+
+    except Exception as e:
+        print(f"  [NLP Check] Failed: {e}")
+        traceback.print_exc()
+
+ finally:
+ _nuke_rogue_files()
\ No newline at end of file
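A minimal, illustrative invocation of the integrity suite above. The toy rows and output directory are made up, but the column names (`f_length`, `f_word_count`, `f_starts_verb`, `text_hash`, `labels`) follow what `run_custom_deepchecks` expects:

```python
from pathlib import Path

import pandas as pd

from turing.data_validation import run_custom_deepchecks

# Toy data only: three fake rows with one-hot label vectors matching LABELS_MAP["python"].
toy = pd.DataFrame(
    {
        "f_length": [42, 17, 63],
        "f_word_count": [7, 3, 11],
        "f_starts_verb": [1, 0, 1],
        "text_hash": ["a1b2", "c3d4", "e5f6"],
        "labels": [[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0]],
    }
)

run_custom_deepchecks(
    df_train=toy,
    df_test=toy.copy(),
    output_dir=Path("reports/deepchecks_demo"),
    stage="demo",
    language="python",
)
```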
diff --git a/turing/dataset.py b/turing/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..97cd6efd816d5880790d43d918e193a4a4eb12ab
--- /dev/null
+++ b/turing/dataset.py
@@ -0,0 +1,210 @@
+import ast
+import os
+from pathlib import Path
+
+from datasets import DatasetDict, load_dataset
+from loguru import logger
+
+import turing.config as config
+
+
+class DatasetManager:
+ """
+ Manages the loading, transformation, and access of project datasets.
+ """
+
+ def __init__(self, dataset_path: Path = None):
+ self.hf_id = config.DATASET_HF_ID
+ self.raw_data_dir = config.RAW_DATA_DIR
+ self.interim_data_dir = config.INTERIM_DATA_DIR
+ self.base_interim_path = self.interim_data_dir / "base"
+
+ if dataset_path:
+ self.dataset_path = dataset_path
+ else:
+ self.dataset_path = self.base_interim_path
+
+ def _format_labels_for_csv(self, example: dict) -> dict:
+ """
+ Formats the labels list as a string for CSV storage.
+        (Private helper method.)
+
+ Args:
+ example (dict): A single example from the dataset.
+
+ Returns:
+ dict: The example with labels converted to string.
+ """
+ labels = example.get("labels")
+ if isinstance(labels, list):
+ example["labels"] = str(labels)
+ return example
+
+ def download_dataset(self):
+ """
+ Loads the dataset from Hugging Face and saves it into the "raw" folder.
+ """
+ logger.info(f"Loading dataset: {self.hf_id}")
+ try:
+ ds = load_dataset(self.hf_id)
+ logger.success("Dataset loaded successfully.")
+ logger.info(f"Dataset splits: {ds}")
+
+ self.raw_data_dir.mkdir(parents=True, exist_ok=True)
+
+ for split_name, dataset_split in ds.items():
+ output_path = os.path.join(
+ self.raw_data_dir, f"{split_name.replace('-', '_')}.parquet"
+ )
+ dataset_split.to_parquet(output_path)
+
+ logger.success(f"Dataset saved to {self.raw_data_dir}.")
+ except Exception as e:
+ logger.warning(f"Error during loading: {e}.")
+
+ def parquet_to_csv(self):
+ """
+ Converts all parquet files in the raw data directory
+ to CSV format in the interim data directory.
+ """
+ logger.info("Starting Parquet to CSV conversion...")
+ self.base_interim_path.mkdir(parents=True, exist_ok=True)
+
+ for file_name in os.listdir(self.raw_data_dir):
+ if file_name.endswith(".parquet"):
+ part_name = file_name.replace(".parquet", "").replace("-", "_")
+
+ # Load the parquet file
+ dataset = load_dataset(
+ "parquet", data_files={part_name: str(self.raw_data_dir / file_name)}
+ )
+
+ # Map and format labels
+ dataset[part_name] = dataset[part_name].map(self._format_labels_for_csv)
+
+ # Save to CSV
+ csv_output_path = os.path.join(self.base_interim_path, f"{part_name}.csv")
+ dataset[part_name].to_csv(csv_output_path)
+
+ logger.info(f"Converted {file_name} to {csv_output_path}")
+
+ logger.success("Parquet -> CSV conversion complete.")
+
+ def get_dataset_name(self) -> str:
+ """
+ Returns the name of the current dataset being used.
+
+ Returns:
+ str: The name of the dataset (e.g., 'clean-aug-soft-k5000').
+ """
+ return self.dataset_path.name
+
+ def get_dataset(self) -> DatasetDict:
+ """
+ Returns the processed dataset from the interim data directory
+ as a DatasetDict (loaded from CSVs).
+
+ Returns:
+ DatasetDict: The complete dataset with train and test splits for each language.
+ """
+
+ dataset_path = self.dataset_path
+
+ # Define the base filenames
+ data_files = {
+ "java_train": str(dataset_path / "java_train.csv"),
+ "java_test": str(dataset_path / "java_test.csv"),
+ "python_train": str(dataset_path / "python_train.csv"),
+ "python_test": str(dataset_path / "python_test.csv"),
+ "pharo_train": str(dataset_path / "pharo_train.csv"),
+ "pharo_test": str(dataset_path / "pharo_test.csv"),
+ }
+
+ # Verify file existence before loading
+ logger.info("Loading CSV dataset from splits...")
+ existing_data_files = {}
+ for key, path in data_files.items():
+ if not os.path.exists(path):
+ found = False
+ if os.path.exists(dataset_path):
+ for f in os.listdir(dataset_path):
+ if f.startswith(key) and f.endswith(".csv"):
+ existing_data_files[key] = str(dataset_path / f)
+ found = True
+ break
+ if not found:
+ logger.warning(f"File not found for split '{key}': {path}")
+ else:
+ existing_data_files[key] = path
+
+ if not existing_data_files:
+ logger.error("No dataset CSV files found. Run 'parquet-to-csv' first.")
+ raise FileNotFoundError("Dataset CSV files not found.")
+
+ logger.info(f"Found files: {list(existing_data_files.keys())}")
+
+ full_dataset = load_dataset("csv", data_files=existing_data_files)
+
+ logger.info("Formatting labels (from string back to list)...")
+ for split in full_dataset:
+ full_dataset[split] = full_dataset[split].map(
+ lambda x: {
+ "labels": ast.literal_eval(x["labels"])
+ if isinstance(x["labels"], str)
+ else x["labels"]
+ }
+ )
+
+ logger.success("Dataset is ready for use.")
+ return full_dataset
+
+ def get_raw_dataset_from_hf(self) -> DatasetDict:
+ """
+ Loads the raw dataset directly from Hugging Face without saving.
+
+ Returns:
+ DatasetDict: The raw dataset from Hugging Face.
+ """
+ logger.info(f"Loading raw dataset '{self.hf_id}' from Hugging Face...")
+ try:
+ ds = load_dataset(self.hf_id)
+ logger.success(f"Successfully loaded '{self.hf_id}'.")
+ return ds
+ except Exception as e:
+ logger.error(f"Failed to load dataset from Hugging Face: {e}")
+ return None
+
+ def search_file(self, file_name: str, search_directory: Path = None) -> list:
+ """
+ Recursively searches for a file by name within a specified data directory.
+
+ Args:
+ file_name (str): The name of the file to search for (e.g., "java_train.csv").
+ search_directory (Path, optional): The directory to search in.
+ Defaults to self.raw_data_dir.
+
+ Returns:
+ list: A list of Path objects for all found files.
+ """
+ if search_directory is None:
+ search_directory = self.raw_data_dir
+ logger.info(f"Defaulting search to raw data directory: {search_directory}")
+
+        if not search_directory.is_dir():
+            logger.error(f"Search directory not found: {search_directory}")
+            return []
+
+        logger.info(f"Searching for '{file_name}' in '{search_directory}'...")
+
+        found_files = []
+        for root, dirs, files in os.walk(search_directory):
+            for file in files:
+                if file == file_name:
+                    found_files.append(Path(root) / file)
+
+        if not found_files:
+            logger.warning(f"No files named '{file_name}' found in '{search_directory}'.")
+        else:
+            logger.success(f"Found {len(found_files)} matching file(s).")
+
+        return found_files
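The typical end-to-end flow of `DatasetManager`, mirroring the CLI commands in `turing/CLI_runner/run_dataset.py` (shown as a plain-Python sketch; the printed record is illustrative):

```python
from turing.dataset import DatasetManager

manager = DatasetManager()

manager.download_dataset()    # Hugging Face -> data/raw/*.parquet
manager.parquet_to_csv()      # data/raw/*.parquet -> data/interim/base/*.csv

dataset = manager.get_dataset()    # DatasetDict keyed by "<lang>_train" / "<lang>_test"
print(dataset["java_train"][0])    # labels are parsed back into Python lists
```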
diff --git a/turing/evaluate_model.py b/turing/evaluate_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e41e59169ed44b57b7c06b29233d82d65722ce5
--- /dev/null
+++ b/turing/evaluate_model.py
@@ -0,0 +1,121 @@
+import time
+
+from datasets import DatasetDict
+from loguru import logger
+import numpy as np
+import pandas as pd
+import torch
+
+import turing.config as config
+
+
+def calculate_submission_score(avg_f1: float, avg_runtime: float, avg_flops: float) -> float:
+ """
+ Calculates the final competition score.
+ The score is a weighted sum of F1 score, runtime, and GFLOPS.
+ Weights:
+ - F1 Score: 60%
+ - Runtime: 20%
+ - GFLOPS: 20%
+
+ Args:
+ avg_f1 (float): Average F1 score across all categories.
+ avg_runtime (float): Average runtime in seconds.
+ avg_flops (float): Average GFLOPS.
+
+ Returns:
+ float: Final submission score.
+ """
+
+ score_f1 = 0.6 * avg_f1
+
+ runtime_ratio = (config.MAX_AVG_RUNTIME - avg_runtime) / config.MAX_AVG_RUNTIME
+ score_runtime = 0.2 * max(runtime_ratio, 0)
+
+ flops_ratio = (config.MAX_AVG_FLOPS - avg_flops) / config.MAX_AVG_FLOPS
+ score_flops = 0.2 * max(flops_ratio, 0)
+
+ total_score = score_f1 + score_runtime + score_flops
+
+ logger.info(f" F1 Score (60%): {score_f1:.4f} (avg_f1: {avg_f1:.4f})")
+ logger.info(
+ f" Runtime Score (20%): {score_runtime:.4f} (avg_runtime: {avg_runtime:.4f}s / {config.MAX_AVG_RUNTIME}s)"
+ )
+ logger.info(
+ f" GFLOPS Score (20%): {score_flops:.4f} (avg_flops: {avg_flops:.4f} / {config.MAX_AVG_FLOPS})"
+ )
+ logger.info(" ====================")
+ logger.info(f" Final Score: {total_score:.4f}")
+
+ return total_score
+
+
+def evaluate_models(models: dict, dataset: DatasetDict):
+ """
+ Evaluates the provided models on the test datasets for each language.
+ Computes precision, recall, and F1 score for each category and language.
+ Also measures average runtime and GFLOPS for model inference.
+
+ Args:
+ models (dict): A dictionary mapping language codes to their respective models.
+ dataset (DatasetDict): A DatasetDict containing test datasets for each language.
+
+ Returns:
+ pd.DataFrame: DataFrame containing precision, recall, and F1 scores for each category and language.
+        float: Final submission score calculated based on average F1, runtime, and GFLOPS.
+ """
+
+ total_flops = 0
+ total_time = 0
+ scores = []
+
+ for lan in config.LANGS:
+ logger.info(f"\n--- Evaluating Language: {lan.upper()} ---")
+ model = models[lan]
+
+ with torch.profiler.profile(with_flops=True) as p:
+ test_data = dataset[f"{lan}_test"]
+ x = test_data[config.INPUT_COLUMN]
+ x = list(x) if hasattr(x, 'tolist') else x # Convert pandas Series to list
+ y_true = np.array(test_data[config.LABEL_COLUMN]).T
+
+ begin = time.time()
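+            # Repeat inference 10 times so runtime and FLOPs can be averaged over runs (the /10 below)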
+ for i in range(10):
+ y_pred = model.predict(x)
+ y_pred = np.asarray(y_pred).T
+ total = time.time() - begin
+ total_time = total_time + total
+
+ total_flops = total_flops + (sum(k.flops for k in p.key_averages()) / 1e9)
+
+ for i in range(len(y_pred)):
+ assert len(y_pred[i]) == len(y_true[i])
+ tp = sum([true == pred == 1 for (true, pred) in zip(y_true[i], y_pred[i])])
+ #tn = sum([true == pred == 0 for (true, pred) in zip(y_true[i], y_pred[i])])
+ fp = sum([true == 0 and pred == 1 for (true, pred) in zip(y_true[i], y_pred[i])])
+ fn = sum([true == 1 and pred == 0 for (true, pred) in zip(y_true[i], y_pred[i])])
+            # Guard against zero division when a category has no positive predictions or labels
+            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+            f1 = (2 * tp) / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0.0
+ scores.append({
+ "lan": lan,
+ "cat": config.LABELS_MAP[lan][i],
+ "precision": precision,
+ "recall": recall,
+ "f1": f1,
+ })
+
+ logger.info(f"Compute in GFLOPs: {total_flops / 10}")
+ logger.info(f"Avg runtime in seconds: {total_time / 10}")
+ scores = pd.DataFrame(scores)
+ print(scores)
+
+ avg_f1 = scores["f1"].mean()
+ avg_runtime = total_time / 10
+ avg_flops = total_flops / 10
+
+ final_score = calculate_submission_score(avg_f1, avg_runtime, avg_flops)
+
+    logger.info(f"Final Score across all languages: {final_score:.4f}")
+
+ return scores, final_score
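
A quick sanity check for the weighting in `calculate_submission_score`: the sketch below reproduces the formula with hypothetical budget values standing in for `config.MAX_AVG_RUNTIME` and `config.MAX_AVG_FLOPS` (the real values are defined in `turing/config.py` and may differ).

```python
# Minimal sketch of the submission score, assuming hypothetical budgets.
MAX_AVG_RUNTIME = 5.0   # assumed runtime budget (seconds)
MAX_AVG_FLOPS = 10.0    # assumed compute budget (GFLOPs)


def submission_score(avg_f1: float, avg_runtime: float, avg_flops: float) -> float:
    score_f1 = 0.6 * avg_f1
    score_runtime = 0.2 * max((MAX_AVG_RUNTIME - avg_runtime) / MAX_AVG_RUNTIME, 0)
    score_flops = 0.2 * max((MAX_AVG_FLOPS - avg_flops) / MAX_AVG_FLOPS, 0)
    return score_f1 + score_runtime + score_flops


# avg_f1 = 0.80, avg_runtime = 2.5 s, avg_flops = 4 GFLOPs
# -> 0.6*0.80 + 0.2*0.50 + 0.2*0.60 = 0.48 + 0.10 + 0.12 = 0.70
print(submission_score(0.80, 2.5, 4.0))
```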
diff --git a/turing/features.py b/turing/features.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1b350f3e1156dc7394b0725189f05094616488b
--- /dev/null
+++ b/turing/features.py
@@ -0,0 +1,678 @@
+import ast
+import hashlib
+from pathlib import Path
+import random
+import re
+from typing import List, Tuple
+
+import nltk
+from nltk.corpus import stopwords, wordnet
+from nltk.stem import PorterStemmer, WordNetLemmatizer
+import numpy as np
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.feature_selection import SelectKBest, chi2
+import typer
+
+from turing.config import (
+ INTERIM_DATA_DIR,
+ LABEL_COLUMN,
+ LANGS,
+)
+from turing.data_validation import run_custom_deepchecks, run_targeted_nlp_checks
+from turing.dataset import DatasetManager
+
+# --- NLTK Resource Check ---
+# Each resource is looked up under its actual NLTK data path; missing ones are downloaded quietly.
+REQUIRED_NLTK_PACKAGES = {
+    "stopwords": "corpora/stopwords",
+    "wordnet": "corpora/wordnet",
+    "omw-1.4": "corpora/omw-1.4",
+    "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
+    "punkt": "tokenizers/punkt",
+}
+for package, resource_path in REQUIRED_NLTK_PACKAGES.items():
+    try:
+        nltk.data.find(resource_path)
+    except LookupError:
+        try:
+            nltk.download(package, quiet=True)
+        except Exception:
+            pass
+
+app = typer.Typer()
+
+
+# --- CONFIGURATION CLASS ---
+class FeaturePipelineConfig:
+ """
+ Configuration holder for the pipeline. Generates a unique ID based on parameters
+ to version the output directories.
+ """
+
+ def __init__(
+ self,
+ use_stopwords: bool,
+ use_lemmatization: bool,
+ use_combo_feature: bool,
+ max_features: int,
+ min_comment_length: int,
+ max_comment_length: int,
+ enable_augmentation: bool,
+ custom_tags: str = "base",
+ ):
+ self.use_stopwords = use_stopwords
+ self.use_lemmatization = use_lemmatization
+ self.use_combo_feature = use_combo_feature
+ self.max_features = max_features
+ self.min_comment_length = min_comment_length
+ self.max_comment_length = max_comment_length
+ self.enable_augmentation = enable_augmentation
+ self.custom_tags = custom_tags
+ self.hash_id = self._generate_readable_id()
+
+ def _generate_readable_id(self) -> str:
+ tags = ["clean"]
+ if self.enable_augmentation:
+ tags.append("aug-soft")
+ tags.append(f"k{self.max_features}")
+ if self.custom_tags != "base":
+ tags.append(self.custom_tags)
+ return "-".join(tags)
+
+
+# --- TEXT UTILITIES ---
+class TextCanonicalizer:
+ """
+    Reduces text to a 'canonical' form (lowercase, stemmed, stopwords removed)
+    to detect semantic duplicates.
+    Preserves Javadoc tags to distinguish usage (@return) from summary (Returns).
+ """
+
+ def __init__(self):
+ self.stemmer = PorterStemmer()
+ self.stop_words = set(stopwords.words("english"))
+ # Code keywords are preserved as they carry semantic weight
+ self.code_keywords = {
+ "return",
+ "true",
+ "false",
+ "null",
+ "if",
+ "else",
+ "void",
+ "int",
+ "boolean",
+ "param",
+ "throws",
+ "exception",
+ }
+
+ def to_canonical(self, text: str) -> str:
+ if pd.isna(text):
+ return ""
+ text = str(text).lower()
+ text = re.sub(r"[^a-z0-9\s@]", " ", text)
+
+ words = text.split()
+ canonical_words = []
+
+ for w in words:
+ # If the word starts with @ (e.g., @return), keep it as is
+ if w.startswith("@"):
+ canonical_words.append(w)
+ continue
+
+ if w in self.stop_words and w not in self.code_keywords:
+ continue
+
+ stemmed = self.stemmer.stem(w)
+ canonical_words.append(stemmed)
+
+ return " ".join(canonical_words).strip()
+
+
+class TextProcessor:
+ """
+ Standard text cleaning logic for final feature extraction (TF-IDF).
+ """
+
+ def __init__(self, config: FeaturePipelineConfig, language: str = "english"):
+ self.config = config
+ self.stop_words = set(stopwords.words(language))
+ self.lemmatizer = WordNetLemmatizer()
+
+ def clean_text(self, text: str) -> str:
+ if pd.isna(text):
+ return ""
+ text = str(text).lower()
+ # Remove heavy code markers but keep text structure
+ text = re.sub(r"(^\s*//+|^\s*/\*+|\*/$)", "", text)
+ # Keep only alpha characters for NLP model (plus pipe for combo)
+ text = re.sub(r"[^a-z\s|]", " ", text)
+ tokens = text.split()
+ if self.config.use_stopwords:
+ tokens = [w for w in tokens if w not in self.stop_words]
+ if self.config.use_lemmatization:
+ tokens = [self.lemmatizer.lemmatize(w) for w in tokens]
+ return " ".join(tokens)
+
+
+# --- AUGMENTATION ---
+class SafeAugmenter:
+ """
+ protects reserved keywords from synonym replacement.
+ """
+
+ def __init__(self, aug_prob=0.3):
+ self.aug_prob = aug_prob
+ self.protected_words = {
+ "return",
+ "public",
+ "private",
+ "void",
+ "class",
+ "static",
+ "final",
+ "if",
+ "else",
+ "for",
+ "while",
+ "try",
+ "catch",
+ "import",
+ "package",
+ "null",
+ "true",
+ "false",
+ "self",
+ "def",
+ "todo",
+ "fixme",
+ "param",
+ "throw",
+ }
+
+ def get_synonyms(self, word):
+ synonyms = set()
+ for syn in wordnet.synsets(word):
+ for lemma in syn.lemmas():
+ name = lemma.name().replace("_", " ")
+ if name.isalpha() and name.lower() != word.lower():
+ synonyms.add(name)
+ return list(synonyms)
+
+ def augment(self, text: str) -> str:
+ if pd.isna(text) or not text:
+ return ""
+ words = text.split()
+ if len(words) < 2:
+ return text
+ new_words = []
+ for word in words:
+ word_lower = word.lower()
+
+ if word_lower in self.protected_words:
+ new_words.append(word)
+ continue
+
+ # Random Case Injection (Noise)
+ if random.random() < 0.1:
+ if word[0].isupper():
+ new_words.append(word.lower())
+ else:
+ new_words.append(word.capitalize())
+ continue
+
+ # Synonym Replacement
+ if random.random() < self.aug_prob and len(word) > 3:
+ syns = self.get_synonyms(word_lower)
+ if syns:
+ replacement = random.choice(syns)
+ if word[0].isupper():
+ replacement = replacement.capitalize()
+ new_words.append(replacement)
+ else:
+ new_words.append(word)
+ else:
+ new_words.append(word)
+ return " ".join(new_words)
+
+ def apply_balancing(
+ self, df: pd.DataFrame, min_samples: int = 100
+ ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+ """
+ Generates synthetic data for minority classes.
+ Returns: (Balanced DataFrame, Report DataFrame)
+ """
+ df["temp_label_str"] = df[LABEL_COLUMN].astype(str)
+ counts = df["temp_label_str"].value_counts()
+ print(
+ f"\n [Balance Check - PRE] Min class size: {counts.min()} | Max: {counts.max()}"
+ )
+
+ existing_sentences = set(df["comment_sentence"].str.strip())
+ new_rows = []
+ report_rows = []
+
+ for label_str, count in counts.items():
+ if count < min_samples:
+ needed = min_samples - count
+ class_subset = df[df["temp_label_str"] == label_str]
+ if class_subset.empty:
+ continue
+
+ samples = class_subset["comment_sentence"].tolist()
+ orig_label = class_subset[LABEL_COLUMN].iloc[0]
+
+ # Propagate 'combo' if present
+ orig_combo = None
+ if "combo" in class_subset.columns:
+ orig_combo = class_subset["combo"].iloc[0]
+
+ generated = 0
+ attempts = 0
+ # Cap attempts to avoid infinite loops if vocabulary is too small
+ while generated < needed and attempts < needed * 5:
+ attempts += 1
+ src = random.choice(samples)
+ aug_txt = self.augment(src).strip()
+
+ # Ensure Global Uniqueness
+ if aug_txt and aug_txt not in existing_sentences:
+ row = {
+ "comment_sentence": aug_txt,
+ LABEL_COLUMN: orig_label,
+ "partition": "train_aug",
+ "index": -1, # Placeholder
+ }
+ if orig_combo:
+ row["combo"] = orig_combo
+
+ new_rows.append(row)
+ report_rows.append(
+ {
+ "original_text": src,
+ "augmented_text": aug_txt,
+ "label": label_str,
+ "reason": f"Class has {count} samples (Target {min_samples})",
+ }
+ )
+ existing_sentences.add(aug_txt)
+ generated += 1
+
+ df = df.drop(columns=["temp_label_str"])
+ df_report = pd.DataFrame(report_rows)
+
+ if new_rows:
+ augmented_df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
+ augmented_df["index"] = range(len(augmented_df))
+
+ temp_counts = augmented_df[LABEL_COLUMN].astype(str).value_counts()
+ print(
+ f" [Balance Check - POST] Min class size: {temp_counts.min()} | Max: {temp_counts.max()}"
+ )
+ return augmented_df, df_report
+
+ return df, df_report
+
+
+# --- CLEANING LOGIC ---
+def clean_training_data_smart(
+ df: pd.DataFrame, min_len: int, max_len: int, language: str = "english"
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+ """
+ Performs 'Smart Cleaning' on the Training Set with language-specific heuristics.
+ """
+ canon = TextCanonicalizer()
+ dropped_rows = []
+
+ print(f" [Clean] Computing heuristics (Language: {language})...")
+ df["canon_key"] = df["comment_sentence"].apply(canon.to_canonical)
+
+ # 1. Token Length Filter
+ def count_code_tokens(text):
+ return len([t for t in re.split(r"[^a-zA-Z0-9]+", str(text)) if t])
+
+ df["temp_token_len"] = df["comment_sentence"].apply(count_code_tokens)
+
+
+ MIN_ALPHA_CHARS = 6
+ MAX_SYMBOL_RATIO = 0.50
+
+    # 2. Heuristic Filters (Tiny/Huge/Code)
+    def get_heuristics(text):
+        """Return (is_tiny, is_huge, symbol_ratio) for a comment string."""
+        s = str(text).strip()
+        char_len = len(s)
+        if char_len == 0:
+            # Empty comments are flagged as tiny and fully symbolic so they get dropped
+            return True, False, 1.0
+
+        alpha_len = sum(1 for c in s if c.isalpha())
+        non_alnum_chars = sum(1 for c in s if not c.isalnum() and not c.isspace())
+        symbol_ratio = non_alnum_chars / char_len
+
+        is_tiny = alpha_len < MIN_ALPHA_CHARS
+        is_huge = char_len > 800
+
+        return is_tiny, is_huge, symbol_ratio
+
+    heuristics = df["comment_sentence"].apply(get_heuristics)
+    df["is_tiny"] = [x[0] for x in heuristics]
+    df["is_huge"] = [x[1] for x in heuristics]
+    df["symbol_ratio"] = [x[2] for x in heuristics]
+
+    df["is_code"] = df["symbol_ratio"] > MAX_SYMBOL_RATIO
+
+ mask_keep = (
+ (df["temp_token_len"] >= min_len)
+ & (df["temp_token_len"] <= max_len)
+ & (~df["is_tiny"])
+ & (~df["is_huge"])
+ & (~df["is_code"])
+ )
+
+ df_dropped_qual = df[~mask_keep].copy()
+ if not df_dropped_qual.empty:
+ def reason(row):
+ if row["is_tiny"]:
+ return f"Too Tiny (<{MIN_ALPHA_CHARS} alpha)"
+ if row["is_huge"]:
+ return "Too Huge (>800 chars)"
+ if row["is_code"]:
+ return f"Pure Code (>{int(MAX_SYMBOL_RATIO*100)}% symbols)"
+ return f"Token Count ({row['temp_token_len']})"
+
+ df_dropped_qual["drop_reason"] = df_dropped_qual.apply(reason, axis=1)
+ dropped_rows.append(df_dropped_qual)
+
+ df = df[mask_keep].copy()
+
+ # 3. Semantic Conflicts (Ambiguity)
+ df["label_s"] = df[LABEL_COLUMN].astype(str)
+ conflict_counts = df.groupby("canon_key")["label_s"].nunique()
+ conflicting_keys = conflict_counts[conflict_counts > 1].index
+
+ mask_conflicts = df["canon_key"].isin(conflicting_keys)
+ df_dropped_conflicts = df[mask_conflicts].copy()
+ if not df_dropped_conflicts.empty:
+ df_dropped_conflicts["drop_reason"] = "Semantic Conflict"
+ dropped_rows.append(df_dropped_conflicts)
+
+ df = df[~mask_conflicts].copy()
+
+ # 4. Exact Duplicates
+ mask_dupes = df.duplicated(subset=["comment_sentence"], keep="first")
+ df_dropped_dupes = df[mask_dupes].copy()
+ if not df_dropped_dupes.empty:
+ df_dropped_dupes["drop_reason"] = "Exact Duplicate"
+ dropped_rows.append(df_dropped_dupes)
+
+ df = df[~mask_dupes].copy()
+
+ # Cleanup columns
+ cols_to_drop = [
+ "canon_key",
+ "label_s",
+ "temp_token_len",
+ "is_tiny",
+ "is_huge",
+ "is_code",
+ "symbol_ratio"
+ ]
+ df = df.drop(columns=cols_to_drop, errors="ignore")
+
+ if dropped_rows:
+ df_report = pd.concat(dropped_rows, ignore_index=True)
+ cols_rep = ["index", "comment_sentence", LABEL_COLUMN, "drop_reason"]
+ final_cols = [c for c in cols_rep if c in df_report.columns]
+ df_report = df_report[final_cols]
+ else:
+ df_report = pd.DataFrame(columns=["index", "comment_sentence", "drop_reason"])
+
+ print(f" [Clean] Removed {len(df_report)} rows. Final: {len(df)}.")
+ return df, df_report
+
+# --- FEATURE ENGINEERING ---
+class FeatureEngineer:
+ def __init__(self, config: FeaturePipelineConfig):
+ self.config = config
+ self.processor = TextProcessor(config=config)
+ self.tfidf_vectorizer = TfidfVectorizer(max_features=config.max_features)
+
+ def extract_features_for_check(self, df: pd.DataFrame) -> pd.DataFrame:
+ """Extracts metadata features for analysis."""
+
+ def analyze(text):
+ s = str(text)
+ words = s.split()
+ n_words = len(words)
+ if n_words == 0:
+ return 0, 0, 0
+ first_word = words[0].lower()
+ starts_verb = (
+ 1
+ if first_word.endswith("s")
+ or first_word.startswith("get")
+ or first_word.startswith("set")
+ else 0
+ )
+ return (len(s), n_words, starts_verb)
+
+ metrics = df["comment_sentence"].apply(analyze)
+ df["f_length"] = [x[0] for x in metrics]
+ df["f_word_count"] = [x[1] for x in metrics]
+ df["f_starts_verb"] = [x[2] for x in metrics]
+ # Calculate MD5 hash for efficient exact duplicate detection in Deepchecks
+ df["text_hash"] = df["comment_sentence"].apply(
+ lambda x: hashlib.md5(str(x).encode()).hexdigest()
+ )
+ return df
+
+ def vectorize_and_select(self, df_train, df_test):
+ def clean_fn(x):
+ return re.sub(r"[^a-zA-Z\s]", "", str(x).lower())
+
+ X_train = self.tfidf_vectorizer.fit_transform(
+ df_train["comment_sentence"].apply(clean_fn)
+ )
+ y_train = np.stack(df_train[LABEL_COLUMN].values)
+
+ # Handling multi-label for Chi2 (using sum or max)
+ y_train_sum = (
+ y_train.sum(axis=1) if len(y_train.shape) > 1 else y_train
+ )
+ selector = SelectKBest(
+ chi2, k=min(self.config.max_features, X_train.shape[1])
+ )
+ X_train = selector.fit_transform(X_train, y_train_sum)
+
+ X_test = self.tfidf_vectorizer.transform(
+ df_test["comment_sentence"].apply(clean_fn)
+ )
+ X_test = selector.transform(X_test)
+
+ vocab = [
+ self.tfidf_vectorizer.get_feature_names_out()[i]
+ for i in selector.get_support(indices=True)
+ ]
+ return X_train, X_test, vocab
+
+
+# --- MAIN EXECUTION ---
+def main(
+ feature_dir: Path = typer.Option(
+ INTERIM_DATA_DIR / "features", help="Output dir."
+ ),
+ reports_root: Path = typer.Option(
+ Path("reports/data"), help="Reports root."
+ ),
+ max_features: int = typer.Option(5000),
+ min_comment_length: int = typer.Option(
+        2, help="Remove comments with fewer tokens than this."
+ ),
+ max_comment_length: int = typer.Option(300),
+ augment: bool = typer.Option(False, "--augment", help="Enable augmentation."),
+ balance_threshold: int = typer.Option(100, help="Min samples per class."),
+ run_vectorization: bool = typer.Option(False, "--run-vectorization"),
+ run_nlp_check: bool = typer.Option(
+ True, "--run-nlp", help="Run Deepchecks NLP suite."
+ ),
+ custom_tags: str = typer.Option("base", help="Custom tags."),
+ save_full_csv: bool = typer.Option(False, "--save-full-csv"),
+ languages: List[str] = typer.Option(LANGS, show_default=False),
+):
+
+    config = FeaturePipelineConfig(
+        use_stopwords=True,
+        use_lemmatization=True,
+        use_combo_feature=True,
+        max_features=max_features,
+        min_comment_length=min_comment_length,
+        max_comment_length=max_comment_length,
+        enable_augmentation=augment,
+        custom_tags=custom_tags,
+    )
+ print(f"=== Pipeline ID: {config.hash_id} ===")
+
+ dm = DatasetManager()
+ full_dataset = dm.get_dataset()
+ fe = FeatureEngineer(config)
+ augmenter = SafeAugmenter()
+
+ feat_output_dir = feature_dir / config.hash_id
+ feat_output_dir.mkdir(parents=True, exist_ok=True)
+ report_output_dir = reports_root / config.hash_id
+
+ for lang in languages:
+ print(f"\n{'='*30}\nPROCESSING LANGUAGE: {lang.upper()}\n{'='*30}")
+ df_train = full_dataset[f"{lang}_train"].to_pandas()
+ df_test = full_dataset[f"{lang}_test"].to_pandas()
+
+ # Standardize Label Format
+ for df in [df_train, df_test]:
+ if isinstance(df[LABEL_COLUMN].iloc[0], str):
+ df[LABEL_COLUMN] = (
+ df[LABEL_COLUMN]
+ .str.replace(r"\s+", ", ", regex=True)
+ .apply(ast.literal_eval)
+ )
+
+ lang_report_dir = report_output_dir / lang
+
+ # 1. RAW AUDIT
+ print(" >>> Phase 1: Auditing RAW Data")
+ df_train_raw = fe.extract_features_for_check(df_train.copy())
+ df_test_raw = fe.extract_features_for_check(df_test.copy())
+ run_custom_deepchecks(
+ df_train_raw, df_test_raw, lang_report_dir, "raw", lang
+ )
+ if run_nlp_check:
+ run_targeted_nlp_checks(
+ df_train_raw, df_test_raw, lang_report_dir, "raw"
+ )
+
+ # 2. CLEANING & AUGMENTATION
+ print("\n >>> Phase 2: Smart Cleaning & Augmentation")
+ df_train, df_dropped = clean_training_data_smart(
+ df_train, min_comment_length, max_comment_length, language=lang
+ )
+
+ if not df_dropped.empty:
+ dropped_path = lang_report_dir / "dropped_rows.csv"
+ df_dropped.to_csv(dropped_path, index=False)
+ print(f" [Report] Dropped rows details saved to: {dropped_path}")
+
+ if augment:
+ print(" [Augment] Applying Soft Balancing...")
+ df_train, df_aug_report = augmenter.apply_balancing(
+ df_train, min_samples=balance_threshold
+ )
+
+ if not df_aug_report.empty:
+ aug_path = lang_report_dir / "augmentation_report.csv"
+ df_aug_report.to_csv(aug_path, index=False)
+ print(
+ f" [Report] Augmentation details saved to: {aug_path}"
+ )
+
+ # 3. PROCESSED AUDIT
+ print("\n >>> Phase 3: Auditing PROCESSED Data")
+ df_train = fe.extract_features_for_check(df_train)
+ df_test = fe.extract_features_for_check(df_test)
+ run_custom_deepchecks(
+ df_train, df_test, lang_report_dir, "processed", lang
+ )
+ if run_nlp_check:
+ run_targeted_nlp_checks(
+ df_train, df_test, lang_report_dir, "processed"
+ )
+
+ # 4. FINAL PROCESSING & SAVING
+ print("\n >>> Phase 4: Final Processing & Save")
+ df_train["comment_clean"] = df_train["comment_sentence"].apply(
+ fe.processor.clean_text
+ )
+ df_test["comment_clean"] = df_test["comment_sentence"].apply(
+ fe.processor.clean_text
+ )
+
+ if config.use_combo_feature:
+ if "combo" in df_train.columns:
+ df_train["combo_clean"] = df_train["combo"].apply(
+ fe.processor.clean_text
+ )
+ if "combo" in df_test.columns:
+ df_test["combo_clean"] = df_test["combo"].apply(
+ fe.processor.clean_text
+ )
+
+ X_train, X_test, vocab = None, None, []
+ if run_vectorization:
+ print(" [Vectorization] TF-IDF & Chi2...")
+ X_train, X_test, vocab = fe.vectorize_and_select(df_train, df_test)
+
+        def format_label_robust(lbl):
+ if hasattr(lbl, "tolist"): # Check if numpy array
+ lbl = lbl.tolist()
+ return str(lbl)
+
+ df_train[LABEL_COLUMN] = df_train[LABEL_COLUMN].apply(format_label_robust)
+ df_test[LABEL_COLUMN] = df_test[LABEL_COLUMN].apply(format_label_robust)
+
+ cols_to_save = [
+ "index",
+ LABEL_COLUMN,
+ "comment_sentence",
+ "comment_clean",
+ ]
+ if "combo" in df_train.columns:
+ cols_to_save.append("combo")
+ if "combo_clean" in df_train.columns:
+ cols_to_save.append("combo_clean")
+ meta_cols = [c for c in df_train.columns if c.startswith("f_")]
+ cols_to_save.extend(meta_cols)
+
+ print(f" [Save] Columns: {cols_to_save}")
+ df_train[cols_to_save].to_csv(
+ feat_output_dir / f"{lang}_train.csv", index=False
+ )
+ df_test[cols_to_save].to_csv(
+ feat_output_dir / f"{lang}_test.csv", index=False
+ )
+
+ if run_vectorization and X_train is not None:
+ from scipy.sparse import save_npz
+
+ save_npz(feat_output_dir / f"{lang}_train_tfidf.npz", X_train)
+ save_npz(feat_output_dir / f"{lang}_test_tfidf.npz", X_test)
+ with open(
+ feat_output_dir / f"{lang}_vocab.txt", "w", encoding="utf-8"
+ ) as f:
+ f.write("\n".join(vocab))
+
+ print(f"\nAll Done. Reports in: {report_output_dir}")
+
+
+if __name__ == "__main__":
+ typer.run(main)
\ No newline at end of file
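
To make the versioning and deduplication behavior above concrete, here is a small standalone sketch; the inputs are made-up comments and the printed ID assumes the default "base" custom tag.

```python
# Illustrative use of FeaturePipelineConfig IDs and the semantic-duplicate key.
from turing.features import FeaturePipelineConfig, TextCanonicalizer

config = FeaturePipelineConfig(
    use_stopwords=True,
    use_lemmatization=True,
    use_combo_feature=True,
    max_features=5000,
    min_comment_length=2,
    max_comment_length=300,
    enable_augmentation=True,
)
print(config.hash_id)  # "clean-aug-soft-k5000"

canon = TextCanonicalizer()
a = canon.to_canonical("Returns the number of items in the list.")
b = canon.to_canonical("returns number of items in a list")
print(a == b)  # True -> both comments collapse to the same stemmed key
```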
diff --git a/turing/modeling/__init__.py b/turing/modeling/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/turing/modeling/__pycache__/__init__.cpython-312.pyc b/turing/modeling/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c639786e44075a325e64356d683d24db8ed0a443
Binary files /dev/null and b/turing/modeling/__pycache__/__init__.cpython-312.pyc differ
diff --git a/turing/modeling/__pycache__/baseModel.cpython-312.pyc b/turing/modeling/__pycache__/baseModel.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..80883da933239f0960209bb179084d4a46408082
Binary files /dev/null and b/turing/modeling/__pycache__/baseModel.cpython-312.pyc differ
diff --git a/turing/modeling/baseModel.py b/turing/modeling/baseModel.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9f0fde2c413d1052b0902c980a10a8274cd75c6
--- /dev/null
+++ b/turing/modeling/baseModel.py
@@ -0,0 +1,111 @@
+from abc import ABC, abstractmethod
+import os
+import shutil
+from typing import Any
+
+from loguru import logger
+import mlflow
+from numpy import ndarray
+
+
+class BaseModel(ABC):
+ """
+ Abstract base class for training models.
+ Subclasses should define the model and implement specific logic
+ for training, evaluation, and model persistence.
+ """
+
+ def __init__(self, language, path=None):
+ """
+ Initialize the trainer.
+
+ Args:
+ language (str): Language for the model.
+ path (str, optional): Path to load a pre-trained model. Defaults to None.
+ If None, a new model is initialized.
+ """
+
+ self.language = language
+ self.model = None
+ if path:
+ self.load(path)
+ else:
+ self.setup_model()
+
+ @abstractmethod
+ def setup_model(self):
+ """
+ Initialize or build the model.
+ Called in __init__ of subclass.
+ """
+ pass
+
+ @abstractmethod
+    def train(self, X_train, y_train) -> dict[str, Any]:
+        """
+        Main training logic for the model.
+
+        Args:
+            X_train: Input training data.
+            y_train: True labels for training data.
+
+        Returns:
+            dict[str, Any]: Training parameters to log (e.g., to MLflow).
+        """
+        pass
+
+ @abstractmethod
+    def evaluate(self, X_test, y_test) -> dict[str, Any]:
+        """
+        Evaluation logic for the model.
+
+        Args:
+            X_test: Input test data.
+            y_test: True labels for test data.
+
+        Returns:
+            dict[str, Any]: Evaluation metrics (e.g., accuracy, precision, recall, F1).
+        """
+        pass
+
+ @abstractmethod
+ def predict(self, X) -> ndarray:
+ """
+ Make predictions using the trained model.
+
+ Args:
+ X: Input data for prediction.
+
+ Returns:
+ Predictions made by the model.
+ """
+ pass
+
+ def save(self, path, model_name):
+ """
+ Save model and log to MLflow.
+
+ Args:
+ path (str): Path to save the model.
+ model_name (str): Name to use when saving the model (without extension).
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Cannot save uninitialized model.")
+
+ complete_path = os.path.join(path, f"{model_name}_{self.language}")
+ if os.path.exists(complete_path) and os.path.isdir(complete_path):
+ shutil.rmtree(complete_path)
+ mlflow.sklearn.save_model(self.model, complete_path)
+
+ try:
+ mlflow.log_artifact(complete_path)
+ except Exception as e:
+ logger.error(f"Failed to log model to MLflow: {e}")
+
+ logger.info(f"Model saved to: {complete_path}")
+
+ def load(self, model_path):
+ """
+ Load model from specified local path or mlflow model URI.
+
+ Args:
+ model_path (str): Path to load the model from (local or mlflow URI).
+ """
+
+ self.model = mlflow.sklearn.load_model(model_path)
+ logger.info(f"Model loaded from: {model_path}")
+
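
The contract above is easiest to see with a toy subclass. The following is a hypothetical sketch (not part of the project) that predicts the most frequent label vector seen during training; it only illustrates which methods a concrete model must provide.

```python
# Hypothetical minimal subclass of BaseModel: a constant "majority vector" baseline.
from typing import Any

import numpy as np
from numpy import ndarray

from turing.modeling.baseModel import BaseModel


class MajorityBaseline(BaseModel):
    def setup_model(self):
        # Nothing to build up front; train() fills in the constant prediction.
        self.model = None

    def train(self, X_train, y_train) -> dict[str, Any]:
        y = np.asarray(y_train)
        rows, counts = np.unique(y, axis=0, return_counts=True)
        self.model = rows[counts.argmax()]  # most frequent multi-hot row
        return {"strategy": "majority"}

    def evaluate(self, X_test, y_test) -> dict[str, Any]:
        y_pred = self.predict(X_test)
        exact_match = float((y_pred == np.asarray(y_test)).all(axis=1).mean())
        return {"exact_match": exact_match}

    def predict(self, X) -> ndarray:
        # Repeat the stored label vector once per input sample.
        return np.tile(self.model, (len(X), 1))
```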
diff --git a/turing/modeling/model_selector.py b/turing/modeling/model_selector.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f83c97a5477c72c299631f1c29930672e693289
--- /dev/null
+++ b/turing/modeling/model_selector.py
@@ -0,0 +1,145 @@
+from typing import Optional
+
+from loguru import logger
+from mlflow.tracking import MlflowClient
+
+
+def get_best_model_by_tag(
+ language: str,
+ tag_key: str = "best_model",
+ metric: str = "f1_score"
+) -> Optional[dict]:
+ """
+ Retrieve the best model for a specific language using MLflow tags.
+
+ Args:
+ language: Programming language (java, python, pharo)
+ tag_key: Tag key to search for (default: "best_model")
+ metric: Metric to use for ordering (default: "f1_score")
+
+ Returns:
+        Dict with run_id, artifact and model_id of the best model, or None if not found
+ """
+
+ client = MlflowClient()
+ experiments = client.search_experiments()
+ if not experiments:
+ logger.error("No experiments found in MLflow")
+ return None
+
+ try:
+ runs = client.search_runs(
+ experiment_ids=[exp.experiment_id for exp in experiments],
+ filter_string=f"tags.{tag_key} = 'true' and tags.Language = '{language}'",
+ order_by=[f"metrics.{metric} DESC"],
+ max_results=1
+ )
+
+ if not runs:
+ logger.warning(f"No runs found with tag '{tag_key}' for language '{language}'")
+ return None
+
+ best_run = runs[0]
+ run_id = best_run.info.run_id
+ exp_name = client.get_experiment(best_run.info.experiment_id).name
+ run_name = best_run.info.run_name
+ artifact_name = best_run.data.tags.get("model_name")
+ model_id = best_run.data.tags.get("model_id")
+ logger.info(f"Found best model for {language}: {exp_name}/{run_name} ({run_id}), artifact={artifact_name}")
+
+ return {
+ "run_id": run_id,
+ "artifact": artifact_name,
+ "model_id": model_id
+ }
+
+ except Exception as e:
+ logger.error(f"Error searching for best model: {e}")
+ return None
+
+
+def get_best_model_info(
+ language: str,
+    fallback_registry: Optional[dict] = None
+) -> dict:
+ """
+ Retrieve the best model information for a language.
+ First searches by tag, then falls back to hardcoded registry.
+
+ Args:
+ language: Programming language
+ fallback_registry: Fallback registry with run_id and artifact
+
+ Returns:
+ Dict with run_id and artifact of the model
+ """
+
+ model_info = get_best_model_by_tag(language, "best_model")
+
+ if model_info:
+ logger.info(f"Using tagged best model for {language}")
+ return model_info
+
+ if fallback_registry and language in fallback_registry:
+ logger.warning(f"No tagged model found for {language}, using fallback registry")
+ return fallback_registry[language]
+
+ model_info = get_best_model_by_metric(language)
+
+ if model_info:
+ logger.warning(f"Using best model by metric for {language}")
+ return model_info
+
+ raise ValueError(f"No model found for language {language}")
+
+
+def get_best_model_by_metric(
+ language: str,
+ metric: str = "f1_score"
+) -> Optional[dict]:
+ """
+ Find the model with the best metric for a language.
+
+ Args:
+ language: Programming language
+ metric: Metric to use for ordering
+
+ Returns:
+ Dict with run_id and artifact of the model or None
+ """
+
+ client = MlflowClient()
+ experiments = client.search_experiments()
+ if not experiments:
+ logger.error("No experiments found in MLflow")
+ return None
+
+ try:
+ runs = client.search_runs(
+ experiment_ids=[exp.experiment_id for exp in experiments],
+ filter_string=f"tags.Language = '{language}'",
+ order_by=[f"metrics.{metric} DESC"],
+ max_results=1
+ )
+
+ if not runs:
+ logger.warning(f"No runs found for language '{language}'")
+ return None
+
+ best_run = runs[0]
+ run_id = best_run.info.run_id
+ exp_name = client.get_experiment(best_run.info.experiment_id).name
+ run_name = best_run.info.run_name
+ artifact_name = best_run.data.tags.get("model_name")
+ model_id = best_run.data.tags.get("model_id")
+ logger.info(f"Found best model for {language}: {exp_name}/{run_name} ({run_id}), artifact={artifact_name}")
+
+ return {
+ "run_id": run_id,
+ "artifact": artifact_name,
+ "model_id": model_id
+ }
+
+ except Exception as e:
+ logger.error(f"Error finding best model by metric: {e}")
+ return None
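
Typical usage of the selector, with placeholder values; this assumes an MLflow tracking URI is already configured and that runs carry the `Language`, `best_model`, and `model_name` tags the queries above expect.

```python
# Hypothetical call site for the model selector; the run ID below is a placeholder.
from turing.modeling.model_selector import get_best_model_info

FALLBACK_REGISTRY = {
    "java": {"run_id": "<run-id>", "artifact": "codeberta_java", "model_id": None},
}

info = get_best_model_info("java", fallback_registry=FALLBACK_REGISTRY)

# The returned pieces can be combined into an MLflow artifact URI, e.g.:
model_uri = f"runs:/{info['run_id']}/{info['artifact']}"
print(model_uri)
```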
diff --git a/turing/modeling/models/__init__.py b/turing/modeling/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fc7efe62befe30f25787a6adbb0ee796e167fe5
--- /dev/null
+++ b/turing/modeling/models/__init__.py
@@ -0,0 +1,15 @@
+"""
+Model classes for code comment classification.
+"""
+
+from turing.modeling.models.codeBerta import CodeBERTa
+from turing.modeling.models.graphCodeBert import GraphCodeBERTClassifier
+from turing.modeling.models.randomForestTfIdf import RandomForestTfIdf
+from turing.modeling.models.tinyBert import TinyBERTClassifier
+
+__all__ = [
+ "CodeBERTa",
+ "RandomForestTfIdf",
+ "TinyBERTClassifier",
+ "GraphCodeBERTClassifier",
+]
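
These exports make it straightforward to pick a model class by a short name at runtime; the mapping below is only an illustration, not something defined by the package.

```python
# Illustrative name-to-class lookup over the exported model classes.
from turing.modeling import models

MODEL_CLASSES = {
    "codeberta": models.CodeBERTa,
    "graphcodebert": models.GraphCodeBERTClassifier,
    "random_forest": models.RandomForestTfIdf,
    "tinybert": models.TinyBERTClassifier,
}

model = MODEL_CLASSES["random_forest"](language="java")
```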
diff --git a/turing/modeling/models/__pycache__/miniLM.cpython-312.pyc b/turing/modeling/models/__pycache__/miniLM.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3f0ee2b529f8208b9a8597159087ccf2452ee16d
Binary files /dev/null and b/turing/modeling/models/__pycache__/miniLM.cpython-312.pyc differ
diff --git a/turing/modeling/models/__pycache__/miniLmWithClassificationHead.cpython-312.pyc b/turing/modeling/models/__pycache__/miniLmWithClassificationHead.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9d95768e056e5a2372b5ff2dc24236212578cbc8
Binary files /dev/null and b/turing/modeling/models/__pycache__/miniLmWithClassificationHead.cpython-312.pyc differ
diff --git a/turing/modeling/models/__pycache__/randomForestTfIdf.cpython-312.pyc b/turing/modeling/models/__pycache__/randomForestTfIdf.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0f0602f2610e640ed25a906433f59c392845613
Binary files /dev/null and b/turing/modeling/models/__pycache__/randomForestTfIdf.cpython-312.pyc differ
diff --git a/turing/modeling/models/codeBerta.py b/turing/modeling/models/codeBerta.py
new file mode 100644
index 0000000000000000000000000000000000000000..d593503288765f11635627da12c01ae8b35e9461
--- /dev/null
+++ b/turing/modeling/models/codeBerta.py
@@ -0,0 +1,463 @@
+import os
+import shutil
+import warnings
+
+from loguru import logger
+import mlflow
+import numpy as np
+from numpy import ndarray
+from sklearn.metrics import (
+ accuracy_score,
+ classification_report,
+ f1_score,
+ precision_score,
+ recall_score,
+)
+import torch
+from torch.utils.data import Dataset
+from transformers import (
+ AutoModelForSequenceClassification,
+ AutoTokenizer,
+ EarlyStoppingCallback,
+ Trainer,
+ TrainingArguments,
+)
+
+from turing.config import MODELS_DIR
+
+from ..baseModel import BaseModel
+
+warnings.filterwarnings("ignore")
+
+
+def compute_metrics(eval_pred):
+ predictions, labels = eval_pred
+
+ # Sigmoid function to convert logits to probabilities
+ probs = 1 / (1 + np.exp(-predictions))
+
+ # Apply threshold of 0.5 (becomes 1 if > 0.5, otherwise 0)
+ preds = (probs > 0.5).astype(int)
+
+ # Calculate F1 score (macro average for multi-label)
+ f1 = f1_score(labels, preds, average='macro')
+ precision = precision_score(labels, preds, average='macro', zero_division=0)
+ recall = recall_score(labels, preds, average='macro', zero_division=0)
+
+ return {
+ 'f1': f1,
+ 'precision': precision,
+ 'recall': recall,
+ }
+
+
+
+class CodeBERTaDataset(Dataset):
+ """
+ Internal Dataset class for CodeBERTa.
+ """
+
+ def __init__(self, encodings, labels=None, num_labels=None):
+ """
+        Initialize the CodeBERTaDataset.
+ Args:
+ encodings (dict): Tokenized encodings.
+ labels (list or np.ndarray, optional): Corresponding labels.
+ num_labels (int, optional): Total number of classes. Required for auto-converting indices to one-hot.
+ """
+
+ self.encodings = {key: torch.tensor(val) for key, val in encodings.items()}
+
+ if labels is not None:
+ if not isinstance(labels, (np.ndarray, torch.Tensor)):
+ labels = np.array(labels)
+
+ # Case A: labels are indices (integers)
+ if num_labels is not None and (len(labels.shape) == 1 or (len(labels.shape) == 2 and labels.shape[1] == 1)):
+ labels_flat = labels.flatten()
+
+ # Create one-hot encoded matrix
+ one_hot = np.zeros((len(labels_flat), num_labels), dtype=np.float32)
+
+ # Set the corresponding index to 1
+ valid_indices = labels_flat < num_labels
+ one_hot[valid_indices, labels_flat[valid_indices]] = 1.0
+
+ self.labels = torch.tensor(one_hot, dtype=torch.float)
+
+ # Case B: labels are already vectors (e.g., One-Hot or Multi-Hot)
+ else:
+ self.labels = torch.tensor(labels, dtype=torch.float)
+ else:
+ self.labels = None
+
+
+ def __getitem__(self, idx):
+ """
+ Retrieve item at index idx.
+
+ Args:
+ idx (int): Index of the item to retrieve.
+
+ Returns:
+ dict: Dictionary containing input_ids, attention_mask, and labels (if available).
+ """
+
+ item = {key: val[idx] for key, val in self.encodings.items()}
+ if self.labels is not None:
+ item['labels'] = self.labels[idx]
+ return item
+
+
+ def __len__(self):
+ """
+ Return the length of the dataset.
+
+ Returns:
+ int: Length of the dataset.
+ """
+
+ return len(self.encodings['input_ids'])
+
+
+
+class CodeBERTa(BaseModel):
+ """
+ HuggingFace implementation of BaseModel for Code Comment Classification.
+ Uses CodeBERTa-small-v1 for efficient inference.
+ """
+
+ def __init__(self, language, path=None):
+ """
+ Initialize the CodeBERTa model with configuration parameters.
+
+ Args:
+ language (str): Language for the model.
+ path (str, optional): Path to load a pre-trained model. Defaults to None.
+ """
+
+ self.params = {
+ "model_name_hf": "huggingface/CodeBERTa-small-v1",
+ "num_labels": 7 if language == "java" else 5 if language == "python" else 6,
+ "max_length": 128,
+ "epochs": 15,
+ "batch_size_train": 16,
+ "batch_size_eval": 64,
+ "learning_rate": 1e-5,
+ "weight_decay": 0.02,
+ "train_size": 0.8,
+ "early_stopping_patience": 3,
+ "early_stopping_threshold": 0.005
+ }
+
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ self.tokenizer = None
+
+ super().__init__(language, path)
+
+
+ def setup_model(self):
+ """
+ Initialize the CodeBERTa tokenizer and model.
+ """
+
+ logger.info(f"Initializing {self.params['model_name_hf']} on {self.device}...")
+
+ self.tokenizer = AutoTokenizer.from_pretrained(self.params["model_name_hf"])
+ self.model = AutoModelForSequenceClassification.from_pretrained(
+ self.params["model_name_hf"],
+ num_labels=self.params["num_labels"],
+ problem_type="multi_label_classification"
+ ).to(self.device)
+ logger.info("CodeBERTa model initialized.")
+
+
+ def _tokenize(self, texts):
+ """
+ Helper to tokenize list of texts efficiently.
+
+ Args:
+ texts (list): List of text strings to tokenize.
+
+ Returns:
+ dict: Tokenized encodings.
+ """
+
+ safe_texts = []
+ for t in texts:
+ if t is None:
+ safe_texts.append("")
+ elif isinstance(t, (int, float)):
+ if t != t: # NaN check
+ safe_texts.append("")
+ else:
+ safe_texts.append(str(t))
+ else:
+ safe_texts.append(str(t))
+
+ return self.tokenizer(
+ safe_texts,
+ truncation=True,
+ padding=True,
+ max_length=self.params["max_length"]
+ )
+
+
+ def train(self, X_train, y_train) -> dict[str,any]:
+ """
+ Train the model using HF Trainer and log to MLflow.
+
+ Args:
+ X_train (list): Training input texts.
+ y_train (list or np.ndarray): Training labels.
+
+ Returns:
+ dict[str, any]: Dictionary of parameters used for training.
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not initialized. Call setup_model() before training.")
+
+ # log parameters to MLflow without model_name_hf
+ params_to_log = {k: v for k, v in self.params.items() if k != "model_name_hf" and k != "num_labels"}
+
+ logger.info(f"Starting training for: {self.language.upper()}")
+
+ # Prepare dataset (train/val split)
+ train_encodings = self._tokenize(X_train)
+ full_dataset = CodeBERTaDataset(train_encodings, y_train, num_labels=self.params["num_labels"])
+ train_size = int(self.params["train_size"] * len(full_dataset))
+ val_size = len(full_dataset) - train_size
+ train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
+
+ temp_ckpt_dir = os.path.join(MODELS_DIR, "temp_checkpoints")
+
+ use_fp16 = torch.cuda.is_available()
+ if not use_fp16:
+ logger.info("Mixed Precision (fp16) disabled because CUDA is not available.")
+
+ training_args = TrainingArguments(
+ output_dir=temp_ckpt_dir,
+ num_train_epochs=self.params["epochs"],
+ per_device_train_batch_size=self.params["batch_size_train"],
+ per_device_eval_batch_size=self.params["batch_size_eval"],
+ learning_rate=self.params["learning_rate"],
+ weight_decay=self.params["weight_decay"],
+ eval_strategy="epoch",
+ save_strategy="epoch",
+ load_best_model_at_end=True,
+ metric_for_best_model="f1",
+ greater_is_better=True,
+ save_total_limit=2,
+ logging_dir='./logs',
+ logging_steps=50,
+ fp16=use_fp16,
+ optim="adamw_torch",
+ report_to="none",
+ no_cuda=not torch.cuda.is_available()
+ )
+
+ trainer = Trainer(
+ model=self.model,
+ args=training_args,
+ train_dataset=train_dataset,
+ eval_dataset=val_dataset,
+ compute_metrics=compute_metrics,
+ callbacks=[EarlyStoppingCallback(early_stopping_patience=self.params["early_stopping_patience"], early_stopping_threshold=self.params["early_stopping_threshold"])]
+ )
+ trainer.train()
+ logger.info(f"Training for {self.language.upper()} completed.")
+
+ if os.path.exists(temp_ckpt_dir):
+ shutil.rmtree(temp_ckpt_dir)
+
+ return params_to_log
+
+
+ def evaluate(self, X_test, y_test) -> dict[str,any]:
+ """
+ Evaluate model on test data, return metrics and log to MLflow.
+ Handles automatic conversion of y_test to match multi-label prediction shape.
+
+ Args:
+ X_test (list): Input test data.
+ y_test (list or np.ndarray): True labels for test data.
+
+ Returns:
+ dict[str, any]: Dictionary of evaluation metrics.
+ """
+
+ # Obtain predictions
+ y_pred = self.predict(X_test)
+
+ # Convert y_test to numpy array if needed
+ if not isinstance(y_test, (np.ndarray, torch.Tensor)):
+ y_test_np = np.array(y_test)
+ elif isinstance(y_test, torch.Tensor):
+ y_test_np = y_test.cpu().numpy()
+ else:
+ y_test_np = y_test
+
+ num_labels = self.params["num_labels"]
+ is_multilabel_pred = (y_pred.ndim == 2 and y_pred.shape[1] > 1)
+ is_flat_truth = (y_test_np.ndim == 1) or (y_test_np.ndim == 2 and y_test_np.shape[1] == 1)
+
+ if is_multilabel_pred and is_flat_truth:
+ # Create a zero matrix
+ y_test_expanded = np.zeros((y_test_np.shape[0], num_labels), dtype=int)
+
+ # Flatten y_test for iteration
+ indices = y_test_np.flatten()
+
+ # Use indices to set the correct column to 1
+ for i, label_idx in enumerate(indices):
+ idx = int(label_idx)
+ if 0 <= idx < num_labels:
+ y_test_expanded[i, idx] = 1
+
+ y_test_np = y_test_expanded
+
+ # Generate classification report
+ report = classification_report(y_test_np, y_pred, zero_division=0)
+ print("\n" + "=" * 50)
+ print("CLASSIFICATION REPORT")
+ print(report)
+ print("=" * 50 + "\n")
+
+        metrics = {
+            "accuracy": accuracy_score(y_test_np, y_pred),
+            "precision": precision_score(y_test_np, y_pred, average="macro", zero_division=0),
+            "recall": recall_score(y_test_np, y_pred, average="macro", zero_division=0),
+            "f1_score": f1_score(y_test_np, y_pred, average="macro", zero_division=0),
+        }
+
+ mlflow.log_metrics(metrics)
+
+ logger.info(
+ f"Evaluation completed — Accuracy: {metrics['accuracy']:.3f}, F1: {metrics['f1_score']:.3f}"
+ )
+ return metrics
+
+
+ def predict(self, X) -> ndarray:
+ """
+ Make predictions for Multi-Label classification.
+ Returns Binary Matrix (Multi-Hot) where multiple classes can be 1.
+
+ Args:
+ X (list): Input texts for prediction.
+
+ Returns:
+ np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Call train() or load() before prediction.")
+
+ # Set model to evaluation mode
+ self.model.eval()
+
+ encodings = self._tokenize(X)
+ # Pass None as labels because we are in inference
+ dataset = CodeBERTaDataset(encodings, labels=None)
+
+ use_fp16 = torch.cuda.is_available()
+
+ training_args = TrainingArguments(
+ output_dir="./pred_temp",
+ per_device_eval_batch_size=self.params["batch_size_eval"],
+ fp16=use_fp16,
+ report_to="none",
+ no_cuda=not torch.cuda.is_available()
+ )
+
+ trainer = Trainer(model=self.model, args=training_args)
+ output = trainer.predict(dataset)
+
+ # Clean up temporary prediction directory
+ if os.path.exists("./pred_temp"):
+ shutil.rmtree("./pred_temp")
+
+ # Convert logits to probabilities
+ logits = output.predictions
+ probs = 1 / (1 + np.exp(-logits))
+
+ # Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
+ preds_binary = (probs > 0.5).astype(int)
+
+ return preds_binary
+
+
+ def save(self, path, model_name):
+ """
+ Save model locally and log to MLflow as artifact.
+
+ Args:
+ path (str): Directory path to save the model.
+ model_name (str): Name for the saved model.
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Cannot save uninitialized model.")
+
+ # Local Saving
+ complete_path = os.path.join(path, f"{model_name}_{self.language}")
+
+ # Remove existing directory if it exists
+ if os.path.exists(complete_path) and os.path.isdir(complete_path):
+ shutil.rmtree(complete_path)
+
+ # Save model and tokenizer
+ logger.info(f"Saving model to: {complete_path}")
+ self.model.save_pretrained(complete_path)
+ self.tokenizer.save_pretrained(complete_path)
+ logger.info("Model saved locally.")
+
+ try:
+ # Log to MLflow
+ logger.info("Logging artifacts to MLflow...")
+ mlflow.log_artifacts(local_dir=complete_path, artifact_path=f"{model_name}_{self.language}")
+ except Exception as e:
+ logger.error(f"Failed to log model artifacts to MLflow: {e}")
+
+
+ def load(self, model_path):
+ """
+ Load model from a local path OR an MLflow URI.
+
+ Args:
+ model_path (str): Local path or MLflow URI to load the model from.
+ """
+
+ logger.info(f"Loading model from: {model_path}")
+ local_model_path = model_path
+
+ # Downloading model from MLflow and saving to local path
+ if model_path.startswith("models:/") or model_path.startswith("runs:/"):
+ try:
+ logger.info("Detected MLflow model URI. Attempting to load from MLflow...")
+ local_model_path = os.path.join(MODELS_DIR, "mlflow_temp_models")
+ local_model_path = mlflow.artifacts.download_artifacts(artifact_uri=model_path, dst_path=local_model_path)
+ logger.info(f"Model downloaded from MLflow to: {local_model_path}")
+ except Exception as e:
+ logger.error(f"Failed to load from MLflow: {e}")
+ raise e
+
+ # Loading from local path
+ try:
+ if not os.path.exists(local_model_path):
+ raise FileNotFoundError(f"Model path not found: {local_model_path}")
+
+ # Load tokenizer and model from local path
+ self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
+ self.model = AutoModelForSequenceClassification.from_pretrained(
+ local_model_path
+ ).to(self.device)
+ logger.info("Model loaded from local path successfully.")
+
+ except Exception as e:
+ logger.error(f"Failed to load model from local path: {e}")
+ raise e
+
+ # Set model to evaluation mode
+ self.model.eval()
\ No newline at end of file
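
A hedged end-to-end sketch of how the class above is meant to be driven; the toy texts/labels and the run name are placeholders, and both a Hugging Face model download and a configured MLflow tracking URI are assumed.

```python
# Hypothetical usage of CodeBERTa: train, evaluate, and persist under an MLflow run.
import mlflow

from turing.config import MODELS_DIR
from turing.modeling.models.codeBerta import CodeBERTa

# Toy multi-hot labels for Java's 7 categories (placeholders, not real data).
train_texts = ["Returns the element at the given index.", "Deprecated, use fetch() instead."]
train_labels = [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0]]

model = CodeBERTa(language="java")

with mlflow.start_run(run_name="codeberta_java_demo"):
    params = model.train(train_texts, train_labels)      # returns the params it wants logged
    mlflow.log_params(params)
    metrics = model.evaluate(train_texts, train_labels)  # also calls mlflow.log_metrics()
    model.save(MODELS_DIR, "codeberta")                  # local save + artifact logging
```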
diff --git a/turing/modeling/models/graphCodeBert.py b/turing/modeling/models/graphCodeBert.py
new file mode 100644
index 0000000000000000000000000000000000000000..83ec9d1042c2c2dd9e7835e0576f3c09d051e61b
--- /dev/null
+++ b/turing/modeling/models/graphCodeBert.py
@@ -0,0 +1,469 @@
+import os
+import shutil
+import warnings
+
+from loguru import logger
+import mlflow
+import numpy as np
+from numpy import ndarray
+from sklearn.metrics import (
+ accuracy_score,
+ classification_report,
+ f1_score,
+ precision_score,
+ recall_score,
+)
+import torch
+from torch.utils.data import Dataset
+from transformers import (
+ AutoModelForSequenceClassification,
+ AutoTokenizer,
+ EarlyStoppingCallback,
+ Trainer,
+ TrainingArguments,
+)
+
+from turing.config import MODELS_DIR
+
+from ..baseModel import BaseModel
+
+warnings.filterwarnings("ignore")
+
+
+def compute_metrics(eval_pred):
+ predictions, labels = eval_pred
+
+ # Sigmoid function to convert logits to probabilities
+ probs = 1 / (1 + np.exp(-predictions))
+
+ # Apply threshold of 0.5 (becomes 1 if > 0.5, otherwise 0)
+ preds = (probs > 0.5).astype(int)
+
+ # Calculate F1 score (macro average for multi-label)
+ f1 = f1_score(labels, preds, average="macro")
+ precision = precision_score(labels, preds, average="macro", zero_division=0)
+ recall = recall_score(labels, preds, average="macro", zero_division=0)
+
+ return {
+ "f1": f1,
+ "precision": precision,
+ "recall": recall,
+ }
+
+
+class GraphCodeBERTDataset(Dataset):
+ """
+ Internal Dataset class for GraphCodeBERT.
+ """
+
+ def __init__(self, encodings, labels=None, num_labels=None):
+ """
+        Initialize the GraphCodeBERTDataset.
+ Args:
+ encodings (dict): Tokenized encodings.
+ labels (list or np.ndarray, optional): Corresponding labels.
+ num_labels (int, optional): Total number of classes. Required for auto-converting indices to one-hot.
+ """
+
+ self.encodings = {key: torch.tensor(val) for key, val in encodings.items()}
+
+ if labels is not None:
+ if not isinstance(labels, (np.ndarray, torch.Tensor)):
+ labels = np.array(labels)
+
+ # Case A: labels are indices (integers)
+ if num_labels is not None and (
+ len(labels.shape) == 1 or (len(labels.shape) == 2 and labels.shape[1] == 1)
+ ):
+ labels_flat = labels.flatten()
+
+ # Create one-hot encoded matrix
+ one_hot = np.zeros((len(labels_flat), num_labels), dtype=np.float32)
+
+ # Set the corresponding index to 1
+ valid_indices = labels_flat < num_labels
+ one_hot[valid_indices, labels_flat[valid_indices]] = 1.0
+
+ self.labels = torch.tensor(one_hot, dtype=torch.float)
+
+ # Case B: labels are already vectors (e.g., One-Hot or Multi-Hot)
+ else:
+ self.labels = torch.tensor(labels, dtype=torch.float)
+ else:
+ self.labels = None
+
+ def __getitem__(self, idx):
+ """
+ Retrieve item at index idx.
+
+ Args:
+ idx (int): Index of the item to retrieve.
+
+ Returns:
+ dict: Dictionary containing input_ids, attention_mask, and labels (if available).
+ """
+
+ item = {key: val[idx] for key, val in self.encodings.items()}
+ if self.labels is not None:
+ item["labels"] = self.labels[idx]
+ return item
+
+ def __len__(self):
+ """
+ Return the length of the dataset.
+
+ Returns:
+ int: Length of the dataset.
+ """
+
+ return len(self.encodings["input_ids"])
+
+
+class GraphCodeBERTClassifier(BaseModel):
+ """
+ HuggingFace implementation of BaseModel for Code Comment Classification.
+ Uses GraphCodeBERT (microsoft/graphcodebert-base) for code understanding via data flow graphs.
+ """
+
+ def __init__(self, language, path=None):
+ """
+ Initialize the GraphCodeBERT model with configuration parameters.
+
+ Args:
+ language (str): Language for the model.
+ path (str, optional): Path to load a pre-trained model. Defaults to None.
+ """
+
+ self.params = {
+ "model_name_hf": "microsoft/graphcodebert-base",
+ "num_labels": 7 if language == "java" else 5 if language == "python" else 6,
+ "max_length": 256,
+ "epochs": 15,
+ "batch_size_train": 16,
+ "batch_size_eval": 64,
+ "learning_rate": 2e-5,
+ "weight_decay": 0.01,
+ "train_size": 0.8,
+ "early_stopping_patience": 3,
+ "early_stopping_threshold": 0.0,
+ "warmup_steps": 500,
+ "seed": 42,
+ }
+
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ self.tokenizer = None
+
+ super().__init__(language, path)
+
+ def setup_model(self):
+ """
+ Initialize the GraphCodeBERT tokenizer and model.
+ """
+
+ logger.info(f"Initializing {self.params['model_name_hf']} on {self.device}...")
+
+ self.tokenizer = AutoTokenizer.from_pretrained(self.params["model_name_hf"])
+ self.model = AutoModelForSequenceClassification.from_pretrained(
+ self.params["model_name_hf"],
+ num_labels=self.params["num_labels"],
+ problem_type="multi_label_classification",
+ use_safetensors=True, # Force use of safetensors for security
+ ).to(self.device)
+ logger.info("GraphCodeBERT model initialized.")
+
+ def _tokenize(self, texts):
+ """
+ Helper to tokenize list of texts efficiently.
+
+ Args:
+ texts (list): List of text strings to tokenize.
+
+ Returns:
+ dict: Tokenized encodings.
+ """
+
+ safe_texts = []
+ for t in texts:
+ if t is None:
+ safe_texts.append("")
+ elif isinstance(t, (int, float)):
+ if t != t: # NaN check
+ safe_texts.append("")
+ else:
+ safe_texts.append(str(t))
+ else:
+ safe_texts.append(str(t))
+
+ return self.tokenizer(
+ safe_texts, truncation=True, padding=True, max_length=self.params["max_length"]
+ )
+
+ def train(self, X_train, y_train) -> dict[str, any]:
+ """
+ Train the model using HF Trainer and log to MLflow.
+
+ Args:
+ X_train (list): Training input texts.
+ y_train (list or np.ndarray): Training labels.
+
+ Returns:
+ dict[str, any]: Dictionary of parameters used for training.
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not initialized. Call setup_model() before training.")
+
+ # log parameters to MLflow without model_name_hf
+ params_to_log = {
+ k: v for k, v in self.params.items() if k != "model_name_hf" and k != "num_labels"
+ }
+
+ logger.info(f"Starting training for: {self.language.upper()}")
+
+ # Prepare dataset (train/val split)
+ train_encodings = self._tokenize(X_train)
+ full_dataset = GraphCodeBERTDataset(
+ train_encodings, y_train, num_labels=self.params["num_labels"]
+ )
+ train_size = int(self.params["train_size"] * len(full_dataset))
+ val_size = len(full_dataset) - train_size
+ train_dataset, val_dataset = torch.utils.data.random_split(
+ full_dataset, [train_size, val_size]
+ )
+
+ temp_ckpt_dir = os.path.join(MODELS_DIR, "temp_checkpoints")
+
+ use_fp16 = torch.cuda.is_available()
+ if not use_fp16:
+ logger.info("Mixed Precision (fp16) disabled because CUDA is not available.")
+
+ training_args = TrainingArguments(
+ output_dir=temp_ckpt_dir,
+ num_train_epochs=self.params["epochs"],
+ per_device_train_batch_size=self.params["batch_size_train"],
+ per_device_eval_batch_size=self.params["batch_size_eval"],
+ learning_rate=self.params["learning_rate"],
+ weight_decay=self.params["weight_decay"],
+ eval_strategy="epoch",
+ save_strategy="epoch",
+ load_best_model_at_end=True,
+ metric_for_best_model="f1",
+ greater_is_better=True,
+ save_total_limit=2,
+ logging_dir="./logs",
+ logging_steps=50,
+ fp16=use_fp16,
+ optim="adamw_torch",
+ report_to="none",
+ no_cuda=not torch.cuda.is_available(),
+ )
+
+ trainer = Trainer(
+ model=self.model,
+ args=training_args,
+ train_dataset=train_dataset,
+ eval_dataset=val_dataset,
+ compute_metrics=compute_metrics,
+ callbacks=[
+ EarlyStoppingCallback(
+ early_stopping_patience=self.params["early_stopping_patience"],
+ early_stopping_threshold=self.params["early_stopping_threshold"],
+ )
+ ],
+ )
+ trainer.train()
+ logger.info(f"Training for {self.language.upper()} completed.")
+
+ if os.path.exists(temp_ckpt_dir):
+ shutil.rmtree(temp_ckpt_dir)
+
+ return params_to_log
+
+ def evaluate(self, X_test, y_test) -> dict[str, any]:
+ """
+ Evaluate model on test data, return metrics and log to MLflow.
+ Handles automatic conversion of y_test to match multi-label prediction shape.
+
+ Args:
+ X_test (list): Input test data.
+ y_test (list or np.ndarray): True labels for test data.
+
+ Returns:
+ dict[str, any]: Dictionary of evaluation metrics.
+ """
+
+ # Obtain predictions
+ y_pred = self.predict(X_test)
+
+ # Convert y_test to numpy array if needed
+ if not isinstance(y_test, (np.ndarray, torch.Tensor)):
+ y_test_np = np.array(y_test)
+ elif isinstance(y_test, torch.Tensor):
+ y_test_np = y_test.cpu().numpy()
+ else:
+ y_test_np = y_test
+
+ num_labels = self.params["num_labels"]
+ is_multilabel_pred = y_pred.ndim == 2 and y_pred.shape[1] > 1
+ is_flat_truth = (y_test_np.ndim == 1) or (y_test_np.ndim == 2 and y_test_np.shape[1] == 1)
+
+ if is_multilabel_pred and is_flat_truth:
+ # Create a zero matrix
+ y_test_expanded = np.zeros((y_test_np.shape[0], num_labels), dtype=int)
+
+ # Flatten y_test for iteration
+ indices = y_test_np.flatten()
+
+ # Use indices to set the correct column to 1
+ for i, label_idx in enumerate(indices):
+ idx = int(label_idx)
+ if 0 <= idx < num_labels:
+ y_test_expanded[i, idx] = 1
+
+ y_test_np = y_test_expanded
+
+ # Generate classification report
+ report = classification_report(y_test_np, y_pred, zero_division=0)
+ print("\n" + "=" * 50)
+ print("CLASSIFICATION REPORT")
+ print(report)
+ print("=" * 50 + "\n")
+
+ metrics = {
+ "accuracy": accuracy_score(y_test_np, y_pred),
+ "precision": precision_score(y_test_np, y_pred, average="macro", zero_division=0),
+ "recall": recall_score(y_test_np, y_pred, average="macro", zero_division=0),
+ "f1_score": f1_score(y_test_np, y_pred, average="macro", zero_division=0),
+ }
+
+ mlflow.log_metrics(metrics)
+
+ logger.info(
+ f"Evaluation completed — Accuracy: {metrics['accuracy']:.3f}, F1: {metrics['f1_score']:.3f}"
+ )
+ return metrics
+
+ def predict(self, X) -> ndarray:
+ """
+ Make predictions for Multi-Label classification.
+ Returns Binary Matrix (Multi-Hot) where multiple classes can be 1.
+
+ Args:
+ X (list): Input texts for prediction.
+
+ Returns:
+ np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Call train() or load() before prediction.")
+
+ # Set model to evaluation mode
+ self.model.eval()
+
+ encodings = self._tokenize(X)
+ # Pass None as labels because we are in inference
+ dataset = GraphCodeBERTDataset(encodings, labels=None)
+
+ use_fp16 = torch.cuda.is_available()
+
+ training_args = TrainingArguments(
+ output_dir="./pred_temp",
+ per_device_eval_batch_size=self.params["batch_size_eval"],
+ fp16=use_fp16,
+ report_to="none",
+ no_cuda=not torch.cuda.is_available(),
+ )
+
+ trainer = Trainer(model=self.model, args=training_args)
+ output = trainer.predict(dataset)
+
+ # Clean up temporary prediction directory
+ if os.path.exists("./pred_temp"):
+ shutil.rmtree("./pred_temp")
+
+ # Convert logits to probabilities
+ logits = output.predictions
+ probs = 1 / (1 + np.exp(-logits))
+
+ # Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
+ preds_binary = (probs > 0.5).astype(int)
+
+ return preds_binary
+
+ def save(self, path, model_name):
+ """
+ Save model locally and log to MLflow as artifact.
+
+ Args:
+ path (str): Directory path to save the model.
+ model_name (str): Name for the saved model.
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Cannot save uninitialized model.")
+
+ # Local Saving
+ complete_path = os.path.join(path, f"{model_name}_{self.language}")
+
+ # Remove existing directory if it exists
+ if os.path.exists(complete_path) and os.path.isdir(complete_path):
+ shutil.rmtree(complete_path)
+
+ # Save model and tokenizer
+ logger.info(f"Saving model to: {complete_path}")
+ self.model.save_pretrained(complete_path)
+ self.tokenizer.save_pretrained(complete_path)
+ logger.info("Model saved locally.")
+
+ try:
+ # Log to MLflow
+ logger.info("Logging artifacts to MLflow...")
+ mlflow.log_artifacts(
+ local_dir=complete_path, artifact_path=f"{model_name}_{self.language}"
+ )
+ except Exception as e:
+ logger.error(f"Failed to log model artifacts to MLflow: {e}")
+
+ def load(self, model_path):
+ """
+ Load model from a local path OR an MLflow URI.
+
+ Args:
+ model_path (str): Local path or MLflow URI to load the model from.
+ """
+
+ logger.info(f"Loading model from: {model_path}")
+ local_model_path = model_path
+
+ # Downloading model from MLflow and saving to local path
+ if model_path.startswith("models:/") or model_path.startswith("runs:/"):
+ try:
+ logger.info("Detected MLflow model URI. Attempting to load from MLflow...")
+ local_model_path = os.path.join(MODELS_DIR, "mlflow_temp_models")
+ local_model_path = mlflow.artifacts.download_artifacts(
+ artifact_uri=model_path, dst_path=local_model_path
+ )
+ logger.info(f"Model downloaded from MLflow to: {local_model_path}")
+ except Exception as e:
+ logger.error(f"Failed to load from MLflow: {e}")
+ raise e
+
+ # Loading from local path
+ try:
+ if not os.path.exists(local_model_path):
+ raise FileNotFoundError(f"Model path not found: {local_model_path}")
+
+ # Load tokenizer and model from local path
+ self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
+ self.model = AutoModelForSequenceClassification.from_pretrained(local_model_path).to(
+ self.device
+ )
+ logger.info("Model loaded from local path successfully.")
+
+ except Exception as e:
+ logger.error(f"Failed to load model from local path: {e}")
+ raise e
+
+ # Set model to evaluation mode
+ self.model.eval()
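
Both Dataset classes accept either integer class indices or ready multi-hot vectors, and prediction applies a sigmoid with a 0.5 threshold. A small numpy sketch of those two steps (values are arbitrary):

```python
# Label handling and thresholding as plain numpy, mirroring the logic above.
import numpy as np

num_labels = 4

# Case A: integer class indices are expanded to one-hot rows.
indices = np.array([2, 0, 3])
one_hot = np.zeros((len(indices), num_labels), dtype=np.float32)
one_hot[np.arange(len(indices)), indices] = 1.0  # row i gets a 1 at column indices[i]

# Case B: multi-hot vectors pass through unchanged.

# Inference: logits -> sigmoid probabilities -> binary multi-hot predictions.
logits = np.array([[2.0, -1.0, 0.3, -3.0]])
probs = 1 / (1 + np.exp(-logits))
preds = (probs > 0.5).astype(int)
print(preds)  # [[1 0 1 0]]
```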
diff --git a/turing/modeling/models/randomForestTfIdf.py b/turing/modeling/models/randomForestTfIdf.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e785f7e0c24f4bf7e01e292d06d0a47c59d92f6
--- /dev/null
+++ b/turing/modeling/models/randomForestTfIdf.py
@@ -0,0 +1,153 @@
+import warnings
+
+from loguru import logger
+from numpy import ndarray
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics import (
+ accuracy_score,
+ classification_report,
+ f1_score,
+ precision_score,
+ recall_score,
+)
+from sklearn.model_selection import GridSearchCV
+from sklearn.multioutput import MultiOutputClassifier
+from sklearn.pipeline import Pipeline
+
+from ..baseModel import BaseModel
+
+warnings.filterwarnings("ignore")
+
+
+class RandomForestTfIdf(BaseModel):
+ """
+ Sklearn implementation of BaseModel with integrated Grid Search.
+ Builds a TF-IDF + RandomForest pipeline for multi-output text classification.
+ """
+
+ def __init__(self, language, path=None):
+ """
+ Initialize the RandomForestTfIdf model with configuration parameters.
+
+ Args:
+ language (str): Language for the model.
+ path (str, optional): Path to load a pre-trained model. Defaults to None.
+ If None, a new model is initialized.
+ """
+
+ self.params = {"stop_words": "english", "random_state": 42, "cv_folds": 5}
+
+ self.grid_params = {
+ "clf__estimator__n_estimators": [50, 100, 200],
+ "clf__estimator__max_depth": [None, 10, 20],
+ "tfidf__max_features": [3000, 5000, 8000],
+ }
+
+ super().__init__(language, path)
+
+ def setup_model(self):
+ """
+ Initialize the scikit-learn pipeline with TF-IDF vectorizer and RandomForest classifier.
+ """
+
+ base_estimator = RandomForestClassifier(
+ random_state=self.params["random_state"], n_jobs=-1
+ )
+
+ self.pipeline = Pipeline(
+ [
+ (
+ "tfidf",
+ TfidfVectorizer(ngram_range=(1, 2), stop_words=self.params["stop_words"]),
+ ),
+ ("clf", MultiOutputClassifier(base_estimator, n_jobs=-1)),
+ ]
+ )
+
+ self.model = self.pipeline
+ logger.info("Scikit-learn pipeline initialized.")
+
+ def train(self, X_train, y_train) -> dict[str, Any]:
+ """
+ Train the model using Grid Search to find the best hyperparameters.
+
+ Args:
+ X_train: Input training data.
+ y_train: True labels for training data.
+
+ Returns:
+ dict: Best hyperparameters found by the grid search.
+ """
+
+ if self.model is None:
+ raise ValueError(
+ "Model pipeline is not initialized. Call setup_model() before training."
+ )
+
+ logger.info(f"Starting training for: {self.language.upper()}")
+ logger.info("Performing Grid Search for best hyperparameters...")
+ grid_search = GridSearchCV(
+ self.pipeline,
+ param_grid=self.grid_params,
+ cv=self.params["cv_folds"],
+ scoring="f1_weighted",
+ n_jobs=-1,
+ verbose=1,
+ )
+ grid_search.fit(X_train, y_train)
+
+ logger.success(f"Best params found: {grid_search.best_params_}")
+
+ parameters_to_log = {
+ "max_features": grid_search.best_params_["tfidf__max_features"],
+ "n_estimators": grid_search.best_params_["clf__estimator__n_estimators"],
+ "max_depth": grid_search.best_params_["clf__estimator__max_depth"],
+ }
+
+ self.model = grid_search.best_estimator_
+ logger.success(f"Training for {self.language.upper()} completed.")
+
+ return parameters_to_log
+
+ def evaluate(self, X_test, y_test) -> dict[str, Any]:
+ """
+ Evaluate model on test data and return metrics.
+
+ Args:
+ X_test: Input test data.
+ y_test: True labels for test data.
+
+ Returns:
+ dict: Accuracy, precision, recall and F1 score on the test set.
+ """
+
+ y_pred = self.predict(X_test)
+
+ report = classification_report(y_test, y_pred, zero_division=0)
+ print("\n" + "=" * 50)
+ print("CLASSIFICATION REPORT")
+ print(report)
+ print("=" * 50 + "\n")
+
+ metrics = {
+ "accuracy": accuracy_score(y_test, y_pred),
+ "precision": precision_score(y_test, y_pred, average="macro", zero_division=0),
+ "recall": recall_score(y_test, y_pred, average="macro", zero_division=0),
+ "f1_score": f1_score(y_test, y_pred, average="weighted"),
+ }
+
+ logger.info(
+ f"Evaluation completed — Accuracy: {metrics['accuracy']:.3f}, F1: {metrics['f1_score']:.3f}"
+ )
+ return metrics
+
+ def predict(self, X) -> ndarray:
+ """
+ Make predictions using the trained model.
+
+ Args:
+ X: Input data for prediction.
+
+ Returns:
+ Predictions made by the model.
+ """
+
+ if self.model is None:
+ raise ValueError("Model is not trained. Call train() or load() before prediction.")
+
+ return self.model.predict(X)
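+
+
+# Minimal usage sketch (illustrative only; assumes X_train is a list of comment strings
+# and y_train a multi-hot label matrix):
+#
+# model = RandomForestTfIdf(language="java")
+# best_params = model.train(X_train, y_train) # grid search over TF-IDF and forest params
+# metrics = model.evaluate(X_test, y_test)
+# preds = model.predict(["/** Returns the user ID. */"])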
diff --git a/turing/modeling/models/tinyBert.py b/turing/modeling/models/tinyBert.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d76eb4dddf2e767bae4a5cced7c97328580b6da
--- /dev/null
+++ b/turing/modeling/models/tinyBert.py
@@ -0,0 +1,441 @@
+"""
+Ultra-lightweight multi-label text classification model for code comment analysis.
+
+This module implements a specialized neural architecture combining TinyBERT
+(a heavily compressed BERT variant, ~15MB) with a custom multi-label classification head.
+Designed for efficient inference on resource-constrained environments while
+maintaining competitive performance on code comment classification tasks.
+
+Architecture:
+ - Encoder: TinyBERT (prajjwal1/bert-tiny)
+ - Hidden dimension: taken from the encoder config (hidden_size)
+ - Classification layers: hidden_size -> 128 (ReLU) -> num_labels (Sigmoid)
+ - Regularization: Dropout(0.2) for preventing overfitting
+ - Loss function: Binary Cross-Entropy for multi-label classification
+
+Performance characteristics:
+ - Model size: ~15MB
+ - Inference latency: ~50ms per sample
+ - Memory footprint: ~200MB during training
+ - Supports multi-label outputs via sigmoid activation
+"""
+
+from typing import List
+
+from loguru import logger
+import numpy as np
+from sklearn.preprocessing import MultiLabelBinarizer
+import torch
+from torch import nn
+from torch.optim import Adam
+
+import turing.config as config
+from turing.modeling.baseModel import BaseModel
+
+try:
+ from transformers import AutoModel, AutoTokenizer
+except ImportError:
+ logger.error("transformers library required. Install with: pip install transformers torch")
+
+
+class TinyBERTClassifier(BaseModel):
+ """
+ Ultra-lightweight multi-label classifier for code comment analysis.
+
+ Combines TinyBERT encoder with a custom classification head optimized for
+ multi-label code comment classification across Java, Python, and Pharo.
+
+ Attributes:
+ device (torch.device): Computation device (CPU/GPU).
+ model (nn.ModuleDict): Container for encoder and classifier components.
+ tokenizer (AutoTokenizer): Hugging Face tokenizer for text preprocessing.
+ classifier (nn.Sequential): Custom multi-label classification head.
+ num_labels (int): Number of output classes per language.
+ labels_map (list): Mapping of label indices to semantic categories.
+
+ References:
+ TinyBERT: https://huggingface.co/prajjwal1/bert-tiny
+ """
+
+ def __init__(self, language: str, path: str = None):
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ logger.info(f"TinyBERT using device: {self.device}")
+ self.model = None
+ self.tokenizer = None
+ self.classifier = None
+ self.mlb = MultiLabelBinarizer()
+ self.labels_map = config.LABELS_MAP.get(language, [])
+ self.num_labels = len(self.labels_map)
+ self.params = {
+ "model": "TinyBERT",
+ "model_size": "15MB",
+ "epochs": 15,
+ "batch_size": 8,
+ "learning_rate": 1e-3,
+ }
+ super().__init__(language=language, path=path)
+
+ def setup_model(self):
+ """
+ Initialize TinyBERT encoder and custom classification head.
+
+ Loads the pre-trained TinyBERT model from Hugging Face model hub and
+ constructs a custom multi-label classification head with:
+ - Input: 312-dimensional encoder embeddings [CLS] token
+ - Hidden layer: 128 units with ReLU activation
+ - Dropout: 0.2 for regularization
+ - Output: num_labels units with Sigmoid activation
+
+ Raises:
+ Exception: If model initialization fails due to network or missing dependencies.
+ """
+ self._initialize_model()
+
+ def _initialize_model(self):
+ """
+ Initialize TinyBERT encoder and custom classification head.
+
+ Loads the pre-trained TinyBERT model from Hugging Face model hub and
+ constructs a custom multi-label classification head with:
+ - Input: encoder [CLS] embedding (hidden_size read from the model config)
+ - Hidden layer: 128 units with ReLU activation
+ - Dropout: 0.2 for regularization
+ - Output: num_labels units with Sigmoid activation
+
+ Raises:
+ Exception: If model initialization fails due to network or missing dependencies.
+ """
+ try:
+ model_name = "prajjwal1/bert-tiny"
+
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+ encoder = AutoModel.from_pretrained(model_name)
+ encoder.to(self.device)
+
+ hidden_dim = encoder.config.hidden_size
+
+ self.classifier = nn.Sequential(
+ nn.Linear(hidden_dim, 128),
+ nn.ReLU(),
+ nn.Dropout(0.2),
+ nn.Linear(128, self.num_labels),
+ nn.Sigmoid(),
+ ).to(self.device)
+
+ self.model = nn.ModuleDict({"encoder": encoder, "classifier": self.classifier})
+
+ logger.success(f"Initialized TinyBERTClassifier for {self.language}")
+ logger.info(f"Model size: ~15MB | Labels: {self.num_labels}")
+
+ except Exception as e:
+ logger.error(f"Error initializing model: {e}")
+ raise
+
+ def train(
+ self,
+ X_train: List[str],
+ y_train: np.ndarray,
+ path: str = None,
+ model_name: str = "tinybert_classifier",
+ epochs: int = 15,
+ batch_size: int = 8,
+ learning_rate: float = 1e-3,
+ ) -> dict:
+ """
+ Train the classifier using binary cross-entropy loss.
+
+ Optimizes the classification head with the Adam optimizer while keeping the encoder
+ frozen. Optionally saves the trained model when a checkpoint path is provided.
+
+ Args:
+ X_train (List[str]): Training text samples (code comments).
+ y_train (np.ndarray): Binary label matrix of shape (n_samples, n_labels).
+ path (str, optional): Directory path for model checkpoint saving.
+ model_name (str): Identifier for saved model artifacts.
+ epochs (int): Number of complete training iterations. Default: 15.
+ batch_size (int): Number of samples per gradient update. Default: 8.
+ learning_rate (float): Adam optimizer learning rate. Default: 1e-3.
+
+ Returns:
+ dict: Training configuration including hyperparameters and model metadata.
+
+ Raises:
+ Exception: If training fails due to data inconsistency or resource exhaustion.
+ """
+ try:
+ if self.model is None:
+ self._initialize_model()
+
+ optimizer = Adam(self.classifier.parameters(), lr=learning_rate)
+ criterion = nn.BCELoss()
+
+ num_samples = len(X_train)
+ num_batches = (num_samples + batch_size - 1) // batch_size
+
+ logger.info(f"Starting training: {epochs} epochs, {num_batches} batches per epoch")
+
+ for epoch in range(epochs):
+ total_loss = 0.0
+
+ for batch_idx in range(num_batches):
+ start_idx = batch_idx * batch_size
+ end_idx = min(start_idx + batch_size, num_samples)
+
+ batch_texts = X_train[start_idx:end_idx]
+ batch_labels = y_train[start_idx:end_idx]
+
+ optimizer.zero_grad()
+
+ tokens = self.tokenizer(
+ batch_texts,
+ padding=True,
+ truncation=True,
+ max_length=128,
+ return_tensors="pt",
+ ).to(self.device)
+
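+ # Note: the encoder runs under no_grad, so only the classification head receives gradients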
+ with torch.no_grad():
+ encoder_output = self.model["encoder"](**tokens)
+ cls_token = encoder_output.last_hidden_state[:, 0, :]
+
+ logits = self.classifier(cls_token)
+
+ labels_tensor = torch.tensor(batch_labels, dtype=torch.float32).to(self.device)
+ loss = criterion(logits, labels_tensor)
+
+ loss.backward()
+ optimizer.step()
+
+ total_loss += loss.item()
+
+ avg_loss = total_loss / num_batches
+ logger.info(f"Epoch {epoch + 1}/{epochs} - Loss: {avg_loss:.4f}")
+
+ logger.success(f"Training completed for {self.language}")
+
+ if path:
+ self.save(path, model_name)
+
+ return {
+ "epochs": epochs,
+ "batch_size": batch_size,
+ "learning_rate": learning_rate,
+ "model_size_mb": 15,
+ }
+
+ except Exception as e:
+ logger.error(f"Error training model: {e}")
+ raise
+
+ def predict(self, texts: List[str], threshold: float = 0.3) -> np.ndarray:
+ """
+ Generate multi-label predictions for code comments.
+
+ Performs inference in evaluation mode without gradient computation.
+ Applies probability threshold to convert sigmoid outputs to binary labels.
+
+ Args:
+ texts (List[str]): Code comment samples for classification.
+ threshold (float): Decision boundary for label assignment. Default: 0.3.
+ Values below threshold are mapped to 0, above to 1.
+
+ Returns:
+ np.ndarray: Binary predictions matrix of shape (n_samples, n_labels).
+
+ Raises:
+ ValueError: If model is not initialized.
+ Exception: If inference fails due to incompatible input dimensions.
+ """
+ if self.model is None:
+ raise ValueError("Model not initialized. Train or load a model first.")
+
+ self.model.eval()
+ predictions = []
+
+ # Convert various types to list: pandas Series, Dataset Column, etc.
+ if hasattr(texts, "tolist"):
+ texts = texts.tolist()
+ elif hasattr(texts, "__iter__") and not isinstance(texts, list):
+ texts = list(texts)
+
+ try:
+ with torch.no_grad():
+ tokens = self.tokenizer(
+ texts, padding=True, truncation=True, max_length=128, return_tensors="pt"
+ ).to(self.device)
+
+ encoder_output = self.model["encoder"](**tokens)
+ cls_token = encoder_output.last_hidden_state[:, 0, :]
+
+ logits = self.classifier(cls_token)
+ probabilities = logits.cpu().numpy()
+
+ predictions = (probabilities > threshold).astype(int)
+
+ return predictions
+
+ except Exception as e:
+ logger.error(f"Error during prediction: {e}")
+ raise
+
+ def evaluate(self, X_test: List[str], y_test: np.ndarray) -> dict:
+ """
+ Evaluate classification performance on test set.
+
+ Computes per-label and macro-averaged metrics:
+ - Precision: TP / (TP + FP) - correctness of positive predictions
+ - Recall: TP / (TP + FN) - coverage of actual positive instances
+ - F1-Score: 2 * (P * R) / (P + R) - harmonic mean of precision and recall
+ - Accuracy: mean element-wise agreement between predicted and true labels
+
+ Args:
+ X_test (List[str]): Test text samples for evaluation.
+ y_test (np.ndarray): Ground truth binary label matrix or indices.
+
+ Returns:
+ dict: Evaluation metrics including f1_score, precision, recall, accuracy.
+
+ Raises:
+ Exception: If evaluation fails due to prediction errors.
+ """
+ try:
+ predictions = self.predict(X_test)
+
+ # Convert y_test to numpy array if needed
+ if not isinstance(y_test, (np.ndarray, torch.Tensor)):
+ y_test_np = np.array(y_test)
+ elif isinstance(y_test, torch.Tensor):
+ y_test_np = y_test.cpu().numpy()
+ else:
+ y_test_np = y_test
+
+ # Handle conversion from flat indices to multi-hot encoding if needed
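+ # (e.g., with num_labels=3, class indices [2, 0] become [[0, 0, 1], [1, 0, 0]])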
+ is_multilabel_pred = predictions.ndim == 2 and predictions.shape[1] > 1
+ is_flat_truth = (y_test_np.ndim == 1) or (
+ y_test_np.ndim == 2 and y_test_np.shape[1] == 1
+ )
+
+ if is_multilabel_pred and is_flat_truth:
+ # Create zero matrix for multi-hot encoding
+ y_test_expanded = np.zeros((y_test_np.shape[0], self.num_labels), dtype=int)
+ indices = y_test_np.flatten()
+
+ # Set columns to 1 based on indices
+ for i, label_idx in enumerate(indices):
+ idx = int(label_idx)
+ if 0 <= idx < self.num_labels:
+ y_test_expanded[i, idx] = 1
+
+ y_test_np = y_test_expanded
+
+ tp = np.sum((predictions == 1) & (y_test_np == 1), axis=0)
+ fp = np.sum((predictions == 1) & (y_test_np == 0), axis=0)
+ fn = np.sum((predictions == 0) & (y_test_np == 1), axis=0)
+
+ precision_per_label = tp / (tp + fp + 1e-10)
+ recall_per_label = tp / (tp + fn + 1e-10)
+ f1_per_label = (
+ 2
+ * (precision_per_label * recall_per_label)
+ / (precision_per_label + recall_per_label + 1e-10)
+ )
+
+ metrics = {
+ "f1_score": float(np.mean(f1_per_label)),
+ "precision": float(np.mean(precision_per_label)),
+ "recall": float(np.mean(recall_per_label)),
+ "accuracy": float(np.mean(predictions == y_test_np)),
+ }
+
+ logger.info(f"Evaluation metrics: {metrics}")
+ return metrics
+
+ except Exception as e:
+ logger.error(f"Error evaluating model: {e}")
+ raise
+
+ def save(self, path: str, model_name: str = "tinybert_classifier"):
+ """
+ Persist model artifacts including weights, tokenizer, and configuration.
+
+ Saves the following components:
+ - classifier.pt: PyTorch state dictionary of classification head
+ - tokenizer configuration: Hugging Face tokenizer files
+ - config.json: Model metadata and label mappings
+
+ Args:
+ path (str): Parent directory for model checkpoint storage.
+ model_name (str): Model identifier used as subdirectory name.
+
+ Raises:
+ Exception: If file I/O or serialization fails.
+ """
+ try:
+ import os
+
+ model_path = os.path.join(path, model_name)
+ os.makedirs(model_path, exist_ok=True)
+
+ if self.classifier:
+ torch.save(self.classifier.state_dict(), os.path.join(model_path, "classifier.pt"))
+
+ if self.tokenizer:
+ self.tokenizer.save_pretrained(model_path)
+
+ config_data = {
+ "language": self.language,
+ "num_labels": self.num_labels,
+ "labels_map": self.labels_map,
+ "model_type": "tinybert_classifier",
+ "model_name": model_name,
+ }
+
+ import json
+
+ with open(os.path.join(model_path, "config.json"), "w") as f:
+ json.dump(config_data, f, indent=2)
+
+ logger.success(f"Model saved to {model_path}")
+
+ except Exception as e:
+ logger.error(f"Error saving model: {e}")
+ raise
+
+ def load(self, path: str):
+ """
+ Restore model state from checkpoint directory.
+
+ Loads classifier weights from serialized PyTorch tensors and reinitializes
+ the tokenizer from saved configuration. Restores language-specific label
+ mappings from JSON metadata.
+
+ Args:
+ path (str): Directory containing model checkpoint files.
+
+ Raises:
+ Exception: If file not found or deserialization fails.
+ """
+ try:
+ import json
+ import os
+
+ self._initialize_model()
+
+ classifier_path = os.path.join(path, "classifier.pt")
+ if os.path.exists(classifier_path):
+ self.classifier.load_state_dict(
+ torch.load(classifier_path, map_location=self.device)
+ )
+
+ config_path = os.path.join(path, "config.json")
+ if os.path.exists(config_path):
+ with open(config_path, "r") as f:
+ config_data = json.load(f)
+ self.language = config_data.get("language", self.language)
+ self.labels_map = config_data.get("labels_map", self.labels_map)
+
+ logger.success(f"Model loaded from {path}")
+
+ except Exception as e:
+ logger.error(f"Error loading model: {e}")
+ raise
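+
+
+# Minimal usage sketch (illustrative only; X_train is a list of comment strings and
+# y_train a multi-hot numpy matrix of shape (n_samples, num_labels)):
+#
+# clf = TinyBERTClassifier(language="python")
+# clf.train(X_train, y_train, epochs=1, batch_size=8)
+# preds = clf.predict(["# TODO: refactor this block"], threshold=0.3)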
diff --git a/turing/modeling/predict.py b/turing/modeling/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..4304a04830035c8e3e50dc97cc67a955ecce1c77
--- /dev/null
+++ b/turing/modeling/predict.py
@@ -0,0 +1,195 @@
+import importlib
+import warnings
+
+import dagshub
+from loguru import logger
+import mlflow
+import numpy as np
+import pandas as pd
+
+from turing.config import INPUT_COLUMN, LABELS_MAP, LANGS, MODEL_CONFIG, MODELS_DIR
+from turing.dataset import DatasetManager
+from turing.modeling.model_selector import get_best_model_info
+from turing.modeling.models.codeBerta import CodeBERTa
+
+
+class ModelInference:
+ # Model Configuration (Fallback Registry)
+ FALLBACK_MODEL_REGISTRY = {
+ "java": {
+ "run_id": "446f4459780347da8c796e619129be37",
+ "artifact": "fine-tuned-CodeBERTa_java",
+ "model_id": "codeberta",
+ },
+ "python": {
+ "run_id": "ef5fd8ebf33a412087dcf02afd9e3147",
+ "artifact": "fine-tuned-CodeBERTa_python",
+ "model_id": "codeberta",
+ },
+ "pharo": {
+ "run_id": "97822c6d84fc40c5b2363c9201a39997",
+ "artifact": "fine-tuned-CodeBERTa_pharo",
+ "model_id": "codeberta",
+ },
+ }
+
+
+ def __init__(self, repo_owner="se4ai2526-uniba", repo_name="Turing", use_best_model_tags=True):
+ dagshub.init(repo_owner=repo_owner, repo_name=repo_name, mlflow=True)
+ warnings.filterwarnings("ignore")
+ self.dataset_manager = DatasetManager()
+ self.use_best_model_tags = use_best_model_tags
+
+ # Initialize model registry based on configuration
+ if use_best_model_tags:
+ logger.info("Using MLflow tags to find best models")
+
+ self.model_registry = {}
+ for lang in LANGS:
+ try:
+ model_info = get_best_model_info(
+ lang, fallback_registry=self.FALLBACK_MODEL_REGISTRY
+ )
+ self.model_registry[lang] = model_info
+ logger.info(f"Loaded model info for {lang}: {model_info}")
+
+ # raise error if any required info is missing
+ if not all(k in model_info for k in ("run_id", "artifact", "model_id")):
+ raise ValueError(f"Incomplete model info for {lang}: {model_info}")
+
+ except Exception as e:
+ logger.warning(f"Could not load model info for {lang}: {e}")
+ if lang in self.FALLBACK_MODEL_REGISTRY:
+ self.model_registry[lang] = self.FALLBACK_MODEL_REGISTRY[lang]
+
+ # Pre-cache models locally
+ run_id = self.model_registry[lang]["run_id"]
+ artifact = self.model_registry[lang]["artifact"]
+ self._get_cached_model_path(run_id, artifact, lang)
+ else:
+ logger.info("Using hardcoded model registry")
+ self.model_registry = self.FALLBACK_MODEL_REGISTRY
+
+ def _decode_predictions(self, raw_predictions, language: str):
+ """
+ Converts the binary matrix from the model into human-readable labels.
+
+ Args:
+ raw_predictions: Numpy array or similar with binary predictions
+ language: Programming language for label mapping
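+
+ Example (illustrative, assuming labels_map == ["Summary", "Usage", "Pointer"]):
+ [[1, 0, 1]] -> [["Summary", "Pointer"]]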
+ """
+
+ labels_map = LABELS_MAP.get(language, [])
+ decoded_results = []
+
+ # Ensure input is a numpy array for processing
+ if isinstance(raw_predictions, list):
+ raw_array = np.array(raw_predictions)
+ elif isinstance(raw_predictions, pd.DataFrame):
+ raw_array = raw_predictions.values
+ else:
+ raw_array = raw_predictions
+
+ # Iterate over rows
+ for row in raw_array:
+ indices = np.where(row == 1)[0]
+ # Map indices to labels safely
+ row_labels = [labels_map[i] for i in indices if i < len(labels_map)]
+ decoded_results.append(row_labels)
+
+ return decoded_results
+
+ def _get_cached_model_path(self, run_id: str, artifact_name: str, language: str) -> str:
+ """Checks if model exists locally; if not, downloads it from MLflow."""
+ # Define local path: models/mlflow_temp_models/language/artifact_name
+ local_path = MODELS_DIR / "mlflow_temp_models" / language / artifact_name
+
+ if local_path.exists():
+ logger.info(f"Loading {language} model from local cache: {local_path}")
+ return str(local_path)
+
+ logger.info(
+ f"Model not found locally. Downloading {language} model from MLflow (Run ID: {run_id})..."
+ )
+
+ # Ensure parent directory exists
+ local_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Download artifacts to the parent directory (artifact_name folder will be created inside)
+ mlflow.artifacts.download_artifacts(
+ run_id=run_id, artifact_path=artifact_name, dst_path=str(local_path.parent)
+ )
+ logger.success(f"Model downloaded and cached at: {local_path}")
+
+ return str(local_path)
+
+ def predict_payload(self, texts: list[str], language: str):
+ """
+ API Prediction: Automatically fetches the correct model from the registry based on language.
+
+ Args:
+ texts: List of code comments to classify
+ language: Programming language
+ """
+
+ # 1. Validate Language and Fetch Config
+ if language not in self.model_registry:
+ raise ValueError(
+ f"Language '{language}' is not supported or the model is not configured."
+ )
+
+ model_config = self.model_registry[language]
+ run_id = model_config["run_id"]
+ artifact_name = model_config["artifact"]
+ model_id = model_config["model_id"]
+
+ # Dynamically import model class
+ config_entry = MODEL_CONFIG[model_id]
+ module_name = config_entry["model_class_module"]
+ class_name = config_entry["model_class_name"]
+ module = importlib.import_module(module_name)
+ model_class = getattr(module, class_name)
+
+ # 2. Get Model Path (Local Cache or Download)
+ model_path = self._get_cached_model_path(run_id, artifact_name, language)
+
+ # Load Model
+ model = model_class(language=language, path=model_path)
+
+ # 3. Predict
+ raw_predictions = model.predict(texts)
+
+ # 4. Decode Labels
+ decoded_labels = self._decode_predictions(raw_predictions, language)
+
+ return raw_predictions, decoded_labels, run_id, artifact_name
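+
+ # Illustrative call (hypothetical input):
+ # raw, labels, run_id, artifact = inference.predict_payload(["# TODO: refactor"], "python")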
+
+ def predict_from_mlflow(
+ self, mlflow_run_id: str, artifact_name: str, language: str, model_class=CodeBERTa
+ ):
+ """
+ Legacy method for CML/CLI: Predicts on the test dataset stored on disk.
+ """
+ # Load Dataset
+ try:
+ full_dataset = self.dataset_manager.get_dataset()
+ dataset_key = f"{language}_test"
+ if dataset_key not in full_dataset:
+ raise ValueError(f"Dataset key '{dataset_key}' not found.")
+ test_ds = full_dataset[dataset_key]
+ X_test = test_ds[INPUT_COLUMN]
+ except Exception as e:
+ logger.error(f"Error loading dataset: {e}")
+ raise e
+
+ # Load Model (Local Cache or Download)
+ model_path = self._get_cached_model_path(mlflow_run_id, artifact_name, language)
+ model = model_class(language=language, path=model_path)
+
+ raw_predictions = model.predict(X_test)
+
+ # Decode output
+ readable_predictions = self._decode_predictions(raw_predictions, language)
+
+ logger.info("Dataset prediction completed.")
+ return readable_predictions
diff --git a/turing/modeling/train.py b/turing/modeling/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..cad1b0567f663bcf20c40af5ecdb17c28fa49fe7
--- /dev/null
+++ b/turing/modeling/train.py
@@ -0,0 +1,212 @@
+from importlib import import_module
+import os
+import warnings
+
+import dagshub
+from loguru import logger
+import mlflow
+from mlflow.tracking import MlflowClient
+import numpy as np
+import typer
+
+import turing.config as config
+from turing.dataset import DatasetManager
+from turing.evaluate_model import evaluate_models
+
+dagshub.init(repo_owner="se4ai2526-uniba", repo_name="Turing", mlflow=True)
+
+warnings.filterwarnings("ignore")
+
+DEFAULT_MODEL = "codeberta"
+_default_cfg = config.MODEL_CONFIG[DEFAULT_MODEL]
+
+MODEL_CLASS_MODULE = _default_cfg["model_class_module"]
+MODEL_CLASS_NAME = _default_cfg["model_class_name"]
+MODEL_CLASS = getattr(import_module(MODEL_CLASS_MODULE), MODEL_CLASS_NAME)
+EXP_NAME = _default_cfg["exp_name"]
+MODEL_NAME = _default_cfg["model_name"]
+
+
+
+app = typer.Typer()
+
+
+def tag_best_models(
+ metric: str = "f1_score"
+):
+ """
+ Tag the best existing models in MLflow based on the specified metric.
+ Remove previous best_model tags before tagging the new best models.
+
+ Args:
+ metric: Metric to use for determining the best model
+ """
+
+ dagshub.init(repo_owner="se4ai2526-uniba", repo_name="Turing", mlflow=True)
+ client = MlflowClient()
+
+ # Get all experiments from Mlflow
+ experiments = client.search_experiments()
+ if not experiments:
+ logger.error("No experiments found in MLflow")
+ return
+
+ # Find the best run for each language
+ experiments_ids = [exp.experiment_id for exp in experiments]
+ for lang in config.LANGS:
+ # Get all runs for the language
+ runs = client.search_runs(
+ experiment_ids=experiments_ids,
+ filter_string=f"tags.Language = '{lang}'",
+ order_by=[f"metrics.{metric} DESC"]
+ )
+
+ if not runs:
+ logger.warning(f"No runs found for language {lang}")
+ continue
+ logger.info(f"Found {len(runs)} runs for {lang}")
+
+ # Get the best run for the language
+ best_run = runs[0]
+ run_id = best_run.info.run_id
+
+ # Remove previous best_model tags for this language
+ for run in runs[1:]:
+ try:
+ client.delete_tag(run.info.run_id, "best_model")
+ except Exception:
+ pass
+
+ # Tag the best model
+ client.set_tag(run_id, "best_model", "true")
+
+
+def show_tagged_models():
+ """
+ Show all models tagged as best_model.
+ """
+
+ dagshub.init(repo_owner="se4ai2526-uniba", repo_name="Turing", mlflow=True)
+ client = MlflowClient()
+
+ # Get all experiments from Mlflow
+ experiments = client.search_experiments()
+ if not experiments:
+ logger.error("No experiments found in MLflow")
+ return
+
+ # Find all runs tagged as best_model
+ runs = client.search_runs(
+ experiment_ids=[exp.experiment_id for exp in experiments],
+ filter_string="tags.best_model = 'true'",
+ order_by=["tags.Language ASC"]
+ )
+ logger.info(f"\nFound {len(runs)} best models in experiments:\n")
+
+ # Display details of each tagged best model
+ for run in runs:
+ language = run.data.tags.get("Language", "unknown")
+ exp_name = client.get_experiment(run.info.experiment_id).name
+ run_id = run.info.run_id
+ run_name = run.data.tags.get("mlflow.runName", "N/A")
+ dataset_name = run.data.tags.get("dataset_name", "unknown")
+
+ logger.info(f"Language: {language}")
+ logger.info(f" Run: {exp_name}/{run_name} ({run_id})")
+ logger.info(f" Dataset: {dataset_name}")
+
+ if run.data.metrics:
+ for metric in run.data.metrics:
+ logger.info(f" {metric}: {run.data.metrics[metric]:.4f}")
+
+ logger.info("")
+
+
+@app.command()
+def main(
+ model: str = typer.Option(
+ "codeberta", help="Model to train: codeberta, graphcodebert, tinybert, or randomforest"
+ ),
+ dataset: str = typer.Option(None, help="Dataset to use for training"),
+):
+ # Get model configuration from config
+ model_key = model.lower()
+ if model_key not in config.MODEL_CONFIG:
+ logger.error(f"Unknown model: {model_key}. Available models: {list(config.MODEL_CONFIG.keys())}")
+ return
+
+ model_cfg = config.MODEL_CONFIG[model_key]
+ model_name = model_cfg["model_name"]
+ exp_name = model_cfg["exp_name"]
+
+ # Dynamically import model class
+ module = import_module(model_cfg["model_class_module"])
+ model_class = getattr(module, model_cfg["model_class_name"])
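+ # e.g., "codeberta" resolves to the CodeBERTa class in turing/modeling/models/codeBerta.py (per MODEL_CONFIG)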
+
+ logger.info(f"Training model: {model_name}")
+
+ # Load dataset
+ # Use the default dataset location when --dataset is not provided
+ if dataset:
+ dataset_manager = DatasetManager(dataset_path=config.INTERIM_DATA_DIR / "features" / dataset)
+ else:
+ dataset_manager = DatasetManager()
+ try:
+ full_dataset = dataset_manager.get_dataset()
+ dataset_name = dataset_manager.get_dataset_name()
+ except Exception as e:
+ logger.error(f"Error loading dataset: {e}")
+ return
+ logger.info(f"Dataset loaded successfully: {dataset_name}")
+
+ # Train and evaluate models for each language
+ mlflow.set_experiment(exp_name)
+ models = {}
+ for lang in config.LANGS:
+ # Prepare training and testing data
+ train_ds = full_dataset[f"{lang}_train"]
+ test_ds = full_dataset[f"{lang}_test"]
+ X_train = train_ds[config.INPUT_COLUMN]
+ y_train = train_ds[config.LABEL_COLUMN]
+ X_test = test_ds[config.INPUT_COLUMN]
+ y_test = test_ds[config.LABEL_COLUMN]
+ X_train = list(X_train)
+ X_test = list(X_test)
+ y_train = np.array(y_train)
+
+ # Initialize model
+ model = model_class(language=lang)
+
+ # Train and evaluate model within an MLflow run
+ try:
+ with mlflow.start_run(run_name=f"{model_name}_{lang}"):
+ mlflow.set_tag("Language", lang)
+ mlflow.set_tag("dataset_name", dataset_name)
+ mlflow.set_tag("model_id", model_key)
+ mlflow.log_params(model.params)
+ parameters_to_log = model.train(X_train, y_train)
+ mlflow.log_params(parameters_to_log)
+ model.save(os.path.join(config.MODELS_DIR, exp_name), model_name=model_name)
+ metrics = model.evaluate(X_test, y_test)
+ mlflow.log_metrics(metrics)
+
+ # Log model name for later retrieval
+ mlflow.set_tag("model_name", f"{model_name}_{lang}")
+
+ except Exception as e:
+ logger.error(f"Error training/evaluating model for {lang}: {e}")
+ return
+
+ # Store trained model
+ models[lang] = model
+ logger.success(f"All {model_name} models trained and evaluated.")
+
+ # Competition-style evaluation of trained models
+ logger.info("Starting competition-style evaluation of trained models...")
+ evaluate_models(models, full_dataset)
+ logger.success("Evaluation completed.")
+
+ logger.info("Tagging best models in MLflow...")
+ tag_best_models()
+ logger.info("Best models:")
+ show_tagged_models()
+
+
+if __name__ == "__main__":
+ app()
diff --git a/turing/plots.py b/turing/plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..10f8e958ed9634b8c2aceaff6fb3bd6a8841a998
--- /dev/null
+++ b/turing/plots.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+from loguru import logger
+from tqdm import tqdm
+import typer
+
+from turing.config import FIGURES_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+ output_path: Path = FIGURES_DIR / "plot.png",
+ # -----------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Generating plot from data...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Plot generation complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()
diff --git a/turing/reporting.py b/turing/reporting.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff4fb88e672d38867d2b31f68a95c14c43a04f0f
--- /dev/null
+++ b/turing/reporting.py
@@ -0,0 +1,173 @@
+from datetime import datetime
+import platform
+import sys
+from typing import Optional
+
+from loguru import logger
+import pandas as pd
+
+from turing.config import REPORTS_DIR
+
+
+class TestReportGenerator:
+ """
+ Handles the generation of structured Markdown reports specifically for test execution results.
+ """
+
+ def __init__(self, context_name: str, report_category: str):
+ self.context_name = context_name
+ self.report_category = report_category
+ self.timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+ self.content = []
+ self.output_dir = REPORTS_DIR / self.report_category
+
+ def add_header(self, text: str, level: int = 1):
+ self.content.append(f"\n{'#' * level} {text}\n")
+
+ def add_divider(self, style: str = "thin"):
+ """Add a visual divider line."""
+ dividers = {
+ "thin": "---",
+ "thick": "___",
+ "section": "\n---\n",
+ }
+ self.content.append(f"\n{dividers.get(style, dividers['thin'])}\n")
+
+ def add_code_block(self, content: str, language: str = ""):
+ """Add a code block."""
+ self.content.append(f"\n```{language}\n{content}\n```\n")
+
+ def add_alert_box(self, message: str, box_type: str = "info"):
+ """Add a styled alert box using blockquotes."""
+ box_headers = {
+ "info": "INFO",
+ "success": "SUCCESS",
+ "warning": "WARNING",
+ "error": "ERROR",
+ }
+ header = box_headers.get(box_type, "INFO")
+ self.content.append(f"\n> **{header}**: {message}\n")
+
+ def add_progress_bar(self, passed: int, total: int, width: int = 50):
+ """Add an ASCII progress bar."""
+ if total == 0:
+ percentage = 0
+ filled = 0
+ else:
+ percentage = (passed / total * 100)
+ filled = int(width * passed / total)
+
+ empty = width - filled
+ bar = "█" * filled + "░" * empty
+ self.add_code_block(f"Progress: [{bar}] {percentage:.1f}%\nPassed: {passed}/{total} tests", "")
+
+ def add_summary_box(self, total: int, passed: int, failed: int, skipped: int = 0):
+ """Add a visually enhanced summary box."""
+ success_rate = (passed / total * 100) if total > 0 else 0
+
+ # Determine status
+ if success_rate == 100:
+ status = "ALL TESTS PASSED"
+ elif success_rate >= 80:
+ status = "MOSTLY PASSED"
+ elif success_rate >= 50:
+ status = "PARTIAL SUCCESS"
+ else:
+ status = "NEEDS ATTENTION"
+
+ self.add_header("Executive Summary", level=2)
+ self.add_text(f"**Overall Status:** {status}")
+ self.add_text(f"**Success Rate:** {success_rate:.1f}%")
+
+ # Summary table
+ summary_data = [
+ ["Total Tests", str(total)],
+ ["Passed", str(passed)],
+ ["Failed", str(failed)],
+ ]
+
+ if skipped > 0:
+ summary_data.append(["Skipped", str(skipped)])
+
+ summary_data.append(["Success Rate", f"{success_rate:.1f}%"])
+
+ df = pd.DataFrame(summary_data, columns=["Metric", "Count"])
+ self.add_dataframe(df, title=None, align=("left", "right"))
+
+ # Progress bar
+ self.add_text("**Visual Progress:**")
+ self.add_progress_bar(passed, total)
+
+ def add_environment_metadata(self):
+ """Add enhanced environment metadata."""
+ self.add_header("Environment Information", level=2)
+
+ metadata = [
+ ["Timestamp", datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
+ ["Context", self.context_name.upper()],
+ ["Python Version", sys.version.split()[0]],
+ ["Platform", platform.platform()],
+ ["Architecture", platform.machine()],
+ ]
+ df = pd.DataFrame(metadata, columns=["Parameter", "Value"])
+ self.add_dataframe(df, title=None, align=("left", "left"))
+
+ def add_text(self, text: str):
+ self.content.append(f"\n{text}\n")
+
+ def add_category_stats(self, df: pd.DataFrame, category: str):
+ """Add statistics for a test category."""
+ total = len(df)
+ passed = len(df[df['Result'] == "PASS"])
+ failed = len(df[df['Result'] == "FAIL"])
+ skipped = len(df[df['Result'] == "SKIP"])
+
+ stats = [
+ ["Total", str(total)],
+ ["Passed", f"{passed} ({passed/total*100:.1f}%)" if total > 0 else "0"],
+ ["Failed", f"{failed} ({failed/total*100:.1f}%)" if total > 0 else "0"],
+ ]
+
+ if skipped > 0:
+ stats.append(["Skipped", f"{skipped} ({skipped/total*100:.1f}%)"])
+
+ stats_df = pd.DataFrame(stats, columns=["Status", "Count"])
+ self.add_dataframe(stats_df, title="Statistics", align=("left", "right"))
+
+ def add_dataframe(self, df: pd.DataFrame, title: Optional[str] = None, align: tuple = None):
+ """Add a formatted dataframe table."""
+ if title:
+ self.add_header(title, level=3)
+
+ if df.empty:
+ self.content.append("\n_No data available._\n")
+ return
+
+ try:
+ if not align:
+ align = tuple(["left"] * len(df.columns))
+
+ table_md = df.to_markdown(index=False, tablefmt="pipe", colalign=align)
+ self.content.append(f"\n{table_md}\n")
+ except Exception as e:
+ logger.warning(f"Tabulate error: {e}. Using simple text.")
+ self.content.append(f"\n```text\n{df.to_string(index=False)}\n```\n")
+
+ def save(self, filename: str = "test_report.md") -> str:
+ """Save the report to a file."""
+ try:
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+ file_path = self.output_dir / filename
+
+ # Add footer
+ self.add_divider("section")
+ self.add_text(f"*Report generated on {datetime.now().strftime('%Y-%m-%d at %H:%M:%S')}*")
+ self.add_text("*Powered by Turing Test Suite*")
+
+ with open(file_path, "w", encoding="utf-8") as f:
+ f.write("\n".join(self.content))
+ logger.info(f"Test report saved: {file_path}")
+ return str(file_path)
+ except Exception as e:
+ logger.error(f"Save failed: {e}")
+ raise
diff --git a/turing/tests/behavioral/test_directional.py b/turing/tests/behavioral/test_directional.py
new file mode 100644
index 0000000000000000000000000000000000000000..d82d16743916763a64e603224e78f0e693660fc2
--- /dev/null
+++ b/turing/tests/behavioral/test_directional.py
@@ -0,0 +1,183 @@
+# These tests check that adding or removing keywords logically changes the prediction
+
+
+def test_java_directional_add_deprecation(java_model, get_predicted_labels):
+ """Tests that adding '@deprecated' ADDs the 'deprecation' label"""
+ # Base comment should be a 'Pointer' due to the link
+ base_comment = "/** Use {@link #newUserMethod()} instead. */"
+ # Perturbed comment adds a keyword
+ pert_comment = "/** @deprecated Use {@link #newUserMethod()} instead. */"
+
+ preds_base = get_predicted_labels(java_model, base_comment, "java")
+ preds_pert = get_predicted_labels(java_model, pert_comment, "java")
+
+ # The base comment should not have 'deprecation'
+ assert "deprecation" not in preds_base
+ # The perturbed comment must have 'deprecation'
+ assert "deprecation" in preds_pert
+ # The original 'Pointer' label should still be there
+ assert "Pointer" in preds_base
+ assert "Pointer" in preds_pert
+
+
+def test_python_directional_remove_todo(python_model, get_predicted_labels):
+ """Tests that removing 'TODO' REMOVES the 'DevelopmentNotes' labe."""
+ base_comment = "# TODO: Refactor this entire block."
+ pert_comment = "# Refactor this entire block."
+
+ preds_base = get_predicted_labels(python_model, base_comment, "python")
+ preds_pert = get_predicted_labels(python_model, pert_comment, "python")
+
+ # The base comment must have 'DevelopmentNotes'
+ assert "DevelopmentNotes" in preds_base
+ # The perturbed comment must not have 'DevelopmentNotes'
+ assert "DevelopmentNotes" not in preds_pert
+
+
+def test_pharo_directional_add_responsibility(pharo_model, get_predicted_labels):
+ """Tests that adding 'i am responsible for' adds the 'Responsibilities' label"""
+ base_comment = '"i am a simple arrow"'
+ pert_comment = '"i am a simple arrow. i am responsible for drawing."'
+
+ preds_base = get_predicted_labels(pharo_model, base_comment, "pharo")
+ preds_pert = get_predicted_labels(pharo_model, pert_comment, "pharo")
+
+ # base comment should have 'Intent'
+ assert "Intent" in preds_base
+ # base comment should not have 'Responsibilities'
+ assert "Responsibilities" not in preds_base
+ # perturbed comment must have 'Responsibilities'
+ assert "Responsibilities" in preds_pert
+ # original 'Intent' label should still be there
+ assert "Intent" in preds_pert
+
+
+def test_java_directional_contrast_rational(java_model, get_predicted_labels):
+ """
+ Tests that adding a design rationale adds the 'rational' label
+ """
+ # Base comment is a simple summary
+ base_comment = "/** Returns the user ID. */"
+ # Perturbed comment adds a design rationale
+ pert_comment = "/** Returns the user ID. This is cached for performance. */"
+
+ preds_base = get_predicted_labels(java_model, base_comment, "java")
+ preds_pert = get_predicted_labels(java_model, pert_comment, "java")
+
+ # Base comment should be a 'summary'
+ assert "summary" in preds_base
+ # Base comment should not have 'rational'
+ assert "rational" not in preds_base
+ # Perturbed comment must now have 'rational'
+ assert "rational" in preds_pert
+ # Perturbed comment should ideally still be a 'summary'
+ assert "summary" in preds_pert
+
+
+def test_python_directional_contrast_todo(python_model, get_predicted_labels):
+ """
+ Tests that adding a "TODO" clause adds the 'DevelopmentNotes' label
+ """
+ # Base comment is a simple summary
+ base_comment = "Fetches the user profile."
+ # Perturbed comment adds a development note
+ pert_comment = "Fetches the user profile. TODO: This is deprecated."
+
+ preds_base = get_predicted_labels(python_model, base_comment, "python")
+ preds_pert = get_predicted_labels(python_model, pert_comment, "python")
+
+ # Base comment should be a 'Summary'
+ assert "Summary" in preds_base
+ # Base comment should not have 'DevelopmentNotes'
+ assert "DevelopmentNotes" not in preds_base
+ # Perturbed comment must now have 'DevelopmentNotes'
+ assert "DevelopmentNotes" in preds_pert
+ # Perturbed comment should ideally still be a 'Summary'
+ assert "Summary" in preds_pert
+
+
+def test_pharo_directional_contrast_collaborators(pharo_model, get_predicted_labels):
+ """
+ Tests that adding a 'but i work with' clause adds the 'Collaborators' label
+ """
+ # Base comment is a simple intent
+ base_comment = '"i am a simple arrow like arrowhead."'
+ pert_comment = '"i am a simple arrow, but i work with BlSpace to position."'
+
+ preds_base = get_predicted_labels(pharo_model, base_comment, "pharo")
+ preds_pert = get_predicted_labels(pharo_model, pert_comment, "pharo")
+
+ # Base comment should be 'Intent'
+ assert "Intent" in preds_base
+ # Base comment should not have 'Collaborators'
+ assert "Collaborators" not in preds_base
+ # Perturbed comment must now have 'Collaborators'
+ assert "Collaborators" in preds_pert
+ # Perturbed comment should ideally still have 'Intent'
+ assert "Intent" in preds_pert
+
+
+def test_java_directional_shift_summary_to_expand(java_model, get_predicted_labels):
+ """
+ Tests that replacing a simple 'summary' with an 'Expand' implementation note
+ shifts the primary classification from 'summary' to 'Expand'
+ """
+ # Base comment is a simple summary
+ base_comment = "/** Returns the user ID. */"
+ # Perturbed comment shifts the focus entirely to implementation details
+ pert_comment = "/** Implementation Note: This delegates to the old system. */"
+
+ preds_base = get_predicted_labels(java_model, base_comment, "java")
+ preds_pert = get_predicted_labels(java_model, pert_comment, "java")
+
+ # Base comment must have 'summary'
+ assert "summary" in preds_base
+ # Perturbed comment must not have 'summary'
+ assert "summary" not in preds_pert
+ # Perturbed comment must now have 'Expand'
+ assert "Expand" in preds_pert
+
+
+def test_python_directional_shift_summary_to_devnotes(python_model, get_predicted_labels):
+ """
+ Tests that replacing a 'Summary' with a critical development note (deprecated)
+ shifts the classification from 'Summary' to 'DevelopmentNotes'
+ """
+ print(f"\n[DEBUG] Oggetto modello Python: {python_model}, Lingua: {python_model.language}")
+ # Base comment is a clear Summary
+ base_comment = "Fetches the user profile."
+ # Perturbed comment shifts the focus entirely to a note about future work
+ pert_comment = "DEPRECATED: This function is scheduled for removal in v2.0."
+
+ preds_base = get_predicted_labels(python_model, base_comment, "python")
+ preds_pert = get_predicted_labels(python_model, pert_comment, "python")
+
+ # Base comment must have 'Summary'
+ assert "Summary" in preds_base
+ # Perturbed comment must not have 'Summary'
+ assert "Summary" not in preds_pert
+ # Perturbed comment must now have 'DevelopmentNotes'
+ assert "DevelopmentNotes" in preds_pert
+
+
+def test_pharo_directional_shift_to_example(pharo_model, get_predicted_labels):
+ """
+ Tests that changing a comment from a 'Responsibility' statement to an
+ explicit 'Example' statement shifts the primary classification
+ """
+ # Base comment is a clear 'Responsibilities'
+ base_comment = '"i provide a data structure independent api"'
+ # Perturbed comment replaces the responsibility claim with an explicit example pattern
+ pert_comment = '"[Example] run the data structure independent api."'
+
+ preds_base = get_predicted_labels(pharo_model, base_comment, "pharo")
+ preds_pert = get_predicted_labels(pharo_model, pert_comment, "pharo")
+
+ # Base comment must have Responsibilities
+ assert "Responsibilities" in preds_base
+ # Base comment should not have Example
+ assert "Example" not in preds_base
+ # Perturbed comment must now have Example
+ assert "Example" in preds_pert
+ # Perturbed comment should not have Responsibilities
+ assert "Responsibilities" not in preds_pert
diff --git a/turing/tests/behavioral/test_invariance.py b/turing/tests/behavioral/test_invariance.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe85fdb484ca2c6d1d1db8284b020826c2e23a88
--- /dev/null
+++ b/turing/tests/behavioral/test_invariance.py
@@ -0,0 +1,117 @@
+import pytest
+
+# These tests check that "noise" (like capitalization or punctuation) does not change the prediction
+
+
+@pytest.mark.parametrize(
+ "comment",
+ [
+ ":param user_id: The ID of the user.", # Base
+ ":PARAM USER_ID: THE ID OF THE USER.", # Uppercase
+ " :param user_id: The ID of the user . ", # Whitespace
+ ":param user_id: The ID of the user!!!", # Punctuation
+ ],
+)
+def test_python_invariance_parameters(python_model, comment, get_predicted_labels):
+ """Tests that noise doesn't break ':param' detection."""
+ expected = {"Parameters"}
+ preds = get_predicted_labels(python_model, comment, "python")
+ assert preds == expected
+
+
+def test_java_invariance_deprecation(java_model, get_predicted_labels):
+ """Tests that noise doesn't break '@deprecated' detection"""
+ base_comment = "/** @deprecated Use newUserMethod() */"
+ pert_comment = "/** @DEPRECATED... Use newUserMethod()!!! */"
+
+ preds_base = get_predicted_labels(java_model, base_comment, "java")
+ preds_pert = get_predicted_labels(java_model, pert_comment, "java")
+
+ assert {"deprecation"} <= preds_base
+ assert preds_base == preds_pert
+
+
+def test_python_invariance_summary(python_model, get_predicted_labels):
+ """Tests that noise doesn't break a simple 'Summary' detection"""
+
+ base_comment = "a service specific account of type bar."
+ expected = {"Summary"}
+
+ # Perturbations
+ variants = [
+ base_comment,
+ "A SERVICE SPECIFIC ACCOUNT OF TYPE BAR.",
+ " a service specific account of type bar. ",
+ "a service specific account of type bar!!!",
+ ]
+
+ for comment in variants:
+ preds = get_predicted_labels(python_model, comment, "python")
+ assert preds == expected
+
+
+def test_pharo_invariance_intent(pharo_model, get_predicted_labels):
+ """Tests that noise doesn't break Pharo's 'Intent' detection"""
+
+ base_comment = '"i am a simple arrow like arrowhead."'
+ expected = {"Intent"}
+
+ # Perturbations
+ variants = [
+ base_comment,
+ '"I AM A SIMPLE ARROW LIKE ARROWHEAD."',
+ ' "i am a simple arrow like arrowhead." ',
+ '"i am a simple arrow like arrowhead !!"', #
+ ]
+
+ for comment in variants:
+ preds = get_predicted_labels(pharo_model, comment, "pharo")
+ assert preds == expected
+
+
+def test_python_invariance_typos_parameters(python_model, get_predicted_labels):
+ """
+ Tests typo tolerance
+
+ """
+
+ # Define the single expected outcome
+ expected_labels = {"Parameters"}
+
+ # Define the base case and all its variants (with typos)
+ variants = [
+ ":param user_id: The ID of the user.",
+ ":paramater user_id: The ID of the user.",
+ ":pram user_id: The ID of teh user.",
+ ]
+
+ # Loop through all variants and assert they all produce the *exact* expected outcome
+ for comment in variants:
+ preds = get_predicted_labels(python_model, comment, "python")
+ assert preds == expected_labels
+
+
+def test_java_invariance_semantic_summary(java_model, get_predicted_labels):
+ """
+ Tests semantic invariance
+
+ """
+
+ # Get the prediction for the base comment
+ base_comment = "/** Returns the user ID. */"
+ base_preds = get_predicted_labels(java_model, base_comment, "java")
+
+ # Define semantic paraphrases of the base comment
+ variants = [
+ base_comment,
+ "/** Gets the user ID. */",
+ "/** Fetches the ID for the user. */",
+ "/** A method to return the user's ID. */",
+ ]
+
+ # Check that the base prediction is valid (summary)
+ assert "summary" in base_preds
+
+ for comment in variants:
+ preds = get_predicted_labels(java_model, comment, "java")
+ assert preds == base_preds
diff --git a/turing/tests/behavioral/test_minimum_functionality.py b/turing/tests/behavioral/test_minimum_functionality.py
new file mode 100644
index 0000000000000000000000000000000000000000..f088e7656bb98d67aaebbdfc1bc8da37a1dc5e74
--- /dev/null
+++ b/turing/tests/behavioral/test_minimum_functionality.py
@@ -0,0 +1,52 @@
+import pytest
+
+# These tests check for basic, obvious classifications
+
+
+@pytest.mark.parametrize(
+ "comment, expected_labels",
+ [
+ ("test getfilestatus and related listing operations.", {"summary"}),
+ ("/* @deprecated Use something else. */", {"deprecation"}),
+ ("code source of this file http grepcode.com", {"Pointer"}),
+ ("this is balanced if each pool is balanced.", {"rational"}),
+ ("// For internal use only.", {"Ownership"}),
+ ("this impl delegates to the old filesystem", {"Expand"}),
+ ("/** Usage: new MyClass(arg1). */", {"usage"}),
+ ],
+)
+def test_java_mft(java_model, comment, expected_labels, get_predicted_labels):
+ preds = get_predicted_labels(java_model, comment, "java")
+ assert preds == expected_labels
+
+
+@pytest.mark.parametrize(
+ "comment, expected_labels",
+ [
+ ("a service specific account of type bar.", {"Summary"}),
+ (":param user_id: The ID of the user.", {"Parameters"}),
+ ("# TODO: Refactor this entire block.", {"DevelopmentNotes"}),
+ ("use this class if you want access to all of the mechanisms", {"Usage"}),
+ ("# create a new list by filtering duplicates from the input", {"Expand"}),
+ ],
+)
+def test_python_mft(python_model, comment, expected_labels, get_predicted_labels):
+ preds = get_predicted_labels(python_model, comment, "python")
+ assert preds == expected_labels
+
+
+@pytest.mark.parametrize(
+ "comment, expected_labels",
+ [
+ ("i am a simple arrow like arrowhead.", {"Intent"}),
+ ("the example below shows how to create a simple element", {"Example"}),
+ ("i provide a data structure independent api", {"Responsibilities"}),
+ ("the cache is cleared after each test to ensure isolation.", {"Keyimplementationpoints"}),
+ ("it is possible hovewer to customize a length fraction", {"Keymessages"}),
+ ("collaborators: BlElement, BlSpace", {"Collaborators"}),
+ ],
+)
+def test_pharo_mft(pharo_model, comment, expected_labels, get_predicted_labels):
+ """Tests basic keyword-to-label mapping for Pharo (e.g., 'I am...')."""
+ preds = get_predicted_labels(pharo_model, comment, "pharo")
+ assert preds == expected_labels
diff --git a/turing/tests/conftest.py b/turing/tests/conftest.py
new file mode 100644
index 0000000000000000000000000000000000000000..16c5c6e5a26b65bf0e7651248082882a7ffea364
--- /dev/null
+++ b/turing/tests/conftest.py
@@ -0,0 +1,305 @@
+import os
+from pathlib import Path
+import sys
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import turing.config as config
+from turing.dataset import DatasetManager
+from turing.reporting import TestReportGenerator
+
+# --- Path Setup ---
+script_dir = os.path.dirname(os.path.abspath(__file__))
+proj_root = os.path.dirname(os.path.dirname(script_dir))
+sys.path.append(proj_root)
+
+train_dir = os.path.join(proj_root, "turing", "modeling")
+sys.path.insert(1, train_dir)
+
+
+try:
+ # Import train.py
+ import turing.modeling.train as train
+except ImportError as e:
+ pytest.skip(
+ f"Could not import 'train.py'. Check sys.path. Error: {e}", allow_module_level=True
+ )
+
+# --- Reporting Setup ---
+execution_results = []
+active_categories = set()
+
+
+def clean_test_name(nodeid):
+ """Pulisce il nome del test rimuovendo parametri lunghi."""
+ parts = nodeid.split("::")
+ test_name = parts[-1]
+ if len(test_name) > 50:
+ test_name = test_name[:47] + "..."
+ return test_name
+
+
+def format_error_message(long_repr):
+ """Estrae solo l'errore principale."""
+ if not long_repr:
+ return ""
+ lines = str(long_repr).split("\n")
+ last_line = lines[-1]
+ clean_msg = last_line.replace("|", "-").strip()
+ if len(clean_msg) > 60:
+ clean_msg = clean_msg[:57] + "..."
+ return clean_msg
+
+
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_makereport(item, call):
+ outcome = yield
+ report = outcome.get_result()
+
+ if report.when == "call":
+ path_str = str(item.fspath)
+ category = "GENERAL"
+
+ if "unit" in path_str:
+ category = "UNIT"
+ elif "behavioral" in path_str:
+ category = "BEHAVIORAL"
+ elif "modeling" in path_str:
+ category = "MODELING"
+
+ active_categories.add(category)
+
+ # Simplified status mapping
+ status_map = {"passed": "PASS", "failed": "FAIL", "skipped": "SKIP"}
+ status_str = status_map.get(report.outcome, report.outcome.upper())
+
+ execution_results.append(
+ {
+ "Category": category,
+ "Module": item.fspath.basename,
+ "Test Case": clean_test_name(item.nodeid),
+ "Result": status_str,
+ "Time": f"{report.duration:.2f}s",
+ "Message": format_error_message(report.longrepr) if report.failed else "",
+ }
+ )
+
+
+def pytest_sessionfinish(session, exitstatus):
+ """Generate enhanced test report at session end."""
+ if not execution_results:
+ return
+
+ report_type = (
+ f"{list(active_categories)[0].lower()}_tests"
+ if len(active_categories) == 1
+ else "unit_and_behavioral_tests"
+ )
+
+ try:
+ manager = TestReportGenerator(context_name="turing", report_category=report_type)
+
+ # Main title
+ manager.add_header("Turing Test Execution Report")
+ manager.add_divider("section")
+
+ # Environment info
+ manager.add_environment_metadata()
+ manager.add_divider("thin")
+
+ df = pd.DataFrame(execution_results)
+
+ # Summary
+ total = len(df)
+ passed = len(df[df["Result"] == "PASS"])
+ failed = len(df[df["Result"] == "FAIL"])
+ summary = pd.DataFrame(
+ [
+ {
+ "Total": total,
+ "Passed": passed,
+ "Failed": failed,
+ "Success Rate": f"{(passed / total) * 100:.1f}%",
+ }
+ ]
+ )
+ manager.add_dataframe(summary, title="Executive Summary")
+
+ # Detailed breakdown by category
+ cols = ["Module", "Test Case", "Result", "Time", "Message"]
+
+ if len(active_categories) > 1:
+ manager.add_header("Detailed Test Results by Category", level=2)
+ manager.add_divider("thin")
+
+ for cat in sorted(active_categories):
+ subset = df[df["Category"] == cat][cols]
+ manager.add_dataframe(subset, title=f"{cat} Tests")
+ else:
+ manager.add_alert_box(
+ "All tests passed successfully!",
+ box_type="success"
+ )
+
+ manager.save("report.md")
+ except Exception as e:
+ print(f"\nError generating report: {e}")
+
+
+# --- Fixtures ---
+
+
+@pytest.fixture(scope="function")
+def manager() -> DatasetManager:
+ """
+ Provides an instance of DatasetManager for each test.
+ """
+ return DatasetManager()
+
+
+@pytest.fixture(scope="function")
+def fake_csv_data_dir(tmp_path: Path) -> Path:
+ """
+ Creates a temporary directory structure mocking 'data/interim/features/clean-aug-soft-k5000'
+ and populates it with minimal, valid CSV files for testing.
+
+ Returns:
+ Path: The path to the *parent* of 'features' (e.g., the mocked INTERIM_DATA_DIR).
+ """
+ interim_dir = tmp_path / "interim_test"
+ features_dir = interim_dir / "features" / "clean-aug-soft-k5000"
+ features_dir.mkdir(parents=True, exist_ok=True)
+
+ # Define minimal valid CSV content
+ csv_content = (
+ "combo,labels\n"
+ '"java code text","[1, 0, 0, 0, 0, 0, 0]"\n'
+ '"other java code","[0, 1, 0, 0, 0, 0, 0]"\n'
+ )
+
+ # Write mock files
+ (features_dir / "java_train.csv").write_text(csv_content)
+ (features_dir / "java_test.csv").write_text(csv_content)
+
+ # Return the root of the mocked interim directory
+ return interim_dir
+
+
+@pytest.fixture(scope="session")
+def mock_data():
+ """
+ Provides a minimal, consistent, session-scoped dataset for model testing.
+ This simulates the (X, y) data structure used for training and evaluation.
+ """
+ X = [
+ "this is java code for summary",
+ "python is great for parameters",
+ "a java example for usage",
+ "running python script for development notes",
+ "pharo is a language for intent",
+ "another java rational example",
+ ]
+
+ # Mock labels for a 'java' model (7 categories)
+ # Shape (6 samples, 7 features)
+ y = np.array(
+ [
+ [1, 0, 0, 0, 0, 0, 0],
+ [0, 1, 0, 0, 0, 0, 0],
+ [1, 0, 0, 1, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 0, 0, 0, 1, 0, 0],
+ [1, 0, 0, 0, 0, 0, 1],
+ ]
+ )
+ return {"X": X, "y": y}
+
+
+@pytest.fixture(scope="module")
+def trained_rf_model(mock_data, tmp_path_factory):
+ """
+ Provides a fully-trained RandomForestTfIdf model instance.
+ """
+ # Import locally to ensure proj_root is set
+ from modeling.models.randomForestTfIdf import RandomForestTfIdf
+
+ # Arrange
+ model = RandomForestTfIdf(language="java")
+
+ # Monkeypatch grid search parameters for maximum speed
+ model.grid_params = {
+ "tfidf__max_features": [10, 20], # Use minimal features
+ "clf__estimator__n_estimators": [2, 5], # Use minimal trees
+ }
+ model.params["cv_folds"] = 2 # Use minimal CV folds
+
+ # Create a persistent temp dir for this module's run
+ model_path = tmp_path_factory.mktemp("trained_rf_model")
+
+ # Act: Train the model
+ model.train(mock_data["X"], mock_data["y"], path=str(model_path), model_name="test_model")
+
+ # Yield the trained model and its save path
+ yield model, model_path
+
+
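+# The behavioral-test fixtures below reuse the model class, experiment name, and
+# base model name exposed by the imported training module.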
+MODEL_CLASS_TO_TEST = train.MODEL_CLASS
+MODEL_EXPERIMENT_NAME = train.EXP_NAME
+MODEL_NAME_BASE = train.MODEL_NAME
+
+
+@pytest.fixture(scope="session")
+def get_predicted_labels():
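+    """Factory fixture: returns a helper that runs a model on a single comment
+    (prepending the combo prefix when the input column is 'combo') and converts
+    the multi-hot prediction into a set of label names."""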
+ def _helper(model, comment_sentence: str, lang: str) -> set:
+ if config.INPUT_COLUMN == "combo":
+ combo_input = f"DummyClass.{lang} | {comment_sentence}"
+ input_data = [combo_input]
+ else:
+ input_data = [comment_sentence]
+
+ prediction_array = model.predict(input_data)[0]
+ labels_map = config.LABELS_MAP[lang]
+ predicted_labels = {labels_map[i] for i, val in enumerate(prediction_array) if val == 1}
+ return predicted_labels
+
+ return _helper
+
+
+@pytest.fixture(scope="module")
+def java_model():
+ """Loads the Java model from the config path"""
+ model_path = os.path.join(config.MODELS_DIR, MODEL_EXPERIMENT_NAME, f"{MODEL_NAME_BASE}_java")
+ if not os.path.exists(model_path):
+ pytest.skip(
+ "Production model not found. Skipping behavioral tests for Java.",
+ allow_module_level=True,
+ )
+ return MODEL_CLASS_TO_TEST(language="java", path=model_path)
+
+
+@pytest.fixture(scope="module")
+def python_model():
+ """Loads the Python model from the config path"""
+ model_path = os.path.join(
+ config.MODELS_DIR, MODEL_EXPERIMENT_NAME, f"{MODEL_NAME_BASE}_python"
+ )
+ if not os.path.exists(model_path):
+ pytest.skip(
+ "Production model not found. Skipping behavioral tests for Python.",
+ allow_module_level=True,
+ )
+ return MODEL_CLASS_TO_TEST(language="python", path=model_path)
+
+
+@pytest.fixture(scope="module")
+def pharo_model():
+ """Loads the Pharo model from the config path"""
+ model_path = os.path.join(config.MODELS_DIR, MODEL_EXPERIMENT_NAME, f"{MODEL_NAME_BASE}_pharo")
+ if not os.path.exists(model_path):
+ pytest.skip(
+ "Production model not found. Skipping behavioral tests for Pharo.",
+ allow_module_level=True,
+ )
+ return MODEL_CLASS_TO_TEST(language="pharo", path=model_path)
diff --git a/turing/tests/unit/test_api.py b/turing/tests/unit/test_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..27d7dccf645c0c12bfd920cb86ea02ded61e3f7c
--- /dev/null
+++ b/turing/tests/unit/test_api.py
@@ -0,0 +1,201 @@
+from unittest.mock import patch
+
+from fastapi.testclient import TestClient
+import numpy as np
+import pytest
+
+from turing.api.app import app
+from turing.api.schemas import PredictionRequest, PredictionResponse
+
+
+@pytest.fixture
+def client():
+ """Fixture that provides a test client for the FastAPI app."""
+ return TestClient(app)
+
+
+@pytest.fixture
+def mock_inference_engine():
+ """Fixture that provides a mocked inference engine."""
+ with patch('turing.api.app.inference_engine') as mock:
+ yield mock
+
+
+class TestHealthCheck:
+ """Test suite for the health check endpoint."""
+
+ def test_health_check_returns_ok(self, client):
+ """Test that the health check endpoint returns status ok."""
+ response = client.get("/")
+ assert response.status_code == 200
+ assert response.json() == {
+ "status": "ok",
+ "message": "Turing Code Classification API is ready."
+ }
+
+
+class TestPredictEndpoint:
+ """Test suite for the predict endpoint."""
+
+ def test_predict_success_java(self, client, mock_inference_engine):
+ """Test successful prediction for Java code."""
+ # Setup mock
+ mock_inference_engine.predict_payload.return_value = (
+ np.array([0, 1]), # raw predictions as numpy array
+ ["class", "method"], # labels
+ "run_id_123", # run_id
+ "models:/CodeBERTa_java/Production" # artifact
+ )
+
+ # Make request
+ request_data = {
+ "texts": ["public class Main", "public void test()"],
+ "language": "java"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Assertions
+ assert response.status_code == 200
+ data = response.json()
+ assert "predictions" in data
+ assert "labels" in data
+ assert "model_info" in data
+ assert data["labels"] == ["class", "method"]
+ assert data["model_info"]["language"] == "java"
+
+ def test_predict_success_python(self, client, mock_inference_engine):
+ """Test successful prediction for Python code."""
+ # Setup mock
+ mock_inference_engine.predict_payload.return_value = (
+ np.array([1, 0]), # raw predictions as numpy array
+ ["function", "class"], # labels
+ "run_id_456", # run_id
+ "models:/CodeBERTa_python/Production" # artifact
+ )
+
+ # Make request
+ request_data = {
+ "texts": ["def main():", "class MyClass:"],
+ "language": "python"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Assertions
+ assert response.status_code == 200
+ data = response.json()
+ assert data["labels"] == ["function", "class"]
+ assert data["model_info"]["language"] == "python"
+
+ def test_predict_success_pharo(self, client, mock_inference_engine):
+ """Test successful prediction for Pharo code."""
+ # Setup mock
+ mock_inference_engine.predict_payload.return_value = (
+ np.array([0]), # raw predictions as numpy array
+ ["method"], # labels
+ "run_id_789", # run_id
+ "models:/CodeBERTa_pharo/Production" # artifact
+ )
+
+ # Make request
+ request_data = {
+ "texts": ["initialize"],
+ "language": "pharo"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Assertions
+ assert response.status_code == 200
+ data = response.json()
+ assert data["labels"] == ["method"]
+ assert data["model_info"]["language"] == "pharo"
+
+ def test_predict_missing_texts(self, client):
+ """Test that prediction fails when texts are missing."""
+ request_data = {
+ "language": "java"
+ }
+ response = client.post("/predict", json=request_data)
+ assert response.status_code == 422 # Validation error
+
+ def test_predict_missing_language(self, client):
+ """Test that prediction fails when language is missing."""
+ request_data = {
+ "texts": ["public class Main"]
+ }
+ response = client.post("/predict", json=request_data)
+ assert response.status_code == 422 # Validation error
+
+ def test_predict_empty_texts(self, client, mock_inference_engine):
+ """Test prediction with empty texts list."""
+ mock_inference_engine.predict_payload.return_value = (
+ np.array([]), # raw predictions as empty numpy array
+ [], # labels
+ "run_id_000", # run_id
+ "models:/CodeBERTa_java/Production" # artifact
+ )
+
+ request_data = {
+ "texts": [],
+ "language": "java"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Should succeed with empty results
+ assert response.status_code == 200
+ data = response.json()
+ assert data["predictions"] == []
+ assert data["labels"] == []
+
+ def test_predict_error_handling(self, client, mock_inference_engine):
+ """Test that prediction endpoint handles errors gracefully."""
+ # Setup mock to raise an exception
+ mock_inference_engine.predict_payload.side_effect = Exception("Model loading failed")
+
+ request_data = {
+ "texts": ["public class Main"],
+ "language": "java"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Should return 500 error
+ assert response.status_code == 500
+ assert "Model loading failed" in response.json()["detail"]
+
+ def test_predict_invalid_language(self, client, mock_inference_engine):
+ """Test prediction with invalid language parameter."""
+ # The model might raise an error for unsupported language
+ mock_inference_engine.predict_payload.side_effect = ValueError("Unsupported language: cobol")
+
+ request_data = {
+ "texts": ["IDENTIFICATION DIVISION."],
+ "language": "cobol"
+ }
+ response = client.post("/predict", json=request_data)
+
+ # Should return 500 error
+ assert response.status_code == 500
+ assert "Unsupported language" in response.json()["detail"]
+
+
+class TestAPISchemas:
+ """Test suite for API schemas validation."""
+
+ def test_prediction_request_valid(self):
+ """Test that PredictionRequest validates correct data."""
+ request = PredictionRequest(
+ texts=["public void main"],
+ language="java"
+ )
+ assert request.texts == ["public void main"]
+ assert request.language == "java"
+
+ def test_prediction_response_valid(self):
+ """Test that PredictionResponse validates correct data."""
+ response = PredictionResponse(
+ predictions=[0, 1],
+ labels=["class", "method"],
+ model_info={"artifact": "models:/CodeBERTa_java/Production", "language": "java"}
+ )
+ assert response.predictions == [0, 1]
+ assert response.labels == ["class", "method"]
+ assert response.model_info["language"] == "java"
diff --git a/turing/tests/unit/test_config.py b/turing/tests/unit/test_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..fac2d83ad3d5ce0c0f15e658850e8b7f46b842e2
--- /dev/null
+++ b/turing/tests/unit/test_config.py
@@ -0,0 +1,133 @@
+import importlib
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+# Import the module to be tested
+import turing.config as config
+
+
+@pytest.mark.config
+class TestConfig:
+ """
+ Test suite for validating the project's configuration module (config.py).
+
+ These tests verify that paths are structured correctly, critical constants
+ are of the expected type and value, and module-level logic
+ (like calculations and .env loading) executes as intended.
+ """
+
+ def test_proj_root_is_correctly_identified(self):
+ """
+ Validates that PROJ_ROOT is a Path object and points to the
+ actual project root directory (which should contain 'pyproject.toml').
+ """
+ assert isinstance(config.PROJ_ROOT, Path)
+ assert config.PROJ_ROOT.is_dir()
+
+ # A common "sanity check" is to look for a known file at the root
+ expected_file = config.PROJ_ROOT / "pyproject.toml"
+ assert expected_file.is_file(), (
+ f"PROJ_ROOT ({config.PROJ_ROOT}) does not seem to be the project root. "
+ f"Could not find {expected_file}"
+ )
+
+ def test_directory_paths_are_correctly_structured(self):
+ """
+ Ensures all key directory variables are Path objects
+ and are correctly parented under PROJ_ROOT.
+ """
+ # List of all directory variables defined in config.py
+ path_vars = [
+ config.DATA_DIR,
+ config.RAW_DATA_DIR,
+ config.INTERIM_DATA_DIR,
+ config.PROCESSED_DATA_DIR,
+ config.EXTERNAL_DATA_DIR,
+ config.MODELS_DIR,
+ config.REPORTS_DIR,
+ config.FIGURES_DIR,
+ ]
+
+ for path_var in path_vars:
+ assert isinstance(path_var, Path)
+ # Check that PROJ_ROOT is an ancestor of this path
+ assert config.PROJ_ROOT in path_var.parents
+
+ # Spot-check a few for correct relative paths
+ assert config.DATA_DIR == config.PROJ_ROOT / "data"
+ assert config.RAW_DATA_DIR == config.PROJ_ROOT / "data" / "raw"
+ assert config.FIGURES_DIR == config.PROJ_ROOT / "reports" / "figures"
+
+ def test_dataset_constants_are_valid(self):
+ """
+ Validates that critical dataset constants are non-empty and of
+ the correct type.
+ """
+ assert isinstance(config.DATASET_HF_ID, str)
+ assert config.DATASET_HF_ID == "NLBSE/nlbse26-code-comment-classification"
+
+ assert isinstance(config.LANGS, list)
+ assert len(config.LANGS) == 3
+ assert "java" in config.LANGS
+
+ assert isinstance(config.INPUT_COLUMN, str) and config.INPUT_COLUMN
+ assert isinstance(config.LABEL_COLUMN, str) and config.LABEL_COLUMN
+
+ def test_labels_map_and_total_categories_are_correct(self):
+ """
+ Validates the LABELS_MAP structure and ensures TOTAL_CATEGORIES
+ is correctly calculated from it.
+ """
+ assert isinstance(config.LABELS_MAP, dict)
+
+ # Ensure all languages in LANGS are keys in LABELS_MAP
+ for lang in config.LANGS:
+ assert lang in config.LABELS_MAP
+ assert isinstance(config.LABELS_MAP[lang], list)
+ assert len(config.LABELS_MAP[lang]) > 0
+
+ # Validate the derived calculation
+ expected_total = (
+ len(config.LABELS_MAP["java"])
+ + len(config.LABELS_MAP["python"])
+ + len(config.LABELS_MAP["pharo"])
+ )
+ assert config.TOTAL_CATEGORIES == expected_total
+ assert config.TOTAL_CATEGORIES == 18 # 7 + 5 + 6
+
+ def test_numeric_parameters_are_positive(self):
+ """
+ Ensures that numeric scoring and training parameters are positive
+ and of the correct type.
+ """
+ numeric_params = {
+ "MAX_AVG_RUNTIME": config.MAX_AVG_RUNTIME,
+ "MAX_AVG_FLOPS": config.MAX_AVG_FLOPS,
+ "DEFAULT_BATCH_SIZE": config.DEFAULT_BATCH_SIZE,
+ "DEFAULT_NUM_ITERATIONS": config.DEFAULT_NUM_ITERATIONS,
+ }
+
+ for name, value in numeric_params.items():
+ assert isinstance(value, (int, float)), f"{name} is not numeric"
+ assert value > 0, f"{name} must be positive"
+
+ @patch("dotenv.load_dotenv")
+ def test_load_dotenv_is_called_on_module_load(self, mock_load_dotenv):
+ """
+ Tests that the load_dotenv() function is executed when the
+ config.py module is loaded.
+
+ This requires reloading the module, as it's likely already been
+ imported by pytest or conftest.
+ """
+ # Arrange (Patch is active)
+
+ # Act
+ # Reload the config module to trigger its top-level statements
+ importlib.reload(config)
+
+ # Assert
+ # Check that the patched load_dotenv was called
+ mock_load_dotenv.assert_called_once()
diff --git a/turing/tests/unit/test_dataset.py b/turing/tests/unit/test_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..271b19932e6fcf88faf65fc3b15cc846c8003961
--- /dev/null
+++ b/turing/tests/unit/test_dataset.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+
+import pytest
+
+# Project modules are importable thanks to conftest.py
+import turing.config as config
+from turing.dataset import DatasetManager
+
+
+@pytest.mark.data_loader
+class TestDatasetManager:
+ """
+ Unit tests for the DatasetManager class.
+ This test suite validates initialization, data transformation logic,
+ and data loading mechanisms, including error handling.
+ """
+
+ def test_initialization_paths_are_correct(self, manager: DatasetManager):
+ """
+ Verifies that the DatasetManager initializes with the correct
+ Hugging Face ID and constructs its paths as expected.
+ """
+ assert manager.hf_id == "NLBSE/nlbse26-code-comment-classification"
+ assert "data/raw" in str(manager.raw_data_dir)
+ # base_interim_path should contain either 'base' or 'features'
+ path_str = str(manager.base_interim_path)
+ assert "data/interim" in path_str and ("base" in path_str or "features" in path_str)
+
+ @pytest.mark.parametrize(
+ "input_labels, expected_output",
+ [
+ ([1, 0, 1], "[1, 0, 1]"), # Case: Standard list
+ ("[1, 0, 1]", "[1, 0, 1]"), # Case: Already a string
+ ([], "[]"), # Case: Empty list
+ (None, None), # Case: None value
+ ],
+ )
+ def test_format_labels_for_csv(self, manager: DatasetManager, input_labels, expected_output):
+ """
+ Tests the internal _format_labels_for_csv method to ensure
+ it correctly serializes label lists (or handles other inputs) to strings.
+ """
+ # Arrange
+ example = {"labels": input_labels}
+
+ # Act
+ formatted_example = manager._format_labels_for_csv(example)
+
+ # Assert
+ assert formatted_example["labels"] == expected_output
+
+ def test_get_dataset_raises_file_not_found(self, monkeypatch):
+ """
+ Ensures that get_dataset() raises a FileNotFoundError when
+ the target interim CSV files do not exist.
+ """
+ # Arrange
+ # Patch the config to point to a non-existent directory
+ fake_dir = Path("/path/that/is/totally/fake")
+ monkeypatch.setattr(config, "INTERIM_DATA_DIR", fake_dir)
+
+ # Manager must be initialized *after* patching config
+ manager_with_fake_path = DatasetManager()
+
+ # Act & Assert
+ with pytest.raises(FileNotFoundError, match="Dataset CSV files not found."):
+ manager_with_fake_path.get_dataset()
+
+ def test_get_dataset_success_and_label_parsing(self, fake_csv_data_dir: Path, monkeypatch):
+ """
+ Verifies that get_dataset() successfully loads data from mock CSVs
+ and correctly parses the string-formatted labels back into lists.
+ """
+ # Arrange
+ # Point the config at our temporary fixture directory
+ monkeypatch.setattr(config, "INTERIM_DATA_DIR", fake_csv_data_dir)
+ manager = DatasetManager()
+
+ # Act
+ dataset = manager.get_dataset()
+
+ # Assert
+ # Check that the correct splits were loaded
+ assert "java_train" in dataset
+ assert "java_test" in dataset
+ assert "python_train" not in dataset # Confirms only found files are loaded
+
+ # Check content integrity
+ assert len(dataset["java_train"]) == 2
+ assert dataset["java_train"][0]["combo"] == "java code text"
+
+        # Check that the string '[1, 0, ...]' was parsed back into a list
+ expected_labels = [1, 0, 0, 0, 0, 0, 0]
+ assert dataset["java_train"][0]["labels"] == expected_labels
+ assert isinstance(dataset["java_train"][0]["labels"], list)
diff --git a/turing/tests/unit/test_features.py b/turing/tests/unit/test_features.py
new file mode 100644
index 0000000000000000000000000000000000000000..6593a6425d4e8fb345c69db2e227308d5e90fb5d
--- /dev/null
+++ b/turing/tests/unit/test_features.py
@@ -0,0 +1,121 @@
+import pandas as pd
+import pytest
+
+from turing.features import (
+ FeatureEngineer,
+ FeaturePipelineConfig,
+ TextProcessor,
+)
+
+# --- Fixtures ---
+
+
+@pytest.fixture(scope="module")
+def full_config():
+ """Returns a config with stopwords and lemmatization enabled."""
+ return FeaturePipelineConfig(
+ use_stopwords=True,
+ use_lemmatization=True,
+ use_combo_feature=False,
+ max_features=5000,
+ min_comment_length=10,
+ max_comment_length=500,
+ enable_augmentation=False,
+ custom_tags="test",
+ )
+
+
+@pytest.fixture(scope="module")
+def basic_config():
+ """Returns a config with all extra steps disabled."""
+ return FeaturePipelineConfig(
+ use_stopwords=False,
+ use_lemmatization=False,
+ use_combo_feature=False,
+ max_features=100,
+ min_comment_length=5,
+ max_comment_length=200,
+ enable_augmentation=False,
+ )
+
+
+@pytest.fixture(scope="module")
+def full_processor(full_config):
+ """A TextProcessor with all steps enabled."""
+ return TextProcessor(config=full_config, language="english")
+
+
+@pytest.fixture(scope="module")
+def basic_processor(basic_config):
+ """A TextProcessor with only basic cleaning (lowercase, punctuation)."""
+ return TextProcessor(config=basic_config, language="english")
+
+
+# --- Tests ---
+
+
+class TestFeaturePipelineConfig:
+ def test_config_id_generation(self, full_config, basic_config):
+ """Tests that the readable ID is generated correctly."""
+ assert full_config.hash_id == "clean-k5000-test"
+ assert basic_config.hash_id == "clean-k100"
+
+ def test_config_attributes(self, full_config):
+ """Tests that attributes are set correctly."""
+ assert full_config.use_stopwords is True
+ assert full_config.use_lemmatization is True
+ assert full_config.max_features == 5000
+
+
+class TestTextProcessor:
+ def test_clean_text_basic(self, basic_processor):
+ """Tests lowercase and punctuation removal."""
+ text = "This is a TEST... with punctuation!!"
+ expected = "this is a test with punctuation"
+ assert basic_processor.clean_text(text) == expected
+
+ def test_clean_text_stopwords(self, full_processor, basic_processor):
+ """Tests stopword removal logic."""
+ text = "this is a test with a stopword"
+
+ # With stopwords enabled
+ expected_full = "test stopword"
+ assert full_processor.clean_text(text) == expected_full
+
+ # With stopwords disabled
+ expected_basic = "this is a test with a stopword"
+ assert basic_processor.clean_text(text) == expected_basic
+
+ def test_clean_text_lemmatization(self, full_processor, basic_processor):
+ """Tests lemmatization logic."""
+ text = "running tests while dogs are barking"
+
+ # With lemmatization enabled
+ expected_full = "running test dog barking" # 'are' and 'while' are stopwords
+ assert full_processor.clean_text(text) == expected_full
+
+ # With lemmatization disabled
+ expected_basic = "running tests while dogs are barking"
+ assert basic_processor.clean_text(text) == expected_basic
+
+ def test_clean_text_handles_none(self, basic_processor):
+ """Tests that it doesn't crash on None or pd.NA."""
+ assert basic_processor.clean_text(None) == ""
+ assert basic_processor.clean_text(pd.NA) == ""
+
+
+class TestFeatureEngineer:
+ def test_extract_numeric_features(self, basic_config):
+ """Tests that extract_features_for_check adds metadata features."""
+ fe = FeatureEngineer(config=basic_config)
+ data = {"comment_sentence": ["This is short.", "This one is a bit longer.", ""]}
+ df = pd.DataFrame(data)
+ df_out = fe.extract_features_for_check(df)
+
+ assert "f_length" in df_out.columns
+ assert "f_word_count" in df_out.columns
+ assert "f_starts_verb" in df_out.columns
+ assert "text_hash" in df_out.columns
+
+ assert df_out["f_length"].tolist() == [14, 25, 0]
+ assert df_out["f_word_count"].tolist() == [3, 6, 0]
diff --git a/turing/tests/unit/test_model.py b/turing/tests/unit/test_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ee8173e09935280b17ae0eae20ec1b1dbcb764b
--- /dev/null
+++ b/turing/tests/unit/test_model.py
@@ -0,0 +1,135 @@
+import inspect
+
+import numpy as np
+import pytest
+
+from turing.config import EXISTING_MODELS
+import turing.modeling.models as my_models
+
+
+@pytest.fixture
+def get_model(request):
+    """Indirect fixture: instantiates the model class found in the module named by the parameter, using the first configured language."""
+ model_name = request.param
+
+ module = getattr(my_models, model_name, None)
+
+ classes = [
+ cls
+ for _, cls in inspect.getmembers(module, inspect.isclass)
+ if cls.__module__ == module.__name__
+ ]
+
+ cls = classes[0]
+
+ from turing.config import LANGS
+
+ lang = LANGS[0]
+ return cls(language=lang)
+
+
+@pytest.mark.parametrize("get_model", EXISTING_MODELS, indirect=True)
+def test_model_initialization(get_model):
+ """
+ Test that each model class can be initialized without errors.
+ """
+ model = get_model
+ assert model is not None
+ from turing.modeling.baseModel import BaseModel
+
+ assert isinstance(model, BaseModel)
+
+
+@pytest.mark.parametrize("get_model", EXISTING_MODELS, indirect=True)
+def test_model_setup(get_model):
+ """
+ Test that each model class sets up its internal model correctly.
+ """
+ model = get_model
+ model.setup_model()
+ assert model.model is not None
+
+
+@pytest.mark.parametrize("get_model", EXISTING_MODELS, indirect=True)
+def test_model_train(tmp_path, get_model):
+ """
+ Test that each model class can run the train method without errors.
+ """
+ model = get_model
+ model.setup_model()
+
+ # Using mock data for training
+ X_train = ["sample text data"] * 10
+
+ y_train = [0, 1] * 5
+
+ y_train = np.array(y_train).reshape(-1, 1)
+
+    # Temporary output directory (not passed to train in this test)
+ fake_path = tmp_path / "out"
+ fake_path.mkdir()
+
+ parameters = model.train(X_train, y_train)
+
+ assert isinstance(parameters, dict)
+ assert model.model is not None
+
+
+@pytest.mark.parametrize("get_model", EXISTING_MODELS, indirect=True)
+def test_model_evaluate(tmp_path, get_model):
+ """
+ Test that each model class can run the evaluate method without errors.
+ """
+ model = get_model
+ model.setup_model()
+
+ # Using mock data for training
+ X_train = ["sample text data"] * 10
+
+ y_train = [0, 1] * 5
+
+ y_train = np.array(y_train).reshape(-1, 1)
+
+    # Temporary output directory (not passed to train in this test)
+ fake_path = tmp_path / "out"
+ fake_path.mkdir()
+
+ _ = model.train(X_train, y_train)
+
+ # Using mock data for evaluation
+ X_test = ["sample text data"] * 10
+ y_test = [0, 1] * 5
+ metrics = model.evaluate(X_test, y_test)
+
+ assert isinstance(metrics, dict)
+ assert metrics and "accuracy" in metrics
+ assert "f1_score" in metrics or "f1_score_micro" in metrics
+
+
+@pytest.mark.parametrize("get_model", EXISTING_MODELS, indirect=True)
+def test_model_predict(tmp_path, get_model):
+ """
+ Test that each model class can run the predict method without errors.
+ """
+ model = get_model
+ model.setup_model()
+
+ # Using mock data for training
+ X_train = ["sample text data"] * 10
+
+ y_train = [0, 1] * 5
+
+ y_train = np.array(y_train).reshape(-1, 1)
+
+    # Temporary output directory (not passed to train in this test)
+ fake_path = tmp_path / "out"
+ fake_path.mkdir()
+
+ _ = model.train(X_train, y_train)
+
+ # Using mock data for prediction
+ X_input = ["sample text data"] * 3
+ predictions = model.predict(X_input)
+
+ assert predictions is not None
+ assert len(predictions) == len(X_input)