Added citation
Files changed:
- app.py (+1, -6)
- content.py (+14, -7)
app.py CHANGED
@@ -18,12 +18,7 @@ from src.utils import (
     load_raw_model_data,
     build_year_column_mapping,
 )
-from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS
-
-
-# TODO move to file
-CIT_BTN_TEXT = ""
-CIT_BTN_LABEL = ""
+from content import LLMLAGBENCH_INTRO, LEADERBOARD_INTRO, MODEL_COMPARISON_INTRO, AUTHORS, CIT_BTN_TEXT, CIT_BTN_LABEL
 
 
 ### CONFIGURATION
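This change moves the citation strings out of app.py, resolving the old `# TODO move to file` note; app.py now only imports them. For context, a rough sketch of how `CIT_BTN_LABEL` and `CIT_BTN_TEXT` might be surfaced in the Space's UI, assuming a Gradio Blocks app (the accordion layout and component choices below are illustrative assumptions, not the actual code in app.py):

# Illustrative only: assumes a Gradio Blocks layout, which app.py may not match.
import gradio as gr

from content import LLMLAGBENCH_INTRO, CIT_BTN_TEXT, CIT_BTN_LABEL

with gr.Blocks() as demo:
    gr.Markdown(LLMLAGBENCH_INTRO)
    # Collapsible panel labeled with CIT_BTN_LABEL, holding the @misc entry
    # from CIT_BTN_TEXT so visitors can copy the citation in place.
    with gr.Accordion(CIT_BTN_LABEL, open=False):
        gr.Code(CIT_BTN_TEXT)

if __name__ == "__main__":
    demo.launch()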
content.py CHANGED
@@ -1,8 +1,3 @@
-"""
-Content text for the LLMLagBench application.
-Contains descriptive text for various sections of the UI.
-"""
-
 # Section under main title
 LLMLAGBENCH_INTRO = """
 Large Language Models (LLMs) are pretrained on textual data up to a specific temporal cutoff, creating

@@ -11,7 +6,7 @@ external sources. More subtly, when this limitation is unknown or ignored, LLMs
 outdated time-sensitive information with general knowledge during reasoning tasks, **potentially
 compromising response accuracy**.
 
-LLMLagBench provides a systematic approach for **identifying the earliest probable temporal boundaries** of
+LLMLagBench (https://arxiv.org/abs/2511.12116) provides a systematic approach for **identifying the earliest probable temporal boundaries** of
 an LLM's training data by evaluating its knowledge of recent events. The benchmark comprises **1,700+ curated questions** about events sampled from news reports published between 2020 and 2025 (we plan to update the question set regularly). Each
 question could not be accurately answered before the event was reported in news media. We evaluate model
 responses using a **0-2 scale faithfulness metric** and apply the **PELT (Pruned Exact Linear Time)** changepoint

@@ -62,4 +57,16 @@ AUTHORS = """
 <div style='text-align: center; font-size: 0.9em; color: #666; margin-top: 5px; margin-bottom: 15px;'>
 <em>Piotr Pęzik, Konrad Kaczyński, Maria Szymańska, Filip Żarnecki, Zuzanna Deckert, Jakub Kwiatkowski, Wojciech Janowski</em>
 </div>
-"""
+"""
+
+CIT_BTN_TEXT = """@misc{pęzik2025llmlagbenchidentifyingtemporaltraining,
+    title={LLMLagBench: Identifying Temporal Training Boundaries in Large Language Models},
+    author={Piotr Pęzik and Konrad Kaczyński and Maria Szymańska and Filip Żarnecki and Zuzanna Deckert and Jakub Kwiatkowski and Wojciech Janowski},
+    year={2025},
+    eprint={2511.12116},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL},
+    url={https://arxiv.org/abs/2511.12116},
+}"""
+
+CIT_BTN_LABEL = "📄 BibTeX Citation"
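The intro text above scores each answer on a 0-2 faithfulness scale and then runs PELT changepoint detection to locate the earliest probable training boundary. As a rough illustration of that segmentation step (not the benchmark's actual pipeline), the open-source ruptures library implements PELT; the penalty value, cost model, and synthetic scores below are all assumptions:

# Rough sketch of PELT changepoint detection on faithfulness scores, using the
# ruptures library. The benchmark's real preprocessing, penalty, and cost model
# are not specified in this commit, so those choices here are assumptions.
import numpy as np
import ruptures as rpt

# Synthetic stand-in for per-question scores (0-2 scale) ordered by event date:
# high faithfulness before the training cutoff, low after it.
rng = np.random.default_rng(0)
scores = np.concatenate([
    rng.normal(1.8, 0.2, 300),  # events the model "knows"
    rng.normal(0.4, 0.3, 150),  # events after the presumed cutoff
])

# PELT with an L2 (mean-shift) cost; pen controls how many breaks are reported.
algo = rpt.Pelt(model="l2", min_size=20).fit(scores)
breakpoints = algo.predict(pen=10)

# The first breakpoint indexes the candidate boundary question; mapping it back
# to that question's event date gives the estimated training cutoff.
print(breakpoints)  # e.g. [300, 450]; the trailing value is just len(scores)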