Tristan Thrush
committed on
Commit
·
3f9ceca
1
Parent(s):
ac14940
simplified pushing to hub
Browse files
app.py
CHANGED
|
@@ -11,8 +11,8 @@ from huggingface_hub import Repository
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
from pathlib import Path
|
| 13 |
import json
|
| 14 |
-
from filelock import FileLock
|
| 15 |
from utils import force_git_push
|
|
|
|
| 16 |
|
| 17 |
# These variables are for storing the mturk HITs in a Hugging Face dataset.
|
| 18 |
if Path(".env").is_file():
|
|
@@ -26,6 +26,25 @@ repo = Repository(
|
|
| 26 |
local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
| 27 |
)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Now let's run the app!
|
| 30 |
pipe = pipeline("sentiment-analysis")
|
| 31 |
|
|
@@ -80,6 +99,14 @@ with demo:
|
|
| 80 |
toggle_final_submit_preview = gr.update(visible=done)
|
| 81 |
toggle_final_submit = gr.update(visible=False)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
|
| 84 |
|
| 85 |
# Input fields
|
|
@@ -96,32 +123,6 @@ with demo:
|
|
| 96 |
with gr.Column(visible=False) as final_submit_preview:
|
| 97 |
submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
|
| 98 |
|
| 99 |
-
# Store the HIT data into a Hugging Face dataset.
|
| 100 |
-
# The HIT is also stored and logged on mturk when post_hit_js is run below.
|
| 101 |
-
# This _store_in_huggingface_dataset function just demonstrates how easy it is
|
| 102 |
-
# to automatically create a Hugging Face dataset from mturk.
|
| 103 |
-
def _store_in_huggingface_dataset(state):
    """Append the finished HIT to the local JSONL file and push it to the hub.

    Every entry of ``state["data"]`` is written as one JSON line, merged with
    the HIT's ``assignmentId``. The append, commit and push all happen while
    holding a file lock on ``DATA_FILE + ".lock"``.

    Returns ``None`` when the repo reports itself clean after the write (no
    push is attempted), otherwise returns ``state``.
    """
    # The context manager releases the lock on every exit path, including the
    # early return below and any exception raised while writing or pushing.
    with FileLock(DATA_FILE + ".lock"):
        with open(DATA_FILE, "a") as jsonlfile:
            serialized_rows = []
            for datum in state["data"]:
                tagged = dict({"assignmentId": state["assignmentId"]}, **datum)
                serialized_rows.append(json.dumps(tagged))
            jsonlfile.write("\n".join(serialized_rows) + "\n")

        if repo.is_repo_clean():
            logger.info("Repo currently clean. Ignoring push_to_hub")
            return None

        repo.git_add(auto_lfs_track=True)
        repo.git_commit("Auto commit by space")
        # FORCE_PUSH is configured outside this function; "yes" selects the
        # force_git_push helper from utils instead of a regular push.
        if FORCE_PUSH == "yes":
            force_git_push(repo)
        else:
            repo.git_push()

    return state
|
| 124 |
-
|
| 125 |
# Button event handlers
|
| 126 |
get_window_location_search_js = """
|
| 127 |
function(text_input, label_input, state, dummy) {
|
|
@@ -157,7 +158,7 @@ with demo:
|
|
| 157 |
"""
|
| 158 |
|
| 159 |
submit_hit_button.click(
|
| 160 |
-
|
| 161 |
inputs=[state],
|
| 162 |
outputs=[state],
|
| 163 |
_js=post_hit_js,
|
|
@@ -173,7 +174,7 @@ with demo:
|
|
| 173 |
"""
|
| 174 |
|
| 175 |
submit_hit_button_preview.click(
|
| 176 |
-
|
| 177 |
inputs=[state],
|
| 178 |
outputs=[state],
|
| 179 |
_js=refresh_app_js,
|
|
|
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
from pathlib import Path
|
| 13 |
import json
|
|
|
|
| 14 |
from utils import force_git_push
|
| 15 |
+
import threading
|
| 16 |
|
| 17 |
# These variables are for storing the mturk HITs in a Hugging Face dataset.
|
| 18 |
if Path(".env").is_file():
|
|
|
|
| 26 |
local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
| 27 |
)
|
| 28 |
|
| 29 |
+
# This function pushes the HIT data written in data.jsonl to our Hugging Face
|
| 30 |
+
# dataset every minute. Adjust the frequency to suit your needs.
|
| 31 |
+
def asynchronous_push(f_stop, interval=60):
    """Commit and push pending changes in ``repo``, then schedule the next run.

    If the repo is clean nothing is pushed. Otherwise all changes are added,
    committed and pushed (via ``force_git_push`` when ``FORCE_PUSH == "yes"``,
    else via ``repo.git_push()``). Unless ``f_stop`` is set, the function then
    re-arms itself on a ``threading.Timer``.

    Args:
        f_stop: A ``threading.Event``; once set, no further run is scheduled.
        interval: Seconds to wait before the next run. Defaults to 60.

    Raises:
        Whatever the git operations raise. The exception still propagates to
        the caller (or to the timer thread's exception hook), but the next run
        is scheduled first so a single failed push does not end the loop.
    """
    try:
        if repo.is_repo_clean():
            print("Repo currently clean. Ignoring push_to_hub")
        else:
            repo.git_add(auto_lfs_track=True)
            repo.git_commit("Auto commit by space")
            if FORCE_PUSH == "yes":
                force_git_push(repo)
            else:
                repo.git_push()
    finally:
        # Reschedule in `finally`: previously an exception from any git call
        # skipped this step and silently stopped all future pushes.
        if not f_stop.is_set():
            # call again in `interval` seconds
            threading.Timer(interval, asynchronous_push, [f_stop, interval]).start()
|
| 44 |
+
|
| 45 |
+
f_stop = threading.Event()
|
| 46 |
+
asynchronous_push(f_stop)
|
| 47 |
+
|
| 48 |
# Now let's run the app!
|
| 49 |
pipe = pipeline("sentiment-analysis")
|
| 50 |
|
|
|
|
| 99 |
toggle_final_submit_preview = gr.update(visible=done)
|
| 100 |
toggle_final_submit = gr.update(visible=False)
|
| 101 |
|
| 102 |
+
if state["cnt"] == total_cnt:
|
| 103 |
+
# Write the HIT data to our local dataset because the person has
|
| 104 |
+
# submitted everything now.
|
| 105 |
+
with open(DATA_FILE, "a") as jsonlfile:
|
| 106 |
+
json_data_with_assignment_id =\
|
| 107 |
+
[json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
|
| 108 |
+
jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
|
| 109 |
+
|
| 110 |
return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
|
| 111 |
|
| 112 |
# Input fields
|
|
|
|
| 123 |
with gr.Column(visible=False) as final_submit_preview:
|
| 124 |
submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
# Button event handlers
|
| 127 |
get_window_location_search_js = """
|
| 128 |
function(text_input, label_input, state, dummy) {
|
|
|
|
| 158 |
"""
|
| 159 |
|
| 160 |
submit_hit_button.click(
|
| 161 |
+
lambda state: state,
|
| 162 |
inputs=[state],
|
| 163 |
outputs=[state],
|
| 164 |
_js=post_hit_js,
|
|
|
|
| 174 |
"""
|
| 175 |
|
| 176 |
submit_hit_button_preview.click(
|
| 177 |
+
lambda state: state,
|
| 178 |
inputs=[state],
|
| 179 |
outputs=[state],
|
| 180 |
_js=refresh_app_js,
|