Spaces:

ben-epstein
/

ner-spans-to-tokens-tags

Runtime error

ben-epstein committed on May 6, 2023

Commit

4bad7b8

unverified ·

1 Parent(s): c417e30

small demo

Files changed (3) hide show

README.md CHANGED Viewed

@@ -11,3 +11,8 @@ license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Simple app showcasing the [spacy-to-hf](https://github.com/Ben-Epstein/spacy-to-hf) repo
+Paste in your NER span data and get back a Hugging Face dataset ready
+for fine-tuning a token-classification model!

app.py ADDED Viewed

+import streamlit as st
+from spacy_to_hf import spacy_to_hf
+import os
+import spacy
+from datasets import Dataset
+import json
+from json import JSONDecodeError
+try:
+    nlp = spacy.load("en_core_web_sm")
+except:
+    os.system("python -m spacy download en_core_web_sm")
+    st.experimental_rerun()
+demo_option = [
+    {
+        "text": "Planned to go to the Apple Storefront on Tuesday",
+        "spans": [
+                {"start": 0, "end": 7, "label": "Action"},
+                {"start": 21, "end": 37, "label": "Loc"},
+                {"start": 41, "end": 48, "label": "Date"},
+            ]
+    }
+]
+tokenizers = [
+    "bert-base-uncased",
+    "bert-base-cased",
+    "distilbert-base-uncased",
+    "distilbert-base-cased",
+    "roberta-base",
+]
+tok = st.selectbox("Pick a tokenizer", tokenizers)
+spacy_data = st.text_area("Input your NER Span data here")
+if spacy_data or st.button("Or try an example"):
+    run_data = None
+    if spacy_data:
+        try:
+            run_data = json.loads(spacy_data)
+        except JSONDecodeError as e:
+            st.warning(f"Invalid JSON data, try again\n{str(e)}")
+    else:
+        run_data = demo_option
+    if run_data:
+        st.write("Spacy input data:")
+        st.json(run_data)
+        hf_data = spacy_to_hf(run_data, tok)
+        df = Dataset.from_dict(hf_data).to_pandas()
+        st.write("Output huggingface format:")
+        st.dataframe(df)

requirements.txt ADDED Viewed

+streamlit
+spacy-to-hf
+datasets