Spaces:
Sleeping
Sleeping
feat: Init
Browse files- Dockerfile +17 -0
- README.md +54 -11
- app.py +299 -0
- cards.py +177 -0
- constants.py +125 -0
- requirements.txt +1 -0
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
+
# you will also find guides on how best to write your Dockerfile
|
| 3 |
+
|
| 4 |
+
FROM python:3.9
|
| 5 |
+
|
| 6 |
+
WORKDIR /code
|
| 7 |
+
|
| 8 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 9 |
+
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 11 |
+
|
| 12 |
+
COPY . .
|
| 13 |
+
|
| 14 |
+
ENV H2O_WAVE_LISTEN=":7860"
|
| 15 |
+
ENV H2O_WAVE_ADDRESS='http://127.0.0.1:7860'
|
| 16 |
+
|
| 17 |
+
CMD ["wave", "run", "app", "--no-reload"]
|
README.md
CHANGED
|
@@ -1,11 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div align='center'>
|
| 2 |
+
|
| 3 |
+
<h1>WaveTon</h1>
|
| 4 |
+
💯 Wave applications
|
| 5 |
+
|
| 6 |
+
<br>
|
| 7 |
+
<br>
|
| 8 |
+
|
| 9 |
+
[](https://github.com/vopani/waveton/blob/master/LICENSE)
|
| 10 |
+
[](https://img.shields.io/github/stars/vopani/waveton?color=yellowgreen&logo=github)
|
| 11 |
+
[](https://twitter.com/vopani)
|
| 12 |
+
|
| 13 |
+
</div>
|
| 14 |
+
|
| 15 |
+
## NER Annotation 🖥️
|
| 16 |
+
|
| 17 |
+
Annotate entities for Named-Entity Recognition tasks.
|
| 18 |
+
|
| 19 |
+
## Setup ⚙️
|
| 20 |
+
|
| 21 |
+
1. Check your Python version: Python 3.9+ is required, and Python 3.10+ is recommended for the best experience
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
python3 --version
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
2. Clone the repository
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
git clone https://github.com/vopani/waveton.git
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
3. Create a virtual environment
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
cd waveton/apps/data_apps/ner_annotation
|
| 37 |
+
python3 -m venv venv
|
| 38 |
+
source venv/bin/activate
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
4. Install the packages
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
python3 -m pip install -U pip
|
| 45 |
+
python3 -m pip install -r requirements.txt
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
5. Run the application
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
wave run app
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
6. View the application on your local browser: [http://localhost:10101](http://localhost:10101)
|
app.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from random import randint
|
| 3 |
+
|
| 4 |
+
from h2o_wave import Q, main, app, copy_expando, handle_on, on
|
| 5 |
+
|
| 6 |
+
import cards
|
| 7 |
+
import constants
|
| 8 |
+
|
| 9 |
+
# Set up logging
|
| 10 |
+
logging.basicConfig(format='%(levelname)s:\t[%(asctime)s]\t%(message)s', level=logging.INFO)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@app('/')
async def serve(q: Q):
    """
    Main entry point. Every query from the browser is routed through this function.

    The first matching step handles the query: client initialization, theme toggle,
    a registered `@on` handler, or the fallback page. Any exception raised along
    the way is shown to the user via `show_error`.
    """

    try:
        # App-level setup happens once; the query then continues below
        if not q.app.initialized:
            await initialize_app(q)

        # A browser tab seen for the first time only gets initialized
        if not q.client.initialized:
            await initialize_client(q)
            return

        # The dark mode toggle was flipped
        theme_toggled = q.args.theme_dark is not None and q.args.theme_dark != q.client.theme_dark
        if theme_toggled:
            await update_theme(q)
            return

        # Give the registered query handlers a chance
        if await handle_on(q):
            return

        # Nothing handled the query: show a fallback card instead of a blank page to help identify bugs
        await handle_fallback(q)

    except Exception as error:
        await show_error(q, error=str(error))
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
async def initialize_app(q: Q):
    """
    Perform the one-time, app-level initialization.

    Stores the names of the cards that `clear_cards` removes and marks the app as initialized.
    """

    logging.info('Initializing app')

    # Names of the page cards that get deleted whenever the page is cleared
    clearable_cards = ['ner_entities', 'ner_annotator', 'error']
    q.app.cards = clearable_cards

    q.app.initialized = True
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
async def initialize_client(q: Q):
    """
    Initialize the client (browser tab).

    Resets the client state (theme, tags, texts, navigation position and button flags),
    then draws the meta, header, footer, entity and annotator cards and saves the page.
    """

    # Function-scope import so this block does not depend on the file's import section
    from copy import deepcopy

    logging.info('Initializing client')

    # Set initial argument values
    q.client.theme_dark = True

    # Every client gets its own deep copy of the defaults. The query handlers append to the
    # tag list, assign into the text list and pop keys from the items in place; without a copy
    # those edits would modify the constants module itself, leak into every other browser tab
    # and survive a "Reload".
    q.client.ner_tags = deepcopy(constants.NER_TAGS)
    q.client.ner_data = deepcopy(constants.NER_DATA)
    q.client.ner_index = 0

    # There is nothing to move to when at most one text exists
    q.client.disable_next = len(q.client.ner_data) <= 1
    q.client.disable_previous = True

    # Add layouts, header and footer
    q.page['meta'] = cards.meta
    q.page['header'] = cards.header
    q.page['footer'] = cards.footer

    # Add cards for main page
    q.page['ner_entities'] = cards.ner_entities(ner_tags=q.client.ner_tags)
    q.page['ner_annotator'] = cards.ner_annotator(
        ner_tags=q.client.ner_tags,
        ner_items=q.client.ner_data[q.client.ner_index],
        disable_next=q.client.disable_next,
        disable_previous=q.client.disable_previous
    )

    q.client.initialized = True

    await q.page.save()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
async def update_theme(q: Q):
    """
    Switch the app between dark and light mode.

    Copies the submitted arguments into the client state, then sets the page theme
    and the header icon color according to `q.client.theme_dark`.
    """

    # Copying argument values to client
    copy_expando(q.args, q.client)

    # Pick the log message, page theme and header icon color for the requested mode
    if q.client.theme_dark:
        message, theme, icon_color = 'Updating theme to dark mode', 'h2o-dark', 'black'
    else:
        message, theme, icon_color = 'Updating theme to light mode', 'light', '#FEC924'

    logging.info(message)

    q.page['meta'].theme = theme
    q.page['header'].icon_color = icon_color

    await q.page.save()
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@on('next')
async def show_next_text(q: Q):
    """
    Show next NER data.

    Saves the annotation of the current text, advances to the next text (never past
    the last one) and redraws the annotator card.
    """

    logging.info('Showing the next NER data')

    # Save annotation. Only overwrite the stored text when this query actually carries an
    # annotation; otherwise a value left in q.client by an earlier query would replace it.
    copy_expando(q.args, q.client)
    if q.args.ner_annotator is not None:
        q.client.ner_data[q.client.ner_index] = q.args.ner_annotator

    # Move to next text, clamped so a stale or repeated 'next' cannot run past the end
    last_index = len(q.client.ner_data) - 1
    q.client.ner_index = min(q.client.ner_index + 1, last_index)

    # Derive both button states from the position so they can never disagree with it
    q.client.disable_previous = q.client.ner_index == 0
    q.client.disable_next = q.client.ner_index == last_index

    # Display data
    q.page['ner_annotator'] = cards.ner_annotator(
        ner_tags=q.client.ner_tags,
        ner_items=q.client.ner_data[q.client.ner_index],
        disable_next=q.client.disable_next,
        disable_previous=q.client.disable_previous
    )

    await q.page.save()
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
@on('previous')
async def show_previous_text(q: Q):
    """
    Show previous NER data.

    Saves the annotation of the current text, steps back to the previous text (never
    before the first one) and redraws the annotator card.
    """

    logging.info('Showing the previous NER data')

    # Save annotation. Only overwrite the stored text when this query actually carries an
    # annotation; otherwise a value left in q.client by an earlier query would replace it.
    copy_expando(q.args, q.client)
    if q.args.ner_annotator is not None:
        q.client.ner_data[q.client.ner_index] = q.args.ner_annotator

    # Move to previous text, clamped at 0: a negative index would silently wrap around
    # to the end of the list instead of failing
    q.client.ner_index = max(q.client.ner_index - 1, 0)

    # Derive both button states from the position so they can never disagree with it
    q.client.disable_previous = q.client.ner_index == 0
    q.client.disable_next = q.client.ner_index >= len(q.client.ner_data) - 1

    # Display data
    q.page['ner_annotator'] = cards.ner_annotator(
        ner_tags=q.client.ner_tags,
        ner_items=q.client.ner_data[q.client.ner_index],
        disable_next=q.client.disable_next,
        disable_previous=q.client.disable_previous
    )

    await q.page.save()
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@on('add')
async def add_entity(q: Q):
    """
    Add a new entity to NER tags.

    Saves the annotation of the current text, appends a tag named after the submitted
    `new_entity_name` (with a random color) unless the name is empty or already taken,
    and redraws the entity and annotator cards.
    """

    logging.info('Adding a new entity')

    # Save annotation. Only overwrite the stored text when this query actually carries an
    # annotation; otherwise a value left in q.client by an earlier query would replace it.
    copy_expando(q.args, q.client)
    if q.args.ner_annotator is not None:
        q.client.ner_data[q.client.ner_index] = q.args.ner_annotator

    # Normalize the requested name: a missing value counts as empty, surrounding whitespace is dropped
    label = (q.client.new_entity_name or '').strip()
    name = label.lower()

    # Add new entity
    if not label:
        logging.info('No entity added since the entity name is empty')
    elif any(tag['name'] == name for tag in q.client.ner_tags):
        # Tag names are used as identifiers by the delete dropdown and the annotator, so they must stay unique
        logging.info('No entity added since an entity with the same name already exists')
    else:
        q.client.ner_tags.append({
            'name': name,
            'label': label,
            'color': '#{:02x}{:02x}{:02x}'.format(randint(0, 255), randint(0, 255), randint(0, 255))
        })

    # Refresh data with new entity
    q.page['ner_entities'] = cards.ner_entities(ner_tags=q.client.ner_tags)
    q.page['ner_annotator'] = cards.ner_annotator(
        ner_tags=q.client.ner_tags,
        ner_items=q.client.ner_data[q.client.ner_index],
        disable_next=q.client.disable_next,
        disable_previous=q.client.disable_previous
    )

    await q.page.save()
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
@on('delete')
async def delete_entity(q: Q):
    """
    Delete an entity from NER tags.

    Saves the annotation of the current text, removes the tag selected in
    `delete_entity_name` from the tag list and from every annotated item, and redraws
    the entity and annotator cards. Nothing is deleted when no entity is selected or
    when only one entity is left.
    """

    logging.info('Deleting an entity')

    # Save annotation. Only overwrite the stored text when this query actually carries an
    # annotation; otherwise a value left in q.client by an earlier query would replace it.
    copy_expando(q.args, q.client)
    if q.args.ner_annotator is not None:
        q.client.ner_data[q.client.ner_index] = q.args.ner_annotator

    # Delete entity and its tags
    target = q.client.delete_entity_name
    if not target:
        logging.info('No entities deleted since no entity was selected')
    elif len(q.client.ner_tags) > 1:
        q.client.ner_tags = [tag for tag in q.client.ner_tags if tag['name'] != target]

        # Untag every item, in every text, that carried the deleted entity
        for text in q.client.ner_data:
            for item in text:
                if item.get('tag') == target:
                    item.pop('tag')
    else:
        logging.info('No entities deleted since annotator requires at least one entity available')

    # Refresh data with remaining entities
    q.page['ner_entities'] = cards.ner_entities(ner_tags=q.client.ner_tags)
    q.page['ner_annotator'] = cards.ner_annotator(
        ner_tags=q.client.ner_tags,
        ner_items=q.client.ner_data[q.client.ner_index],
        disable_next=q.client.disable_next,
        disable_previous=q.client.disable_previous
    )

    await q.page.save()
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def clear_cards(q: Q, card_names: list):
    """
    Remove the named cards from the page.

    The page is not saved here; that is left to the caller.
    """

    logging.info('Clearing cards')

    # Drop each listed card from the page
    for name in card_names:
        del q.page[name]
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
async def show_error(q: Q, error: str):
    """
    Log an error and replace the page content with a crash report.
    """

    logging.error(error)

    # Remove every card registered in q.app.cards before drawing the report
    clear_cards(q, q.app.cards)

    # Build the crash report card and put it on the page
    report_card = cards.crash_report(q)
    q.page['error'] = report_card

    await q.page.save()
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
@on('reload')
async def reload_client(q: Q):
    """
    Reset the client (browser tab).

    Handles the 'reload' query, which is sent by the "Reload" button of the crash report:
    the current cards are removed and the client is initialized again.
    """

    logging.info('Reloading client')

    # Remove every card registered in q.app.cards
    clear_cards(q, q.app.cards)

    # Start over with a freshly initialized client
    await initialize_client(q)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
async def handle_fallback(q: Q):
    """
    Show the fallback card for a query that nothing else handled.

    Reaching this function indicates a bug in the app or in its query handling logic.
    """

    logging.info('Adding fallback page')

    fallback_card = cards.fallback
    q.page['fallback'] = fallback_card

    await q.page.save()
|
cards.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import traceback
|
| 3 |
+
|
| 4 |
+
from h2o_wave import Q, expando_to_dict, ui
|
| 5 |
+
|
| 6 |
+
# App name
|
| 7 |
+
app_name = 'NER Annotation'
|
| 8 |
+
|
| 9 |
+
# Link to repo. Report bugs/features here :)
|
| 10 |
+
repo_url = 'https://github.com/vopani/waveton'
|
| 11 |
+
issue_url = f'{repo_url}/issues/new?assignees=vopani&labels=bug&template=error-report.md&title=%5BERROR%5D'
|
| 12 |
+
|
| 13 |
+
# A meta card to hold the app's title, layouts, dialogs, theme and other meta information
|
| 14 |
+
meta = ui.meta_card(
|
| 15 |
+
box='',
|
| 16 |
+
title='WaveTon',
|
| 17 |
+
layouts=[
|
| 18 |
+
ui.layout(
|
| 19 |
+
breakpoint='xs',
|
| 20 |
+
zones=[
|
| 21 |
+
ui.zone(name='header'),
|
| 22 |
+
ui.zone(
|
| 23 |
+
name='main',
|
| 24 |
+
size='calc(100vh - 150px)',
|
| 25 |
+
direction='row',
|
| 26 |
+
zones=[
|
| 27 |
+
ui.zone(name='ner_entities', size='20%'),
|
| 28 |
+
ui.zone(name='ner_annotator', size='80%')
|
| 29 |
+
]
|
| 30 |
+
),
|
| 31 |
+
ui.zone(name='footer')
|
| 32 |
+
]
|
| 33 |
+
)
|
| 34 |
+
],
|
| 35 |
+
theme='h2o-dark'
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# The header shown on all the app's pages
|
| 39 |
+
header = ui.header_card(
|
| 40 |
+
box='header',
|
| 41 |
+
title='NER Annotation',
|
| 42 |
+
subtitle='Annotate entities for Named-Entity Recognition tasks',
|
| 43 |
+
icon='Handwriting',
|
| 44 |
+
icon_color='black',
|
| 45 |
+
items=[
|
| 46 |
+
ui.toggle(name='theme_dark', label='Dark Mode', value=True, trigger=True)
|
| 47 |
+
]
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
# The footer shown on all the app's pages
|
| 51 |
+
footer = ui.footer_card(
|
| 52 |
+
box='footer',
|
| 53 |
+
caption=f'Learn more about <a href="{repo_url}" target="_blank"> WaveTon: 💯 Wave Applications</a>'
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
# A fallback card for handling bugs
|
| 57 |
+
fallback = ui.form_card(
|
| 58 |
+
box='fallback',
|
| 59 |
+
items=[ui.text('Uh-oh, something went wrong!')]
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def ner_entities(ner_tags: list[dict]) -> ui.FormCard:
    """
    Build the card for adding and deleting NER entities.

    Each tag in `ner_tags` becomes one choice of the delete dropdown, using its
    'name' and 'label' keys.
    """

    # Upper part: textbox and button for adding an entity
    add_section = [
        ui.textbox(name='new_entity_name', label='Type a new entity to be added'),
        ui.buttons(items=[ui.button(name='add', label='Add', primary=True)], justify='center'),
    ]

    # Lower part: dropdown and button for deleting an existing entity
    delete_choices = [ui.choice(name=tag['name'], label=tag['label']) for tag in ner_tags]
    delete_section = [
        ui.dropdown(
            name='delete_entity_name',
            label='Select an entity to delete',
            choices=delete_choices
        ),
        ui.buttons(items=[ui.button(name='delete', label='Delete', primary=True)], justify='center'),
    ]

    return ui.form_card(
        box='ner_entities',
        items=[*add_section, ui.separator(), *delete_section]
    )
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def ner_annotator(
    ner_tags: list[dict],
    ner_items: list[dict],
    disable_next: bool = False,
    disable_previous: bool = False
) -> ui.FormCard:
    """
    Build the card holding the text annotator and its navigation buttons.

    Every dict in `ner_tags` and `ner_items` is unpacked as keyword arguments into
    `ui.text_annotator_tag` and `ui.text_annotator_item` respectively. The two flags
    control whether the 'Next' and 'Previous' buttons are disabled.
    """

    annotator = ui.text_annotator(
        name='ner_annotator',
        title='Click and/or drag text to annotate',
        tags=[ui.text_annotator_tag(**tag) for tag in ner_tags],
        items=[ui.text_annotator_item(**item) for item in ner_items]
    )

    navigation = ui.buttons(
        items=[
            ui.button(name='next', label='Next', primary=True, disabled=disable_next),
            ui.button(name='previous', label='Previous', disabled=disable_previous)
        ]
    )

    return ui.form_card(box='ner_annotator', items=[annotator, navigation])
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def crash_report(q: Q) -> ui.FormCard:
    """
    Build the card that reports a crash to the user.

    The card contains a "Reload" button and an expander with the stack trace of the
    exception currently being handled (read via `sys.exc_info()`) plus a dump of
    q.app, q.user, q.client, q.events and q.args, for error reporting.
    """

    def code_block(content):
        # Wrap the given lines in a fenced markdown code block
        return '\n'.join(['```', *content, '```'])

    stack_trace = traceback.format_exception(*sys.exc_info())
    dump = ['### Stack Trace', code_block(stack_trace)]

    # One section per state container, listing its attributes as "key: value" lines
    states = (
        ('q.app', q.app),
        ('q.user', q.user),
        ('q.client', q.client),
        ('q.events', q.events),
        ('q.args', q.args),
    )
    for name, source in states:
        entries = [f'{k}: {v}' for k, v in expando_to_dict(source).items()]
        dump.extend([f'### {name}', code_block(entries)])

    oops = ui.stats(
        items=[
            ui.stat(
                label='',
                value='Oops!',
                caption='Something went wrong',
                icon='Error'
            )
        ],
    )

    details = ui.expander(name='report', label='Error Details', items=[
        ui.text(
            f'To report this issue, <a href="{issue_url}" target="_blank">please open an issue</a> with the details below:'),
        ui.text_l(content=f'Report Issue in App: **{app_name}**'),
        ui.text(content='\n'.join(dump)),
    ])

    return ui.form_card(
        box='main',
        items=[
            oops,
            ui.separator(),
            ui.text_l(content='Apologies for the inconvenience!'),
            ui.buttons(items=[ui.button(name='reload', label='Reload', primary=True)]),
            details
        ]
    )
|
constants.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NER_TAGS = [
|
| 2 |
+
{"name": "organization", "label": "Organization", "color": "#F1CBCB"},
|
| 3 |
+
{"name": "metric", "label": "Metric", "color": "#CAEACA"}
|
| 4 |
+
]
|
| 5 |
+
|
| 6 |
+
NER_DATA = [
|
| 7 |
+
[
|
| 8 |
+
{"text": "At "},
|
| 9 |
+
{"text": "Santander", "tag": "organization"},
|
| 10 |
+
{"text": " our mission is to help people and businesses prosper. "},
|
| 11 |
+
{"text": "We are always looking for ways to help our customers understand their financial health "},
|
| 12 |
+
{"text": "and identify which products and services might help them achieve their monetary goals. "},
|
| 13 |
+
{"text": "Our data science team is continually challenging our machine learning algorithms, working with "},
|
| 14 |
+
{"text": "the global data science community to make sure we can more accurately identify new ways "},
|
| 15 |
+
{"text": "to solve our most common challenge, binary classification problems such as: "},
|
| 16 |
+
{"text": "is a customer satisfied? Will a customer buy this product? Can a customer pay this loan? "},
|
| 17 |
+
{"text": "In this challenge, we invite Kagglers to help us identify which customers will make "},
|
| 18 |
+
{"text": "a specific transaction in the future, irrespective of the amount of money transacted. "},
|
| 19 |
+
{"text": "The data provided for this competition has the same structure as the real data we have available "},
|
| 20 |
+
{"text": "to solve this problem."}
|
| 21 |
+
],
|
| 22 |
+
[
|
| 23 |
+
{"text": "Many people struggle to get loans due to insufficient or non-existent credit histories. "},
|
| 24 |
+
{"text": "And, unfortunately, this population is often taken advantage of by untrustworthy lenders. "},
|
| 25 |
+
{"text": "Home Credit", "tag": "organization"},
|
| 26 |
+
{"text": " strives to broaden financial inclusion for the unbanked population by providing "},
|
| 27 |
+
{"text": "a positive and safe borrowing experience. "},
|
| 28 |
+
{"text": "In order to make sure this underserved population has a positive loan experience, "},
|
| 29 |
+
{"text": "Home Credit", "tag": "organization"},
|
| 30 |
+
{"text": " makes use of a variety of alternative data--including telco & transactional information"},
|
| 31 |
+
{"text": "--to predict their clients repayment abilities. While "},
|
| 32 |
+
{"text": "Home Credit", "tag": "organization"},
|
| 33 |
+
{"text": " is currently using various statistical and machine learning methods to make "},
|
| 34 |
+
{"text": "predictions, they're challenging Kagglers to help them unlock "},
|
| 35 |
+
{"text": "the full potential of their data. "},
|
| 36 |
+
{"text": "Doing so will ensure that clients capable of repayment are not rejected "},
|
| 37 |
+
{"text": "and that loans are given with a principal, maturity, and repayment calendar that will empower "},
|
| 38 |
+
{"text": "their clients to be successful."}
|
| 39 |
+
],
|
| 40 |
+
[
|
| 41 |
+
{"text": "Imagine standing at the check-out counter at the grocery store with a long line behind you "},
|
| 42 |
+
{"text": "and the cashier not-so-quietly announces that your card has been declined. "},
|
| 43 |
+
{"text": "In this moment, you probably aren’t thinking about the data science that determined your fate. "},
|
| 44 |
+
{"text": "Embarrassed, and certain you have the funds to cover everything needed for an epic "},
|
| 45 |
+
{"text": "nacho party for 50 of your closest friends, you try your card again. "},
|
| 46 |
+
{"text": "Same result. As you step aside and allow the cashier to tend to the next customer, "},
|
| 47 |
+
{"text": "you receive a text message from your bank. "},
|
| 48 |
+
{"text": "'Press 1 if you really tried to spend $500 on cheddar cheese.' "},
|
| 49 |
+
{"text": "While perhaps cumbersome (and often embarrassing) in the moment, "},
|
| 50 |
+
{"text": "this fraud prevention system is actually saving consumers millions of dollars per year. "},
|
| 51 |
+
{"text": "Researchers from the "},
|
| 52 |
+
{"text": "IEEE Computational Intelligence Society (IEEE-CIS)", "tag": "organization"},
|
| 53 |
+
{"text": " want to improve this figure, while also improving the customer experience. With higher "},
|
| 54 |
+
{"text": "accuracy", "tag": "metric"},
|
| 55 |
+
{"text": " fraud detection, you can get on with your chips without the hassle. "},
|
| 56 |
+
{"text": "IEEE-CIS", "tag": "organization"},
|
| 57 |
+
{"text": " works across a variety of AI and machine learning areas, including deep neural networks, "},
|
| 58 |
+
{"text": "fuzzy systems, evolutionary computation, and swarm intelligence. "},
|
| 59 |
+
{"text": "Today they’re partnering with the world’s leading payment service company, "},
|
| 60 |
+
{"text": "Vesta Corporation", "tag": "organization"},
|
| 61 |
+
{"text": ", seeking the best solutions for fraud prevention industry, "},
|
| 62 |
+
{"text": "and now you are invited to join the challenge. "},
|
| 63 |
+
{"text": "In this competition, you’ll benchmark machine learning models on a challenging large-scale dataset. "},
|
| 64 |
+
{"text": "The data comes from "},
|
| 65 |
+
{"text": "Vesta", "tag": "organization"},
|
| 66 |
+
{"text": "'s real-world e-commerce transactions "},
|
| 67 |
+
{"text": "and contains a wide range of features from device type to product features. "},
|
| 68 |
+
{"text": "You also have the opportunity to create new features to improve your results. "},
|
| 69 |
+
{"text": "If successful, you’ll improve the efficacy of fraudulent transaction alerts for millions of people "},
|
| 70 |
+
{"text": "around the world, helping hundreds of thousands of businesses reduce their "},
|
| 71 |
+
{"text": "fraud loss", "tag": "metric"},
|
| 72 |
+
{"text": " and increase their "},
|
| 73 |
+
{"text": "revenue", "tag": "metric"},
|
| 74 |
+
{"text": ". And of course, you will save party people just like you the hassle of "},
|
| 75 |
+
{"text": "false positives", "tag": "metric"},
|
| 76 |
+
{"text": "."}
|
| 77 |
+
],
|
| 78 |
+
[
|
| 79 |
+
{"text": "How much camping gear will one store sell each month in a year? "},
|
| 80 |
+
{"text": "To the uninitiated, calculating sales at this level may seem as difficult as predicting the weather. "},
|
| 81 |
+
{"text": "Both types of forecasting rely on science and historical data. "},
|
| 82 |
+
{"text": "While a wrong weather forecast may result in you carrying around an umbrella on a sunny day, "},
|
| 83 |
+
{"text": "inaccurate business forecasts could result in actual or opportunity losses. "},
|
| 84 |
+
{"text": "In this competition, in addition to traditional forecasting methods you’re also challenged to use "},
|
| 85 |
+
{"text": "machine learning to improve forecast "},
|
| 86 |
+
{"text": "accuracy", "tag": "metric"},
|
| 87 |
+
{"text": ". The Makridakis Open Forecasting Center (MOFC) at the "},
|
| 88 |
+
{"text": "University of Nicosia", "tag": "organization"},
|
| 89 |
+
{"text": " conducts cutting-edge forecasting research and provides business forecast training. "},
|
| 90 |
+
{"text": "It helps companies achieve accurate predictions, estimate the levels of uncertainty, "},
|
| 91 |
+
{"text": "avoiding costly mistakes, and apply best forecasting practices. "},
|
| 92 |
+
{"text": "The MOFC is well known for its Makridakis Competitions, the first of which ran in the 1980s. "},
|
| 93 |
+
{"text": "In this competition, the fifth iteration, you will use hierarchical sales data from Walmart, "},
|
| 94 |
+
{"text": "the world’s largest company by "},
|
| 95 |
+
{"text": "revenue", "tag": "metric"},
|
| 96 |
+
{"text": ", to forecast daily sales for the next 28 days. "},
|
| 97 |
+
{"text": "The data, covers stores in three US States (California, Texas, and Wisconsin) "},
|
| 98 |
+
{"text": "and includes item level, department, product categories, and store details. "},
|
| 99 |
+
{"text": "In addition, it has explanatory variables such as "},
|
| 100 |
+
{"text": "price, promotions, day of the week, and special events. "},
|
| 101 |
+
{"text": "Together, this robust dataset can be used to improve forecasting "},
|
| 102 |
+
{"text": "accuracy", "tag": "metric"},
|
| 103 |
+
{"text": ". If successful, your work will continue to advance the theory and practice of forecasting. "},
|
| 104 |
+
{"text": "The methods used can be applied in various business areas, such as setting up appropriate "},
|
| 105 |
+
{"text": "inventory or service levels. Through its business support and training, "},
|
| 106 |
+
{"text": "the MOFC will help distribute the tools and knowledge so others can achieve more accurate "},
|
| 107 |
+
{"text": "and better calibrated forecasts, reduce waste and be able to appreciate uncertainty and its risk "},
|
| 108 |
+
{"text": "implications."}
|
| 109 |
+
],
|
| 110 |
+
[
|
| 111 |
+
{"text": "Nothing ruins the thrill of buying a brand new car more quickly than seeing your new insurance bill. "},
|
| 112 |
+
{"text": "The sting’s even more painful when you know you’re a good driver. "},
|
| 113 |
+
{"text": "It doesn’t seem fair that you have to pay so much if you’ve been cautious on the road for years. "},
|
| 114 |
+
{"text": "Porto Seguro, one of Brazil’s largest auto and homeowner insurance companies, completely agrees. "},
|
| 115 |
+
{"text": "Inaccuracies in car insurance company’s claim predictions raise the cost of insurance for "},
|
| 116 |
+
{"text": "good drivers and reduce the price for bad ones. "},
|
| 117 |
+
{"text": "In this competition, you’re challenged to build a model that predicts the probability that "},
|
| 118 |
+
{"text": "a driver will initiate an auto insurance claim in the next year. While "},
|
| 119 |
+
{"text": "Porto Seguro", "tag": "organization"},
|
| 120 |
+
{"text": " has used machine learning for the past 20 years, "},
|
| 121 |
+
{"text": "they’re looking to Kaggle’s machine learning community to explore new, more powerful methods. "},
|
| 122 |
+
{"text": "A more accurate prediction will allow them to further tailor their prices, and hopefully "},
|
| 123 |
+
{"text": "make auto insurance coverage more accessible to more drivers."}
|
| 124 |
+
]
|
| 125 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
h2o_wave==0.23.1
|