Spaces:
Sleeping
Sleeping
Commit
·
f639a6f
1
Parent(s):
a1c3586
adding files again
Browse files- .gitignore +124 -0
- README.md +205 -1
- __init__.py +0 -0
- app copy.py +38 -0
- docs/PDF_TESTING_README.md +27 -0
- docs/auroraai_report.txt +62 -0
- docs/auroraai_technical_specs.xlsx +0 -0
- docs/configuration.md +226 -0
- docs/overview.md +86 -0
- docs/prompting-guidelines.md +23 -0
- docs/setup.md +42 -0
- docs/troubleshooting.md +247 -0
- document_intelligence.py +403 -0
- guides/FIXES_APPLIED.md +89 -0
- guides/GETTING_STARTED.md +355 -0
- guides/INTELLIGENT_TOOLS_GUIDE.md +236 -0
- guides/UI_ENHANCEMENT_GUIDE.md +227 -0
- pyproject.toml +18 -0
- requirements.txt +5 -0
- server.py +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Virtual environments
|
| 55 |
+
.env
|
| 56 |
+
.venv
|
| 57 |
+
env/
|
| 58 |
+
venv/
|
| 59 |
+
ENV/
|
| 60 |
+
env.bak/
|
| 61 |
+
venv.bak/
|
| 62 |
+
|
| 63 |
+
# IDEs and editors
|
| 64 |
+
.vscode/
|
| 65 |
+
.idea/
|
| 66 |
+
*.swp
|
| 67 |
+
*.swo
|
| 68 |
+
*~
|
| 69 |
+
|
| 70 |
+
# Operating System
|
| 71 |
+
.DS_Store
|
| 72 |
+
.DS_Store?
|
| 73 |
+
._*
|
| 74 |
+
.Spotlight-V100
|
| 75 |
+
.Trashes
|
| 76 |
+
ehthumbs.db
|
| 77 |
+
Thumbs.db
|
| 78 |
+
|
| 79 |
+
# uv
|
| 80 |
+
uv.lock
|
| 81 |
+
|
| 82 |
+
# Logs
|
| 83 |
+
*.log
|
| 84 |
+
|
| 85 |
+
# Runtime data
|
| 86 |
+
pids
|
| 87 |
+
*.pid
|
| 88 |
+
*.seed
|
| 89 |
+
*.pid.lock
|
| 90 |
+
|
| 91 |
+
# Optional npm cache directory (if you use any JS tools)
|
| 92 |
+
.npm
|
| 93 |
+
|
| 94 |
+
# Optional eslint cache
|
| 95 |
+
.eslintcache
|
| 96 |
+
|
| 97 |
+
# Microbundle cache
|
| 98 |
+
.rpt2_cache/
|
| 99 |
+
.rts2_cache_cjs/
|
| 100 |
+
.rts2_cache_es/
|
| 101 |
+
.rts2_cache_umd/
|
| 102 |
+
|
| 103 |
+
# Optional REPL history
|
| 104 |
+
.node_repl_history
|
| 105 |
+
|
| 106 |
+
# Output of 'npm pack'
|
| 107 |
+
*.tgz
|
| 108 |
+
|
| 109 |
+
# Yarn Integrity file
|
| 110 |
+
.yarn-integrity
|
| 111 |
+
|
| 112 |
+
# dotenv environment variables file
|
| 113 |
+
.env.local
|
| 114 |
+
.env.development.local
|
| 115 |
+
.env.test.local
|
| 116 |
+
.env.production.local
|
| 117 |
+
|
| 118 |
+
# Temporary folders
|
| 119 |
+
tmp/
|
| 120 |
+
temp/
|
| 121 |
+
|
| 122 |
+
# Project-specific
|
| 123 |
+
*.sqlite
|
| 124 |
+
*.db
|
README.md
CHANGED
|
@@ -11,4 +11,208 @@ license: mit
|
|
| 11 |
short_description: An MCP server that navigates through your docs!
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
short_description: An MCP server that navigates through your docs!
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Docs Navigator MCP
|
| 15 |
+
A powerful documentation assistant that combines **Model Context Protocol (MCP)** with **Claude AI** to provide intelligent Q&A over your documentation files. Built with FastMCP and Gradio for an easy-to-use web interface.
|
| 16 |
+
|
| 17 |
+
## ✨ Features
|
| 18 |
+
|
| 19 |
+
- 🔍 **Smart Document Search**: Full-text search across your documentation files
|
| 20 |
+
- 🤖 **AI-Powered Responses**: Uses Claude AI to provide intelligent answers based on your docs
|
| 21 |
+
- 📁 **Multi-Format Support**: Works with `.md`, `.txt`, `.rst`, and `.pdf` files
|
| 22 |
+
- 🌐 **Web Interface**: Clean Gradio-based chat interface
|
| 23 |
+
- ⚡ **MCP Integration**: Leverages Model Context Protocol for seamless tool integration
|
| 24 |
+
- 🔧 **Easy Setup**: Simple configuration and deployment
|
| 25 |
+
- 📄 **PDF Support**: Extract and analyze text from PDF documents
|
| 26 |
+
|
| 27 |
+
## 🏗️ Architecture
|
| 28 |
+
|
| 29 |
+
```
|
| 30 |
+
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
| 31 |
+
│ Gradio UI │───▶│ Client Agent │───▶│ Claude AI │
|
| 32 |
+
│ (Chat Interface)│ │ (MCP Client) │ │ (Anthropic) │
|
| 33 |
+
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
| 34 |
+
│
|
| 35 |
+
▼
|
| 36 |
+
┌──────────────────┐
|
| 37 |
+
│ MCP Server │
|
| 38 |
+
│ (FastMCP) │
|
| 39 |
+
└──────────────────┘
|
| 40 |
+
│
|
| 41 |
+
▼
|
| 42 |
+
┌──────────────────┐
|
| 43 |
+
│ docs/ folder │
|
| 44 |
+
│ (.md, .txt, │
|
| 45 |
+
│ .rst files) │
|
| 46 |
+
└──────────────────┘
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
## 🚀 Quick Start
|
| 50 |
+
|
| 51 |
+
### Prerequisites
|
| 52 |
+
|
| 53 |
+
- Python 3.10 or higher
|
| 54 |
+
- An Anthropic API key
|
| 55 |
+
- UV package manager (recommended) or pip
|
| 56 |
+
|
| 57 |
+
### 1. Clone and Setup
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
git clone <your-repo-url>
|
| 61 |
+
cd docs-navigator
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### 2. Install Dependencies
|
| 65 |
+
|
| 66 |
+
Using UV (recommended):
|
| 67 |
+
```bash
|
| 68 |
+
uv sync
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Or using pip:
|
| 72 |
+
```bash
|
| 73 |
+
pip install -r requirements.txt
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### 3. Configure Environment
|
| 77 |
+
|
| 78 |
+
Create a `.env` file:
|
| 79 |
+
```bash
|
| 80 |
+
echo "ANTHROPIC_API_KEY=your_api_key_here" > .env
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
### 4. Add Your Documentation
|
| 84 |
+
|
| 85 |
+
Place your documentation files in the `docs/` directory:
|
| 86 |
+
```
|
| 87 |
+
docs/
|
| 88 |
+
├── overview.md
|
| 89 |
+
├── setup.md
|
| 90 |
+
├── troubleshooting.md
|
| 91 |
+
└── your-other-docs.txt
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
### 5. Launch the Application
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
# Using UV
|
| 98 |
+
uv run app_gradio.py
|
| 99 |
+
|
| 100 |
+
# Or directly with Python
|
| 101 |
+
python app_gradio.py
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
The app will be available at `http://127.0.0.1:7860`
|
| 105 |
+
|
| 106 |
+
## 📚 Usage Examples
|
| 107 |
+
|
| 108 |
+
Once the app is running, you can ask questions like:
|
| 109 |
+
|
| 110 |
+
- "How do I set up the authentication?"
|
| 111 |
+
- "What are the troubleshooting steps for connection issues?"
|
| 112 |
+
- "Where can I find information about API endpoints?"
|
| 113 |
+
- "Summarize the main features mentioned in the docs"
|
| 114 |
+
|
| 115 |
+
The AI will search through your documentation and provide contextual answers with references to the source files.
|
| 116 |
+
|
| 117 |
+
## 🛠️ Development
|
| 118 |
+
|
| 119 |
+
### Project Structure
|
| 120 |
+
|
| 121 |
+
```
|
| 122 |
+
docs-navigator/
|
| 123 |
+
├── app_gradio.py # Gradio web interface
|
| 124 |
+
├── client_agent.py # MCP client and Claude integration
|
| 125 |
+
├── server_docs.py # MCP server with doc tools
|
| 126 |
+
├── docs/ # Your documentation files
|
| 127 |
+
├── tests/ # Test scripts
|
| 128 |
+
│ ├── test_mcp.py # Test MCP server functionality
|
| 129 |
+
│ ├── test_anthropic.py # Test Claude API connection
|
| 130 |
+
│ └── test_complete.py # End-to-end functionality test
|
| 131 |
+
├── .env # Environment variables
|
| 132 |
+
├── pyproject.toml # Project configuration
|
| 133 |
+
└── requirements.txt # Python dependencies
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
### Available MCP Tools
|
| 137 |
+
|
| 138 |
+
The server exposes these tools to the AI:
|
| 139 |
+
|
| 140 |
+
1. **`list_docs()`**: Get a list of all available documentation files
|
| 141 |
+
2. **`search_docs(query, max_results)`**: Search for specific content across all docs
|
| 142 |
+
|
| 143 |
+
### Testing
|
| 144 |
+
|
| 145 |
+
Run the test suite:
|
| 146 |
+
|
| 147 |
+
```bash
|
| 148 |
+
# Test MCP server functionality
|
| 149 |
+
python tests/test_mcp.py
|
| 150 |
+
|
| 151 |
+
# Test Claude API connection
|
| 152 |
+
python tests/test_anthropic.py
|
| 153 |
+
|
| 154 |
+
# Test complete end-to-end functionality
|
| 155 |
+
python tests/test_complete.py
|
| 156 |
+
```
|
| 157 |
+
|
| 158 |
+
## 🔧 Configuration
|
| 159 |
+
|
| 160 |
+
### Environment Variables
|
| 161 |
+
|
| 162 |
+
| Variable | Description | Required |
|
| 163 |
+
|----------|-------------|----------|
|
| 164 |
+
| `ANTHROPIC_API_KEY` | Your Anthropic Claude API key | Yes |
|
| 165 |
+
|
| 166 |
+
### Supported File Formats
|
| 167 |
+
|
| 168 |
+
- **Markdown**: `.md`
|
| 169 |
+
- **Text**: `.txt`
|
| 170 |
+
- **reStructuredText**: `.rst`
|
| 171 |
+
- **PDF Documents**: `.pdf` (text extraction)
|
| 172 |
+
|
| 173 |
+
### Model Configuration
|
| 174 |
+
|
| 175 |
+
The app currently uses `claude-3-haiku-20240307`. To change the model, edit the model name in `client_agent.py`:
|
| 176 |
+
|
| 177 |
+
```python
|
| 178 |
+
model="claude-3-haiku-20240307" # Change to your preferred model
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
## 🐛 Troubleshooting
|
| 182 |
+
|
| 183 |
+
### Common Issues
|
| 184 |
+
|
| 185 |
+
1. **"Model not found" error**: Your API key may not have access to the specified Claude model. The app will automatically test available models.
|
| 186 |
+
|
| 187 |
+
2. **MCP connection issues**: Ensure the `server_docs.py` script is executable and in the correct location.
|
| 188 |
+
|
| 189 |
+
3. **No documents found**: Make sure your documentation files are in the `docs/` folder with supported extensions.
|
| 190 |
+
|
| 191 |
+
4. **Gradio interface not loading**: Check that port 7860 is available or modify the port in `app_gradio.py`.
|
| 192 |
+
|
| 193 |
+
### Debug Mode
|
| 194 |
+
|
| 195 |
+
Enable verbose logging by modifying the logging level in the respective files.
|
| 196 |
+
|
| 197 |
+
## 🤝 Contributing
|
| 198 |
+
|
| 199 |
+
1. Fork the repository
|
| 200 |
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
| 201 |
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
| 202 |
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
| 203 |
+
5. Open a Pull Request
|
| 204 |
+
|
| 205 |
+
## 📄 License
|
| 206 |
+
|
| 207 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 208 |
+
|
| 209 |
+
## 🙏 Acknowledgments
|
| 210 |
+
|
| 211 |
+
- Built with [FastMCP](https://github.com/modelcontextprotocol/python-sdk) for Model Context Protocol integration
|
| 212 |
+
- Powered by [Anthropic Claude](https://www.anthropic.com/) for AI responses
|
| 213 |
+
- UI created with [Gradio](https://gradio.app/)
|
| 214 |
+
- Package management with [UV](https://docs.astral.sh/uv/)
|
| 215 |
+
|
| 216 |
+
---
|
| 217 |
+
|
| 218 |
+
For more detailed instructions, see the [Getting Started Guide](guides/GETTING_STARTED.md).
|
__init__.py
ADDED
|
File without changes
|
app copy.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Docs Navigator MCP - Launcher Script
|
| 4 |
+
|
| 5 |
+
This script launches the Gradio UI for the Docs Navigator MCP application.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Add the project root to the Python path
|
| 13 |
+
project_root = Path(__file__).parent
|
| 14 |
+
sys.path.insert(0, str(project_root))
|
| 15 |
+
|
| 16 |
+
# Change to project directory to ensure relative paths work
|
| 17 |
+
os.chdir(project_root)
|
| 18 |
+
|
| 19 |
+
# Import and run the app
|
| 20 |
+
from src.ui.app import demo
|
| 21 |
+
|
| 22 |
+
def main():
|
| 23 |
+
"""Main entry point for the application."""
|
| 24 |
+
print("🚀 Starting Docs Navigator MCP...")
|
| 25 |
+
print("📚 AI-Powered Documentation Assistant")
|
| 26 |
+
print("🌐 The app will be available at: http://127.0.0.1:7863")
|
| 27 |
+
print("💡 Ask questions about your documentation!")
|
| 28 |
+
print("-" * 50)
|
| 29 |
+
|
| 30 |
+
demo.launch(
|
| 31 |
+
server_name="127.0.0.1",
|
| 32 |
+
server_port=7863,
|
| 33 |
+
show_error=True,
|
| 34 |
+
share=False # Set to True if you want a public link
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
if __name__ == "__main__":
|
| 38 |
+
main()
|
docs/PDF_TESTING_README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PDF Support Testing
|
| 2 |
+
|
| 3 |
+
## How to Test PDF Functionality
|
| 4 |
+
|
| 5 |
+
1. **Add a PDF file** to the `docs/` folder
|
| 6 |
+
2. **Run the test script**: `python tests/test_pdf_support.py`
|
| 7 |
+
3. **Use the Gradio interface** to ask questions about your PDF content
|
| 8 |
+
|
| 9 |
+
## Supported PDF Features
|
| 10 |
+
|
| 11 |
+
- Text extraction from all pages
|
| 12 |
+
- Full-text search across PDF content
|
| 13 |
+
- AI-powered Q&A over PDF documents
|
| 14 |
+
- Section analysis and summarization
|
| 15 |
+
- PDF content integrated with other docs
|
| 16 |
+
|
| 17 |
+
## Example Questions to Try
|
| 18 |
+
|
| 19 |
+
- "What topics are covered in the PDF?"
|
| 20 |
+
- "Summarize the main points from the PDF"
|
| 21 |
+
- "Search for [specific term] in all documents"
|
| 22 |
+
|
| 23 |
+
## Notes
|
| 24 |
+
|
| 25 |
+
- PDFs with images/scanned text require OCR (not yet implemented)
|
| 26 |
+
- Complex layouts may have formatting issues
|
| 27 |
+
- Page numbers are preserved for reference
|
docs/auroraai_report.txt
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
AuroraAI Technical Report
|
| 2 |
+
|
| 3 |
+
AuroraAI is an enterprise-grade multimodal assistant designed for
|
| 4 |
+
documentation, engineering analysis, and workflow automation.
|
| 5 |
+
|
| 6 |
+
Core Capabilities
|
| 7 |
+
|
| 8 |
+
- Long-context reasoning (200k tokens)
|
| 9 |
+
- Code generation and API reasoning
|
| 10 |
+
- Retrieval-augmented responses
|
| 11 |
+
- Document drafting, editing, and summarization
|
| 12 |
+
- Secure, role-based enterprise integration
|
| 13 |
+
|
| 14 |
+
Architecture Summary
|
| 15 |
+
|
| 16 |
+
AuroraAI is a 70B-parameter transformer model featuring: - Hierarchical
|
| 17 |
+
and sliding-window attention mechanisms - 8192-dimensional hidden
|
| 18 |
+
layers - 96 transformer blocks - 64 attention heads - SwiGLU-activated
|
| 19 |
+
feed-forward networks - Rotary positional embeddings with extended
|
| 20 |
+
scaling
|
| 21 |
+
|
| 22 |
+
Training Methodology
|
| 23 |
+
|
| 24 |
+
AuroraAI was trained using: - Autoregressive next-token prediction - A
|
| 25 |
+
mixture of technical documentation, code corpora, research texts, RFCs,
|
| 26 |
+
and synthetic alignment sets - AdamW optimization with warmup and cosine
|
| 27 |
+
decay - Distributed training across 1024–4096 GPUs - BF16
|
| 28 |
+
mixed-precision gradient computation - Redundant asynchronous
|
| 29 |
+
checkpointing
|
| 30 |
+
|
| 31 |
+
Evaluation Results
|
| 32 |
+
|
| 33 |
+
- HumanEval+ accuracy: 86%
|
| 34 |
+
- Document QA improvement: +22% over baseline
|
| 35 |
+
- Retrieval fidelity: 99% at 200k-token context
|
| 36 |
+
- Hallucination rate reduction: 38% via strict retrieval routing
|
| 37 |
+
|
| 38 |
+
Safety & Alignment
|
| 39 |
+
|
| 40 |
+
AuroraAI employs: - Supervised fine-tuning on technical reasoning
|
| 41 |
+
tasks - Reinforcement learning from human preferences - Multi-layer
|
| 42 |
+
content filtering - Policy-driven guardrails engine - Encrypted opt-in
|
| 43 |
+
user memory with immediate deletion controls
|
| 44 |
+
|
| 45 |
+
Intended Use Cases
|
| 46 |
+
|
| 47 |
+
AuroraAI performs best in: - Developer documentation workflows - API
|
| 48 |
+
lifecycle and architectural content creation - Engineering ticket
|
| 49 |
+
summarization - Knowledge retrieval from large document bases -
|
| 50 |
+
Enterprise search and analysis tasks
|
| 51 |
+
|
| 52 |
+
Limitations
|
| 53 |
+
|
| 54 |
+
AuroraAI is not optimized for: - Unsupervised high-risk autonomous
|
| 55 |
+
decision making - Medical, legal, or hazardous domain outputs without
|
| 56 |
+
human review - Context-free speculation or queries lacking grounding
|
| 57 |
+
data
|
| 58 |
+
|
| 59 |
+
Version Information
|
| 60 |
+
|
| 61 |
+
AuroraAI v1.0 - Expanded to 200k token window - Upgraded retrieval
|
| 62 |
+
engine - Added enterprise integration modules
|
docs/auroraai_technical_specs.xlsx
ADDED
|
Binary file (14.5 kB). View file
|
|
|
docs/configuration.md
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration Options
|
| 2 |
+
|
| 3 |
+
This document discusses the various configuration options available for Aurora AI.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
Aurora AI provides a comprehensive configuration framework supporting multi-tenancy, enterprise-grade security, and extensible integration patterns. The system employs a hierarchical configuration model with environment-specific overrides, schema validation, and runtime hot-reloading capabilities.
|
| 8 |
+
|
| 9 |
+
## Core Configuration Architecture
|
| 10 |
+
|
| 11 |
+
### Configuration Hierarchy
|
| 12 |
+
|
| 13 |
+
Aurora AI implements a cascading configuration system with the following precedence order (highest priority first):
|
| 14 |
+
|
| 15 |
+
1. **Runtime overrides** - Programmatic configuration via API
|
| 16 |
+
2. **Environment variables** - System-level configuration with `AURORA_` prefix
|
| 17 |
+
3. **Configuration files** - YAML/JSON/TOML format files
|
| 18 |
+
4. **Default values** - Embedded fallback configuration
|
| 19 |
+
|
| 20 |
+
### Configuration File Structure
|
| 21 |
+
|
| 22 |
+
```yaml
|
| 23 |
+
aurora:
|
| 24 |
+
engine:
|
| 25 |
+
inference_backend: "transformers"
|
| 26 |
+
model_path: "/models/aurora-v3"
|
| 27 |
+
device_map: "auto"
|
| 28 |
+
quantization:
|
| 29 |
+
enabled: true
|
| 30 |
+
bits: 4
|
| 31 |
+
scheme: "gptq"
|
| 32 |
+
runtime:
|
| 33 |
+
max_concurrent_requests: 128
|
| 34 |
+
request_timeout_ms: 30000
|
| 35 |
+
graceful_shutdown_timeout: 60
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## Model Configuration
|
| 39 |
+
|
| 40 |
+
### Inference Engine Parameters
|
| 41 |
+
|
| 42 |
+
- **`model_path`**: Filesystem path or Hugging Face model identifier
|
| 43 |
+
- **`device_map`**: Hardware allocation strategy (`auto`, `balanced`, `sequential`, or custom JSON mapping)
|
| 44 |
+
- **`torch_dtype`**: Precision mode (`float32`, `float16`, `bfloat16`, `int8`, `int4`)
|
| 45 |
+
- **`attention_implementation`**: Mechanism selection (`flash_attention_2`, `sdpa`, `eager`)
|
| 46 |
+
- **`rope_scaling`**: Rotary Position Embedding interpolation configuration
|
| 47 |
+
- **`kv_cache_dtype`**: Key-value cache quantization type
|
| 48 |
+
|
| 49 |
+
### Quantization Strategies
|
| 50 |
+
|
| 51 |
+
Aurora AI supports multiple quantization backends:
|
| 52 |
+
|
| 53 |
+
- **GPTQ**: 4-bit grouped quantization with calibration datasets
|
| 54 |
+
- **AWQ**: Activation-aware weight quantization
|
| 55 |
+
- **GGUF**: CPU-optimized quantization format
|
| 56 |
+
- **BitsAndBytes**: Dynamic 8-bit and 4-bit quantization
|
| 57 |
+
|
| 58 |
+
## API Configuration
|
| 59 |
+
|
| 60 |
+
### REST API Settings
|
| 61 |
+
|
| 62 |
+
```yaml
|
| 63 |
+
api:
|
| 64 |
+
host: "0.0.0.0"
|
| 65 |
+
port: 8080
|
| 66 |
+
workers: 4
|
| 67 |
+
uvicorn:
|
| 68 |
+
loop: "uvloop"
|
| 69 |
+
http: "httptools"
|
| 70 |
+
log_level: "info"
|
| 71 |
+
cors:
|
| 72 |
+
enabled: true
|
| 73 |
+
origins: ["https://*.example.com"]
|
| 74 |
+
allow_credentials: true
|
| 75 |
+
rate_limiting:
|
| 76 |
+
enabled: true
|
| 77 |
+
requests_per_minute: 60
|
| 78 |
+
burst_size: 10
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### Authentication & Authorization
|
| 82 |
+
|
| 83 |
+
- **API Key Authentication**: Header-based (`X-API-Key`) or query parameter
|
| 84 |
+
- **OAuth 2.0**: Support for Authorization Code and Client Credentials flows
|
| 85 |
+
- **JWT Tokens**: RS256/ES256 signature verification with JWKS endpoints
|
| 86 |
+
- **mTLS**: Mutual TLS authentication for service-to-service communication
|
| 87 |
+
|
| 88 |
+
## Integration Patterns
|
| 89 |
+
|
| 90 |
+
### Vector Database Integration
|
| 91 |
+
|
| 92 |
+
Aurora AI integrates with enterprise vector stores:
|
| 93 |
+
|
| 94 |
+
```yaml
|
| 95 |
+
vector_store:
|
| 96 |
+
provider: "pinecone" # or "weaviate", "qdrant", "milvus", "chromadb"
|
| 97 |
+
connection:
|
| 98 |
+
api_key: "${PINECONE_API_KEY}"
|
| 99 |
+
environment: "us-west1-gcp"
|
| 100 |
+
index_name: "aurora-embeddings"
|
| 101 |
+
embedding:
|
| 102 |
+
model: "text-embedding-3-large"
|
| 103 |
+
dimensions: 3072
|
| 104 |
+
batch_size: 100
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### Message Queue Integration
|
| 108 |
+
|
| 109 |
+
Asynchronous processing via message brokers:
|
| 110 |
+
|
| 111 |
+
- **RabbitMQ**: AMQP 0-9-1 protocol with exchange routing
|
| 112 |
+
- **Apache Kafka**: High-throughput event streaming with consumer groups
|
| 113 |
+
- **Redis Streams**: Lightweight pub/sub with consumer group support
|
| 114 |
+
- **AWS SQS/SNS**: Cloud-native queue and notification services
|
| 115 |
+
|
| 116 |
+
### Observability Stack
|
| 117 |
+
|
| 118 |
+
```yaml
|
| 119 |
+
observability:
|
| 120 |
+
metrics:
|
| 121 |
+
provider: "prometheus"
|
| 122 |
+
port: 9090
|
| 123 |
+
path: "/metrics"
|
| 124 |
+
tracing:
|
| 125 |
+
provider: "opentelemetry"
|
| 126 |
+
exporter: "otlp"
|
| 127 |
+
endpoint: "http://jaeger:4317"
|
| 128 |
+
sampling_rate: 0.1
|
| 129 |
+
logging:
|
| 130 |
+
level: "INFO"
|
| 131 |
+
format: "json"
|
| 132 |
+
output: "stdout"
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
## Memory Management
|
| 136 |
+
|
| 137 |
+
### Cache Configuration
|
| 138 |
+
|
| 139 |
+
```yaml
|
| 140 |
+
cache:
|
| 141 |
+
inference_cache:
|
| 142 |
+
enabled: true
|
| 143 |
+
backend: "redis"
|
| 144 |
+
ttl_seconds: 3600
|
| 145 |
+
max_size_mb: 2048
|
| 146 |
+
prompt_cache:
|
| 147 |
+
enabled: true
|
| 148 |
+
strategy: "semantic_hash"
|
| 149 |
+
similarity_threshold: 0.95
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
### Context Window Management
|
| 153 |
+
|
| 154 |
+
- **Sliding Window**: Maintains fixed-size context with FIFO eviction
|
| 155 |
+
- **Semantic Compression**: Entropy-based summarization for long contexts
|
| 156 |
+
- **Hierarchical Attention**: Multi-level context representation
|
| 157 |
+
- **External Memory**: Vector store-backed infinite context
|
| 158 |
+
|
| 159 |
+
## Distributed Deployment
|
| 160 |
+
|
| 161 |
+
### Kubernetes Configuration
|
| 162 |
+
|
| 163 |
+
```yaml
|
| 164 |
+
deployment:
|
| 165 |
+
replicas: 3
|
| 166 |
+
strategy: "RollingUpdate"
|
| 167 |
+
resources:
|
| 168 |
+
requests:
|
| 169 |
+
cpu: "4000m"
|
| 170 |
+
memory: "16Gi"
|
| 171 |
+
nvidia.com/gpu: "1"
|
| 172 |
+
limits:
|
| 173 |
+
cpu: "8000m"
|
| 174 |
+
memory: "32Gi"
|
| 175 |
+
nvidia.com/gpu: "1"
|
| 176 |
+
autoscaling:
|
| 177 |
+
enabled: true
|
| 178 |
+
min_replicas: 2
|
| 179 |
+
max_replicas: 10
|
| 180 |
+
target_cpu_utilization: 70
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
### Service Mesh Integration
|
| 184 |
+
|
| 185 |
+
Aurora AI supports Istio, Linkerd, and Consul service mesh architectures with:
|
| 186 |
+
|
| 187 |
+
- **Traffic management**: Weighted routing, circuit breaking, retries
|
| 188 |
+
- **Security**: mTLS encryption, authorization policies
|
| 189 |
+
- **Observability**: Distributed tracing, metrics aggregation
|
| 190 |
+
|
| 191 |
+
## Advanced Features
|
| 192 |
+
|
| 193 |
+
### Custom Plugin System
|
| 194 |
+
|
| 195 |
+
```yaml
|
| 196 |
+
plugins:
|
| 197 |
+
enabled: true
|
| 198 |
+
plugin_path: "/opt/aurora/plugins"
|
| 199 |
+
plugins:
|
| 200 |
+
- name: "custom_tokenizer"
|
| 201 |
+
module: "aurora.plugins.tokenizers"
|
| 202 |
+
config:
|
| 203 |
+
vocab_size: 65536
|
| 204 |
+
- name: "retrieval_augmentation"
|
| 205 |
+
module: "aurora.plugins.rag"
|
| 206 |
+
config:
|
| 207 |
+
top_k: 5
|
| 208 |
+
rerank: true
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
### Multi-Model Orchestration
|
| 212 |
+
|
| 213 |
+
Configure model routing and ensemble strategies:
|
| 214 |
+
|
| 215 |
+
- **Load-based routing**: Distribute requests based on model server load
|
| 216 |
+
- **A/B testing**: Traffic splitting for model evaluation
|
| 217 |
+
- **Cascade patterns**: Fallback to alternative models on failure
|
| 218 |
+
- **Ensemble voting**: Aggregate predictions from multiple models
|
| 219 |
+
|
| 220 |
+
## Security Hardening
|
| 221 |
+
|
| 222 |
+
- **Secrets management**: Integration with HashiCorp Vault, AWS Secrets Manager
|
| 223 |
+
- **Network policies**: Zero-trust networking with pod security policies
|
| 224 |
+
- **Input sanitization**: Prompt injection and jailbreak detection
|
| 225 |
+
- **Output filtering**: PII redaction and content safety validation
|
| 226 |
+
- **Audit logging**: Immutable logs with cryptographic verification
|
docs/overview.md
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Aurora AI Overview
|
| 2 |
+
|
| 3 |
+
This documentation provides an overview of AuroraAI, its architecture, and how it works.
|
| 4 |
+
|
| 5 |
+
## Architecture
|
| 6 |
+
|
| 7 |
+
AuroraAI follows a modular, plugin-based architecture that enables extensibility and scalability:
|
| 8 |
+
|
| 9 |
+
```mermaid
|
| 10 |
+
graph TD
|
| 11 |
+
A[User Interface] --> B[Core Engine]
|
| 12 |
+
B --> C[Plugin Manager]
|
| 13 |
+
C --> D[Knowledge Base Plugin]
|
| 14 |
+
C --> E[API Integration Plugin]
|
| 15 |
+
C --> F[Analytics Plugin]
|
| 16 |
+
B --> G[Context Manager]
|
| 17 |
+
G --> H[Session State]
|
| 18 |
+
G --> I[Memory Store]
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
## Supported Models
|
| 22 |
+
|
| 23 |
+
AuroraAI supports multiple AI model providers to suit different use cases and deployment requirements:
|
| 24 |
+
|
| 25 |
+
| Provider | Model | Context Window | Best For |
|
| 26 |
+
|----------|-------|----------------|----------|
|
| 27 |
+
| OpenAI | GPT-4 Turbo | 128K tokens | Complex reasoning, code generation |
|
| 28 |
+
| OpenAI | GPT-3.5 Turbo | 16K tokens | Fast responses, general queries |
|
| 29 |
+
| Anthropic | Claude 3 Opus | 200K tokens | Long document analysis |
|
| 30 |
+
| Anthropic | Claude 3 Sonnet | 200K tokens | Balanced performance |
|
| 31 |
+
| Local | Llama 2 | 4K tokens | Privacy-sensitive deployments |
|
| 32 |
+
|
| 33 |
+
## System Requirements
|
| 34 |
+
|
| 35 |
+
| Component | Minimum | Recommended |
|
| 36 |
+
|-----------|---------|-------------|
|
| 37 |
+
| CPU | 4 cores | 8+ cores |
|
| 38 |
+
| RAM | 8 GB | 16+ GB |
|
| 39 |
+
| Storage | 10 GB | 50+ GB (with local models) |
|
| 40 |
+
| Network | 10 Mbps | 100+ Mbps |
|
| 41 |
+
|
| 42 |
+
## Plugin Ecosystem
|
| 43 |
+
|
| 44 |
+
AuroraAI's functionality can be extended through plugins:
|
| 45 |
+
|
| 46 |
+
- **Documentation Plugins**: Confluence, SharePoint, Notion, GitBook
|
| 47 |
+
- **Development Tools**: GitHub, GitLab, Jira, Linear
|
| 48 |
+
- **Communication**: Slack, Microsoft Teams, Discord
|
| 49 |
+
- **Data Sources**: SQL databases, REST APIs, GraphQL endpoints
|
| 50 |
+
|
| 51 |
+
## Security & Compliance
|
| 52 |
+
|
| 53 |
+
- **Authentication**: OAuth 2.0, SAML, API keys
|
| 54 |
+
- **Encryption**: AES-256 at rest, TLS 1.3 in transit
|
| 55 |
+
- **Access Control**: Role-based permissions (RBAC)
|
| 56 |
+
- **Audit Logging**: Complete activity tracking
|
| 57 |
+
- **Compliance**: SOC 2 Type II, GDPR, HIPAA-ready
|
| 58 |
+
|
| 59 |
+
## Performance Metrics
|
| 60 |
+
|
| 61 |
+
Typical response times under standard load:
|
| 62 |
+
|
| 63 |
+
| Operation | Avg Response Time | Throughput |
|
| 64 |
+
|-----------|------------------|------------|
|
| 65 |
+
| Simple query | < 2 seconds | 100 req/min |
|
| 66 |
+
| Document search | < 3 seconds | 60 req/min |
|
| 67 |
+
| Code generation | 3-5 seconds | 40 req/min |
|
| 68 |
+
| Long document analysis | 8-15 seconds | 20 req/min |
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
## What Is AuroraAI?
|
| 72 |
+
AuroraAI is a lightweight, multimodal AI assistant designed to help teams write, search, analyze, and automate workflows across enterprise environments. It integrates seamlessly with documentation platforms, internal tools, and external APIs.
|
| 73 |
+
|
| 74 |
+
## Key Features
|
| 75 |
+
- Natural language understanding for queries, tasks, and workflows.
|
| 76 |
+
- Plugin-based architecture for integrating knowledge bases, project systems, and developer tools.
|
| 77 |
+
- Context-aware responses optimized for technical writing and software development.
|
| 78 |
+
- Secure by design, with role-based access and encrypted local config.
|
| 79 |
+
|
| 80 |
+
## Ideal Use Cases
|
| 81 |
+
- Generating and editing documentation.
|
| 82 |
+
- Summarizing tickets, requirements, and engineering discussions.
|
| 83 |
+
- Producing API specs, code snippets, troubleshooting trees, and templates.
|
| 84 |
+
- Providing real-time answers based on uploaded files or internal docs.
|
| 85 |
+
|
| 86 |
+
|
docs/prompting-guidelines.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AuroraAI Prompting Guidelines
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
## Best Practices
|
| 5 |
+
- **Be explicit** about the output format (tables, JSON, Markdown, etc.).
|
| 6 |
+
- **Provide context** such as code samples, user stories, or error logs.
|
| 7 |
+
- **Use constraints** like tone, word limits, or style rules.
|
| 8 |
+
- **Iterate** by refining instructions rather than restarting.
|
| 9 |
+
|
| 10 |
+
## Example Prompts
|
| 11 |
+
### Technical Writing Prompt
|
| 12 |
+
"Rewrite this API overview in a concise style and include an authentication section. Output in Markdown."
|
| 13 |
+
|
| 14 |
+
### Engineering Prompt
|
| 15 |
+
"Given this error log, diagnose the root cause and propose three fixes ranked by difficulty."
|
| 16 |
+
|
| 17 |
+
### Documentation Prompt
|
| 18 |
+
"Create troubleshooting steps for users who can't authenticate. Include a table of error codes."
|
| 19 |
+
|
| 20 |
+
## Anti-Patterns to Avoid
|
| 21 |
+
- Vague commands like "Fix this".
|
| 22 |
+
- Multi-task prompts that blend unrelated asks.
|
| 23 |
+
- Lack of constraints when requesting decisions.
|
docs/setup.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AuroraAI Setup Guide
|
| 2 |
+
|
| 3 |
+
## Prerequisites
|
| 4 |
+
- Node.js 18+ or Python 3.10+
|
| 5 |
+
- Access to the AuroraAI developer portal
|
| 6 |
+
- API key with Assistant-level permissions
|
| 7 |
+
- Optional: GitHub, Jira, or Confluence integration tokens
|
| 8 |
+
|
| 9 |
+
## Installation
|
| 10 |
+
```bash
|
| 11 |
+
yarn global add auroraai
|
| 12 |
+
# OR
|
| 13 |
+
pip install auroraai-cli
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
## Initialization
|
| 17 |
+
```bash
|
| 18 |
+
aurora init
|
| 19 |
+
```
|
| 20 |
+
This command:
|
| 21 |
+
- Creates a project directory
|
| 22 |
+
- Generates a `.aurora/config.json` file
|
| 23 |
+
- Prompts you to add your API key
|
| 24 |
+
|
| 25 |
+
## Configuration File Example
|
| 26 |
+
```json
|
| 27 |
+
{
|
| 28 |
+
"apiKey": "YOUR_API_KEY",
|
| 29 |
+
"model": "aurora-pro",
|
| 30 |
+
"contextWindow": 200000,
|
| 31 |
+
"integrations": {
|
| 32 |
+
"github": true,
|
| 33 |
+
"jira": false
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## Connecting Knowledge Sources
|
| 39 |
+
```bash
|
| 40 |
+
aurora connect ./docs
|
| 41 |
+
```
|
| 42 |
+
This indexes your local documentation for retrieval.
|
docs/troubleshooting.md
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AuroraAI Troubleshooting Guide
|
| 2 |
+
|
| 3 |
+
## Common Issues & Fixes
|
| 4 |
+
|
| 5 |
+
### 1. Assistant Not Responding
|
| 6 |
+
**Symptoms:** Long delays or no output.
|
| 7 |
+
|
| 8 |
+
**Possible Causes & Fixes:**
|
| 9 |
+
- Network issue → Check VPN/firewall.
|
| 10 |
+
- Expired API key → Run `aurora auth refresh`.
|
| 11 |
+
- Large file uploads throttled → Compress or split files.
|
| 12 |
+
|
| 13 |
+
### 2. Incorrect or Irrelevant Answers
|
| 14 |
+
**Symptoms:** Hallucinations, outdated info.
|
| 15 |
+
|
| 16 |
+
**Fixes:**
|
| 17 |
+
- Re-index documentation: `aurora connect ./docs --force`.
|
| 18 |
+
## Troubleshooting Connection Issues
|
| 19 |
+
|
| 20 |
+
### Symptoms
|
| 21 |
+
- Failed API requests or timeouts
|
| 22 |
+
- Unable to connect to external services (GitHub, Jira, etc.)
|
| 23 |
+
- Network-related error messages
|
| 24 |
+
|
| 25 |
+
### Common Causes and Solutions
|
| 26 |
+
|
| 27 |
+
#### Network Connectivity
|
| 28 |
+
- **Check internet connection**: Verify your network is stable and accessible
|
| 29 |
+
- **Firewall settings**: Ensure Aurora is allowed through your firewall
|
| 30 |
+
- **Proxy configuration**: If behind a corporate proxy, configure proxy settings in Aurora's config file
|
| 31 |
+
|
| 32 |
+
#### DNS Resolution
|
| 33 |
+
- **Verify domain resolution**: Test connectivity to API endpoints using `ping` or `nslookup`
|
| 34 |
+
- **DNS cache**: Clear system DNS cache if experiencing intermittent issues
|
| 35 |
+
- **Alternative DNS**: Try switching to public DNS servers (e.g., 8.8.8.8)
|
| 36 |
+
|
| 37 |
+
#### SSL/TLS Errors
|
| 38 |
+
- **Certificate validation**: Update system certificates or use `--insecure` flag for testing only
|
| 39 |
+
- **Protocol version**: Ensure TLS 1.2+ is supported on your system
|
| 40 |
+
- **Corporate SSL inspection**: Request certificate authority (CA) certificate from IT if applicable
|
| 41 |
+
|
| 42 |
+
#### Timeout Issues
|
| 43 |
+
- **Increase timeout values**: Use `--timeout=60` flag to extend wait time
|
| 44 |
+
- **Check service status**: Verify third-party service availability at their status pages
|
| 45 |
+
- **Reduce payload size**: Break large requests into smaller batches
|
| 46 |
+
|
| 47 |
+
#### Debug Mode
|
| 48 |
+
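Enable verbose logging to diagnose connection problems, for example with `aurora --log-level=trace <command>`. The following two fixes apply to incorrect or irrelevant answers (issue 2 above):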
|
| 49 |
+
- Add explicit context to prompts.
|
| 50 |
+
- Enable strict retrieval with: `--retrieval=strict`.
|
| 51 |
+
|
| 52 |
+
### 3. Authentication Errors
|
| 53 |
+
| Code | Meaning | Fix |
|
| 54 |
+
|------|---------|------|
|
| 55 |
+
| 401 | Invalid token | Re-enter API key |
|
| 56 |
+
| 403 | Permission denied | Check role settings |
|
| 57 |
+
| 429 | Rate limit exceeded | Reduce request frequency (retry with backoff) or upgrade plan |
|
| 58 |
+
|
| 59 |
+
### 4. Integration Sync Failures
|
| 60 |
+
**Fixes:**
|
| 61 |
+
- Validate GitHub/Jira tokens.
|
| 62 |
+
- Re-auth via: `aurora integrate github --reset`.
|
| 63 |
+
- Clear cache: `aurora cache purge`.
|
| 64 |
+
|
| 65 |
+
### 5. Advanced Connection Diagnostics
|
| 66 |
+
|
| 67 |
+
#### Network Layer Analysis
|
| 68 |
+
**TCP/IP Stack Verification:**
|
| 69 |
+
- Run `netstat -an | grep ESTABLISHED` to check active connections
|
| 70 |
+
- Monitor packet loss: `ping -c 100 api.aurora.ai` and analyze statistics
|
| 71 |
+
- Use `traceroute api.aurora.ai` to identify routing bottlenecks
|
| 72 |
+
- Check MTU settings: `ping -M do -s 1472 api.aurora.ai` to test fragmentation
|
| 73 |
+
|
| 74 |
+
**Port Availability:**
|
| 75 |
+
- Verify required ports are open: 443 (HTTPS), 80 (HTTP fallback)
|
| 76 |
+
- Test with `telnet api.aurora.ai 443` or `nc -zv api.aurora.ai 443`
|
| 77 |
+
- Check for port conflicts: `lsof -i :443` (Linux/macOS) or `netstat -ano | findstr :443` (Windows)
|
| 78 |
+
|
| 79 |
+
#### Protocol-Level Debugging
|
| 80 |
+
**HTTP/HTTPS Traffic Inspection:**
|
| 81 |
+
- Capture traffic with `tcpdump -i any -w aurora.pcap host api.aurora.ai`
|
| 82 |
+
- Analyze with Wireshark to inspect TLS handshakes and HTTP headers
|
| 83 |
+
- Use `curl -vvv https://api.aurora.ai/health` for detailed handshake output
|
| 84 |
+
- Enable request/response logging: `export AURORA_DEBUG_HTTP=1`
|
| 85 |
+
|
| 86 |
+
**TLS Handshake Failures:**
|
| 87 |
+
- Check cipher suite compatibility: `openssl s_client -connect api.aurora.ai:443 -tls1_2`
|
| 88 |
+
- Verify certificate chain: `openssl s_client -showcerts -connect api.aurora.ai:443`
|
| 89 |
+
- Test SNI (Server Name Indication): `openssl s_client -servername api.aurora.ai -connect api.aurora.ai:443`
|
| 90 |
+
- Inspect client certificate requirements if mutual TLS is enabled
|
| 91 |
+
|
| 92 |
+
#### DNS Deep Dive
|
| 93 |
+
**Resolution Path Analysis:**
|
| 94 |
+
- Query authoritative nameservers: `dig @8.8.8.8 api.aurora.ai +trace`
|
| 95 |
+
- Check DNS response times: `dig api.aurora.ai | grep "Query time"`
|
| 96 |
+
- Verify DNSSEC validation: `dig api.aurora.ai +dnssec`
|
| 97 |
+
- Test DNS-over-HTTPS: Configure DoH provider in system settings
|
| 98 |
+
|
| 99 |
+
**DNS Cache Management:**
|
| 100 |
+
- Linux: `sudo systemd-resolve --flush-caches` or `sudo service nscd restart`
|
| 101 |
+
- macOS: `sudo dscacheutil -flushcache; sudo killall -HUP mDNSResponder`
|
| 102 |
+
- Windows: `ipconfig /flushdns`
|
| 103 |
+
- Verify `/etc/hosts` (Unix) or `C:\Windows\System32\drivers\etc\hosts` (Windows) for override entries
|
| 104 |
+
|
| 105 |
+
#### Proxy and VPN Troubleshooting
|
| 106 |
+
**Proxy Configuration Validation:**
|
| 107 |
+
- Check environment variables: `echo $HTTP_PROXY $HTTPS_PROXY $NO_PROXY`
|
| 108 |
+
- Test proxy authentication: `curl -x http://proxy.corp.com:8080 --proxy-user user:pass https://api.aurora.ai`
|
| 109 |
+
- Configure in Aurora config: `aurora config set proxy.url http://proxy.corp.com:8080`
|
| 110 |
+
- Bypass proxy for testing: `export NO_PROXY=api.aurora.ai`
|
| 111 |
+
|
| 112 |
+
**VPN-Specific Issues:**
|
| 113 |
+
- Check split-tunneling configuration
|
| 114 |
+
- Verify VPN MTU: `ip link show` and adjust if < 1500
|
| 115 |
+
- Test with VPN disabled to isolate issue
|
| 116 |
+
- Check for IPv6 leakage: `curl -6 https://api.aurora.ai` vs `curl -4 https://api.aurora.ai`
|
| 117 |
+
|
| 118 |
+
#### Application-Level Diagnostics
|
| 119 |
+
**Aurora Internal Logs:**
|
| 120 |
+
- Enable maximum verbosity: `aurora --log-level=trace <command>`
|
| 121 |
+
- Tail log file in real-time: `tail -f ~/.aurora/logs/aurora.log`
|
| 122 |
+
- Filter connection events: `grep -i "connection\|socket\|timeout" ~/.aurora/logs/aurora.log`
|
| 123 |
+
- Structured JSON logging: `aurora --log-format=json <command> | jq 'select(.level == "error")'`
|
| 124 |
+
|
| 125 |
+
**Request/Response Inspection:**
|
| 126 |
+
- Capture full HTTP exchange: `AURORA_DUMP_REQUESTS=1 aurora <command>`
|
| 127 |
+
- Measure round-trip time: `time aurora api healthcheck`
|
| 128 |
+
- Test with minimal request: `aurora api raw --method GET --endpoint /v1/health`
|
| 129 |
+
- Verify request headers: Check `User-Agent`, `Authorization`, `Content-Type` in debug output
|
| 130 |
+
|
| 131 |
+
#### System Resource Constraints
|
| 132 |
+
**Connection Pool Exhaustion:**
|
| 133 |
+
- Check open file descriptors: `ulimit -n` and increase if needed
|
| 134 |
+
- Monitor active sockets: `lsof -p $(pgrep aurora) | grep TCP`
|
| 135 |
+
- Review connection pool settings: `aurora config get http.max_connections`
|
| 136 |
+
- Adjust keep-alive timeout: `aurora config set http.keepalive_timeout 30`
|
| 137 |
+
|
| 138 |
+
**Memory and CPU Impact:**
|
| 139 |
+
- Profile resource usage: `top -p $(pgrep aurora)` or `htop`
|
| 140 |
+
- Check for memory leaks during long-running operations
|
| 141 |
+
- Monitor thread count: `ps -eLf | grep aurora | wc -l`
|
| 142 |
+
- Enable profiling: `aurora --profile=cpu <command>`
|
| 143 |
+
|
| 144 |
+
#### Enterprise Environment Considerations
|
| 145 |
+
**Corporate Security Appliances:**
|
| 146 |
+
- SSL/TLS Inspection: Export CA certificate and install system-wide
|
| 147 |
+
- Web Application Firewalls: Whitelist Aurora user-agent string
|
| 148 |
+
- DLP (Data Loss Prevention): Configure exceptions for Aurora traffic
|
| 149 |
+
- CASB (Cloud Access Security Broker): Add Aurora domains to allowlist
|
| 150 |
+
|
| 151 |
+
**Authentication Mechanisms:**
|
| 152 |
+
- NTLM proxy authentication: `aurora config set proxy.auth ntlm`
|
| 153 |
+
- Kerberos/SPNEGO: Ensure valid ticket with `klist`
|
| 154 |
+
- Client certificates: Specify with `--client-cert=/path/to/cert.pem --client-key=/path/to/key.pem`
|
| 155 |
+
- OAuth token refresh: `aurora auth token --refresh`
|
| 156 |
+
|
| 157 |
+
#### Advanced Debugging Techniques
|
| 158 |
+
**Packet Capture and Analysis:**
|
| 159 |
+
```bash
|
| 160 |
+
# Capture on specific interface
|
| 161 |
+
sudo tcpdump -i eth0 -s 0 -w aurora_debug.pcap 'host api.aurora.ai'
|
| 162 |
+
|
| 163 |
+
# Filter by port and decode HTTP
|
| 164 |
+
sudo tcpdump -i any -A 'tcp port 443 and host api.aurora.ai'
|
| 165 |
+
|
| 166 |
+
# Real-time monitoring with timestamps
|
| 167 |
+
sudo tcpdump -i any -tttt 'host api.aurora.ai'
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
**System Call and Socket Tracing (Linux):**
|
| 171 |
+
- Trace system calls: `strace -f -e trace=network aurora <command>`
|
| 172 |
+
- Monitor DNS queries: `sudo tcpdump -i any port 53`
|
| 173 |
+
- Track connection states: Use `ss -tan state established`
|
| 174 |
+
|
| 175 |
+
**Performance Profiling:**
|
| 176 |
+
- Generate flame graph: `aurora --profile=cpu --profile-output=profile.pb.gz <command>`
|
| 177 |
+
- Analyze with pprof: `go tool pprof -http=:8080 profile.pb.gz`
|
| 178 |
+
- Network timing breakdown: Use browser DevTools Network tab for web UI
|
| 179 |
+
|
| 180 |
+
#### Configuration File Troubleshooting
|
| 181 |
+
**Verify Configuration Syntax:**
|
| 182 |
+
```bash
|
| 183 |
+
# Validate config file
|
| 184 |
+
aurora config validate
|
| 185 |
+
|
| 186 |
+
# Show effective configuration
|
| 187 |
+
aurora config show --resolved
|
| 188 |
+
|
| 189 |
+
# Reset to defaults
|
| 190 |
+
aurora config reset --confirm
|
| 191 |
+
|
| 192 |
+
# Run a single command with an alternate config file
|
| 193 |
+
aurora --config=/tmp/test.yaml <command>
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
**Common Configuration Issues:**
|
| 197 |
+
- Incorrect endpoint URLs: Verify `api.base_url` setting
|
| 198 |
+
- Timeout values too low: Increase `http.timeout` and `http.read_timeout`
|
| 199 |
+
- Retry logic disabled: Enable with `http.retry.enabled=true`
|
| 200 |
+
- Connection limits: Adjust `http.max_connections_per_host`
|
| 201 |
+
|
| 202 |
+
#### External Service Integration Testing
|
| 203 |
+
**GitHub Connectivity:**
|
| 204 |
+
```bash
|
| 205 |
+
# Test GitHub API access
|
| 206 |
+
curl -H "Authorization: token YOUR_TOKEN" https://api.github.com/user
|
| 207 |
+
|
| 208 |
+
# Verify webhook endpoint
|
| 209 |
+
curl -X POST https://api.aurora.ai/webhooks/github/test
|
| 210 |
+
|
| 211 |
+
# Check SSH access
|
| 212 |
+
ssh -T git@github.com
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
**Jira Connectivity:**
|
| 216 |
+
```bash
|
| 217 |
+
# Test Jira REST API
|
| 218 |
+
curl -u your-email@example.com:api_token https://your-domain.atlassian.net/rest/api/3/myself
|
| 219 |
+
|
| 220 |
+
# Verify webhook delivery
|
| 221 |
+
aurora integrate jira test-webhook --project KEY
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
#### Logging and Monitoring
|
| 225 |
+
**Continuous Monitoring Setup:**
|
| 226 |
+
- Configure log aggregation: Ship logs to ELK, Splunk, or Datadog
|
| 227 |
+
- Set up alerts for connection failures
|
| 228 |
+
- Monitor error rates: `grep -c "connection refused\|timeout" aurora.log`
|
| 229 |
+
- Track success rates over time
|
| 230 |
+
|
| 231 |
+
**Metrics Collection:**
|
| 232 |
+
- Enable Prometheus metrics: `aurora serve --metrics-port=9090`
|
| 233 |
+
- Export connection statistics: `aurora metrics export --format=json`
|
| 234 |
+
- Dashboard visualization: Import Aurora Grafana dashboard
|
| 235 |
+
|
| 236 |
+
#### Emergency Workarounds
|
| 237 |
+
**Temporary Fixes:**
|
| 238 |
+
- Use alternative endpoint: `aurora --api-url=https://backup.aurora.ai`
|
| 239 |
+
- Offline mode (if supported): `aurora --offline <command>`
|
| 240 |
+
- Fallback to local processing: `aurora --no-remote <command>`
|
| 241 |
+
- Manual API calls: Use `curl` with stored authentication token
|
| 242 |
+
|
| 243 |
+
**Escalation Path:**
|
| 244 |
+
1. Collect diagnostic bundle: `aurora diagnostic collect --output=aurora-diag.zip`
|
| 245 |
+
2. Include system information: `aurora version --verbose`
|
| 246 |
+
3. Attach relevant logs and packet captures
|
| 247 |
+
4. Submit to support with issue details
|
document_intelligence.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Intelligence Module for Advanced Text Analysis and Processing
|
| 3 |
+
"""
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
+
import re
|
| 7 |
+
import math
|
| 8 |
+
from collections import Counter
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DocumentIntelligence:
|
| 12 |
+
"""Advanced document intelligence for smart analysis and summarization."""
|
| 13 |
+
|
| 14 |
+
def __init__(self, docs_root: Path):
    """Store the documentation root directory.

    Args:
        docs_root: Base directory; ``find_related_content`` reports each
            matching document's path relative to it.
    """
    self.docs_root = docs_root
|
| 16 |
+
|
| 17 |
+
def generate_smart_summary(self, content: str, summary_type: str = "medium") -> str:
    """Build an extractive summary from the highest-scoring sentences.

    Each sentence is scored on word frequency, position, length and the
    presence of a few signal keywords; the score is then divided by the
    sentence's word count so long sentences do not win on size alone.

    Args:
        content: Raw document text. It is first passed through
            ``_clean_pdf_content`` (defined elsewhere in the class;
            presumably strips PDF page markers -- confirm there).
        summary_type: ``"short"`` keeps up to 3 sentences, ``"long"`` up
            to 10, and any other value (including ``"medium"``) up to 6.

    Returns:
        The selected sentences joined by single spaces in their original
        order, or a fixed placeholder message when no sentence is found.
    """
    cleaned = self._clean_pdf_content(content)
    sentences = self._split_into_sentences(cleaned)
    if not sentences:
        return "No content available for summarization."

    frequencies = Counter(self._extract_words(cleaned))
    bonus_terms = ('important', 'key', 'main', 'primary', 'essential',
                   'note', 'must', 'should', 'required', 'configure',
                   'setup', 'install', 'create', 'build')
    total = len(sentences)

    ranked = []
    for position, sentence in enumerate(sentences):
        tokens = self._extract_words(sentence)
        size = len(tokens)
        lowered = sentence.lower()

        # Frequent document words make a sentence more representative.
        points = sum(frequencies.get(token, 0) for token in tokens)

        # Opening sentences get the larger bonus, closing ones a smaller one.
        if position < 3:
            points += 5
        elif position >= total - 2:
            points += 3

        # Moderate-length sentences are preferred.
        if 10 <= size <= 25:
            points += 3
        elif 5 <= size <= 35:
            points += 1

        # Substring match, so e.g. 'key' also fires inside 'keyboard'.
        points += 2 * sum(1 for term in bonus_terms if term in lowered)

        ranked.append((position, points / max(size, 1)))

    if summary_type == "short":
        limit = min(3, total)
    elif summary_type == "long":
        limit = min(10, total)
    else:
        limit = min(6, total)

    # list.sort is stable, so ties keep document order before the cut.
    ranked.sort(key=lambda item: item[1], reverse=True)
    chosen = sorted(position for position, _ in ranked[:limit])
    return ' '.join(sentences[position] for position in chosen)
|
| 85 |
+
|
| 86 |
+
def extract_key_concepts(self, content: str, min_frequency: int = 2) -> List[Dict[str, Any]]:
    """Collect frequently occurring terms that look like key concepts.

    Three kinds of candidates are counted: text in backticks
    (``technical_term``), text in ``**bold**`` (``emphasized_term``) and
    capitalised words longer than three letters (``proper_noun``).

    Args:
        content: Raw document text; it is passed through
            ``_clean_pdf_content`` (defined elsewhere in the class) first.
        min_frequency: Minimum number of occurrences for a candidate to
            be reported.

    Returns:
        Up to 20 dicts with ``concept``, ``frequency`` and ``type`` keys,
        ordered by descending frequency.
    """
    cleaned = self._clean_pdf_content(content)

    # (type label, extraction pattern, minimum term length)
    extractors = (
        ('technical_term', r'`([^`]+)`', 0),
        ('emphasized_term', r'\*\*([^*]+)\*\*', 0),
        ('proper_noun', r'\b[A-Z][a-z]+\b', 4),
    )

    found = []
    for label, pattern, min_length in extractors:
        counts = Counter(re.findall(pattern, cleaned))
        found.extend(
            {'concept': term, 'frequency': count, 'type': label}
            for term, count in counts.items()
            if count >= min_frequency and len(term) >= min_length
        )

    # sorted() is stable: equally frequent concepts keep extraction order.
    return sorted(found, key=lambda entry: entry['frequency'], reverse=True)[:20]
|
| 132 |
+
|
| 133 |
+
def analyze_readability(self, content: str) -> Dict[str, Any]:
    """Estimate readability with Flesch-style metrics.

    Word counts come from ``_extract_words``, which drops stop words and
    words of one or two letters, so the averages describe the filtered
    vocabulary rather than the raw text.

    Args:
        content: Raw document text; it is passed through
            ``_clean_pdf_content`` (defined elsewhere in the class) first.

    Returns:
        A dict with ``flesch_score`` (clamped to 0-100), ``grade_level``
        (at least 1), a ``complexity`` label and the supporting counts.
        When no sentences or words are found, only the first three keys
        are returned, with zero scores and complexity ``"unknown"``.
    """
    cleaned = self._clean_pdf_content(content)
    sentences = self._split_into_sentences(cleaned)
    words = self._extract_words(cleaned)

    if not (sentences and words):
        return {"flesch_score": 0, "grade_level": 0, "complexity": "unknown"}

    total_sentences = len(sentences)
    total_words = len(words)
    total_syllables = sum(map(self._count_syllables, words))

    words_per_sentence = total_words / total_sentences
    syllables_per_word = total_syllables / total_words

    # Flesch Reading Ease, clamped to the conventional 0-100 range.
    reading_ease = 206.835 - (1.015 * words_per_sentence) - (84.6 * syllables_per_word)
    reading_ease = max(0, min(100, reading_ease))

    # Flesch-Kincaid grade level, floored at first grade.
    grade = max(1, 0.39 * words_per_sentence + 11.8 * syllables_per_word - 15.59)

    for floor, label in ((70, "easy"), (50, "moderate"), (30, "difficult")):
        if reading_ease >= floor:
            complexity = label
            break
    else:
        complexity = "very difficult"

    return {
        "flesch_score": round(reading_ease, 1),
        "grade_level": round(grade, 1),
        "complexity": complexity,
        "avg_sentence_length": round(words_per_sentence, 1),
        "avg_syllables_per_word": round(syllables_per_word, 2),
        "total_sentences": total_sentences,
        "total_words": total_words,
    }
|
| 182 |
+
|
| 183 |
+
def extract_questions_and_answers(self, content: str) -> List[Dict[str, str]]:
|
| 184 |
+
"""Extract Q&A pairs from content."""
|
| 185 |
+
qa_pairs = []
|
| 186 |
+
|
| 187 |
+
# Look for FAQ sections
|
| 188 |
+
sections = self._extract_sections(content)
|
| 189 |
+
for section in sections:
|
| 190 |
+
if any(keyword in section['title'].lower() for keyword in ['faq', 'question', 'q&a', 'troubleshoot']):
|
| 191 |
+
pairs = self._extract_qa_from_section(section['content'])
|
| 192 |
+
qa_pairs.extend(pairs)
|
| 193 |
+
|
| 194 |
+
# Look for question patterns throughout the text
|
| 195 |
+
question_patterns = [
|
| 196 |
+
r'(?:Q:|Question:|Q\d+:)\s*([^?]+\?)\s*(?:A:|Answer:)?\s*([^Q\n]+)',
|
| 197 |
+
r'(?:^|\n)([^.!?\n]*\?)\s*\n([^?\n]+)',
|
| 198 |
+
r'How (?:do|to|can) ([^?]+\?)\s*([^?\n]+)'
|
| 199 |
+
]
|
| 200 |
+
|
| 201 |
+
for pattern in question_patterns:
|
| 202 |
+
matches = re.findall(pattern, content, re.MULTILINE | re.IGNORECASE)
|
| 203 |
+
for match in matches:
|
| 204 |
+
if len(match) == 2:
|
| 205 |
+
question, answer = match
|
| 206 |
+
qa_pairs.append({
|
| 207 |
+
"question": question.strip(),
|
| 208 |
+
"answer": answer.strip()[:300], # Limit answer length
|
| 209 |
+
"type": "extracted"
|
| 210 |
+
})
|
| 211 |
+
|
| 212 |
+
return qa_pairs[:15] # Return top 15 Q&A pairs
|
| 213 |
+
|
| 214 |
+
def find_related_content(self, query: str, doc_paths: List[Path], max_results: int = 5) -> List[Dict[str, Any]]:
|
| 215 |
+
"""Find documents related to a query using TF-IDF-like scoring."""
|
| 216 |
+
query_words = set(self._extract_words(query.lower()))
|
| 217 |
+
results = []
|
| 218 |
+
|
| 219 |
+
for path in doc_paths:
|
| 220 |
+
try:
|
| 221 |
+
content = path.read_text(encoding='utf-8', errors='ignore')
|
| 222 |
+
content_words = self._extract_words(content.lower())
|
| 223 |
+
|
| 224 |
+
if not content_words:
|
| 225 |
+
continue
|
| 226 |
+
|
| 227 |
+
# Calculate similarity score
|
| 228 |
+
word_freq = Counter(content_words)
|
| 229 |
+
score = 0
|
| 230 |
+
|
| 231 |
+
for query_word in query_words:
|
| 232 |
+
if query_word in word_freq:
|
| 233 |
+
# TF-IDF like scoring
|
| 234 |
+
tf = word_freq[query_word] / len(content_words)
|
| 235 |
+
score += tf * len(query_word) # Longer words get more weight
|
| 236 |
+
|
| 237 |
+
if score > 0:
|
| 238 |
+
# Normalize by document length
|
| 239 |
+
normalized_score = score / math.log(len(content_words) + 1)
|
| 240 |
+
|
| 241 |
+
# Get context snippet
|
| 242 |
+
snippet = self._extract_snippet(content, query_words)
|
| 243 |
+
|
| 244 |
+
results.append({
|
| 245 |
+
'path': str(path.relative_to(self.docs_root)),
|
| 246 |
+
'relevance_score': normalized_score,
|
| 247 |
+
'snippet': snippet,
|
| 248 |
+
'word_count': len(content_words)
|
| 249 |
+
})
|
| 250 |
+
|
| 251 |
+
except Exception:
|
| 252 |
+
continue
|
| 253 |
+
|
| 254 |
+
# Sort by relevance and return top results
|
| 255 |
+
results.sort(key=lambda x: x['relevance_score'], reverse=True)
|
| 256 |
+
return results[:max_results]
|
| 257 |
+
|
| 258 |
+
def _split_into_sentences(self, content: str) -> List[str]:
|
| 259 |
+
"""Split content into sentences."""
|
| 260 |
+
# Simple sentence splitting
|
| 261 |
+
sentences = re.split(r'[.!?]+', content)
|
| 262 |
+
return [s.strip() for s in sentences if s.strip() and len(s.strip()) > 10]
|
| 263 |
+
|
| 264 |
+
def _extract_words(self, text: str) -> List[str]:
|
| 265 |
+
"""Extract words from text."""
|
| 266 |
+
words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
|
| 267 |
+
# Filter out common stop words
|
| 268 |
+
stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those', 'it', 'its', 'they', 'them', 'their'}
|
| 269 |
+
return [word for word in words if word not in stop_words and len(word) > 2]
|
| 270 |
+
|
| 271 |
+
def _count_syllables(self, word: str) -> int:
|
| 272 |
+
"""Estimate syllable count for a word."""
|
| 273 |
+
word = word.lower()
|
| 274 |
+
if len(word) <= 3:
|
| 275 |
+
return 1
|
| 276 |
+
|
| 277 |
+
vowels = 'aeiouy'
|
| 278 |
+
syllable_count = 0
|
| 279 |
+
prev_was_vowel = False
|
| 280 |
+
|
| 281 |
+
for char in word:
|
| 282 |
+
if char in vowels:
|
| 283 |
+
if not prev_was_vowel:
|
| 284 |
+
syllable_count += 1
|
| 285 |
+
prev_was_vowel = True
|
| 286 |
+
else:
|
| 287 |
+
prev_was_vowel = False
|
| 288 |
+
|
| 289 |
+
# Handle silent e
|
| 290 |
+
if word.endswith('e') and syllable_count > 1:
|
| 291 |
+
syllable_count -= 1
|
| 292 |
+
|
| 293 |
+
return max(1, syllable_count)
|
| 294 |
+
|
| 295 |
+
def _extract_sections(self, content: str) -> List[Dict[str, str]]:
|
| 296 |
+
"""Extract sections from markdown content."""
|
| 297 |
+
sections = []
|
| 298 |
+
lines = content.split('\n')
|
| 299 |
+
current_section = None
|
| 300 |
+
current_content = []
|
| 301 |
+
|
| 302 |
+
for line in lines:
|
| 303 |
+
if line.strip().startswith('#'):
|
| 304 |
+
if current_section:
|
| 305 |
+
sections.append({
|
| 306 |
+
'title': current_section,
|
| 307 |
+
'content': '\n'.join(current_content).strip()
|
| 308 |
+
})
|
| 309 |
+
current_section = line.strip()
|
| 310 |
+
current_content = []
|
| 311 |
+
else:
|
| 312 |
+
current_content.append(line)
|
| 313 |
+
|
| 314 |
+
if current_section:
|
| 315 |
+
sections.append({
|
| 316 |
+
'title': current_section,
|
| 317 |
+
'content': '\n'.join(current_content).strip()
|
| 318 |
+
})
|
| 319 |
+
|
| 320 |
+
return sections
|
| 321 |
+
|
| 322 |
+
def _extract_qa_from_section(self, section_content: str) -> List[Dict[str, str]]:
    """Extract question/answer pairs from a block of text.

    A line ending in ``?`` is taken as a question. The non-blank,
    non-question lines that follow it are joined with single spaces to
    form the answer. An answer ends at the next blank line or the next
    question. Blank lines between a question and the start of its answer
    are skipped, so the usual markdown layout of a question, an empty
    line and then the answer paragraph is recognised.

    A question that is followed directly by another question, or by
    nothing at all, has no answer and is not returned.

    Args:
        section_content: The text to scan, typically one section body.

    Returns:
        A list of dicts in document order with the keys ``"question"``,
        ``"answer"`` and ``"type"`` (always ``"faq"``).
    """
    qa_pairs = []
    current_question = None
    current_answer = []

    for raw_line in section_content.split('\n'):
        line = raw_line.strip()
        is_question = line.endswith('?')

        if current_question is None:
            # Nothing pending: only a question line is of interest.
            if is_question:
                current_question = line
            continue

        if line and not is_question:
            current_answer.append(line)
            continue

        if not line and not current_answer:
            # Blank line between the question and its answer: keep the
            # question pending instead of discarding it.
            continue

        # A blank line after an answer, or a new question, closes the pair.
        if current_answer:
            qa_pairs.append({
                "question": current_question,
                "answer": ' '.join(current_answer),
                "type": "faq"
            })
        current_question = line if is_question else None
        current_answer = []

    # Don't forget the last Q&A pair
    if current_question and current_answer:
        qa_pairs.append({
            "question": current_question,
            "answer": ' '.join(current_answer),
            "type": "faq"
        })

    return qa_pairs
|
| 354 |
+
|
| 355 |
+
def _extract_snippet(self, content: str, query_words: set, snippet_length: int = 150) -> str:
    """Return a short excerpt of ``content`` around the earliest query hit.

    Matching is case-insensitive on both sides: the content and each query
    word are lowercased before searching. Empty query words are ignored.
    The excerpt is a window of ``snippet_length`` characters that starts
    about half a window before the earliest match; ``"..."`` is added on
    whichever side was cut off. When no query word occurs, the beginning
    of the content is returned instead. Newlines are replaced by spaces
    in every case so the result is a single line.

    Args:
        content: The text to take the excerpt from.
        query_words: Words to look for (matched as substrings).
        snippet_length: Maximum number of content characters in the
            excerpt, not counting the ellipsis markers.

    Returns:
        The single-line excerpt.
    """
    content_lower = content.lower()

    # Find the first occurrence of any query word
    positions = []
    for word in query_words:
        if not word:
            continue
        pos = content_lower.find(word.lower())
        if pos != -1:
            positions.append(pos)

    if not positions:
        # No query words found, return beginning
        snippet = content[:snippet_length] + "..." if len(content) > snippet_length else content
        return snippet.replace('\n', ' ')

    # Extract snippet around the found position
    first_pos = min(positions)
    start = max(0, first_pos - snippet_length // 2)
    end = min(len(content), start + snippet_length)
    snippet = content[start:end]

    if start > 0:
        snippet = "..." + snippet
    if end < len(content):
        snippet = snippet + "..."

    return snippet.replace('\n', ' ')
|
| 381 |
+
|
| 382 |
+
def _clean_pdf_content(self, content: str) -> str:
    """Clean text extracted from a PDF.

    The following steps are applied in order:

    1. Remove page-marker lines of the form ``--- Page N ---`` and
       ``--- Page N (Error reading: ...) ---``. Markers are matched as
       whole lines, so a marker on the first or last line and runs of
       consecutive markers (empty pages) are removed as well.
    2. Collapse runs of three or more line breaks (with optional
       whitespace between them) into one blank line.
    3. Re-join words that were hyphenated across a line break.
    4. Insert a space between a lowercase letter and a directly following
       uppercase letter. NOTE(review): this also splits intentional
       camel-case terms such as "JavaScript"; presumably accepted as a
       trade-off for fixing words glued together by extraction.
    5. Collapse runs of spaces into a single space.

    Args:
        content: Raw text as produced by the PDF extraction step.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    import re

    # Remove page markers like "--- Page 1 ---", including the
    # "(Error reading: ...)" variant. Anchoring with ^/$ instead of
    # consuming the surrounding newlines lets adjacent markers all match.
    content = re.sub(
        r'^--- Page \d+(?: \(Error reading:.*?\))? ---$',
        '',
        content,
        flags=re.MULTILINE,
    )

    # Fix common PDF extraction issues
    # Remove excessive whitespace
    content = re.sub(r'\n\s*\n\s*\n+', '\n\n', content)

    # Fix broken words (common in PDF extraction)
    content = re.sub(r'(\w)-\s*\n\s*(\w)', r'\1\2', content)

    # Fix spacing issues
    content = re.sub(r'([a-z])([A-Z])', r'\1 \2', content)

    # Remove extra spaces
    content = re.sub(r' +', ' ', content)

    return content.strip()
|
guides/FIXES_APPLIED.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Document Summarization Issues - Fixed
|
| 2 |
+
|
| 3 |
+
## Problem Summary
|
| 4 |
+
The docs-navigator agent was having issues with document summarization and content extraction. When users asked questions about content indirectly mentioned in documents, the agent would show error messages like:
|
| 5 |
+
|
| 6 |
+
- "Hmm, it looks like there was an issue summarizing that document"
|
| 7 |
+
- "still having issues with the summarization"
|
| 8 |
+
- "Oops, looks like I don't have a tool to directly extract a specific section"
|
| 9 |
+
|
| 10 |
+
## Root Cause Analysis
|
| 11 |
+
|
| 12 |
+
The issues were in the `server_docs.py` file, specifically in these functions:
|
| 13 |
+
|
| 14 |
+
1. **`_generate_overview_summary`**: Was only taking the first 3 sections and limiting to 30 words each, causing truncated/incomplete summaries
|
| 15 |
+
2. **`_extract_key_points`**: Was not properly processing bullet points from sections
|
| 16 |
+
3. **`_generate_detailed_summary`**: Was limiting content to 200 characters per section
|
| 17 |
+
4. **Missing functionality**: No way to extract specific sections by name
|
| 18 |
+
|
| 19 |
+
## Fixes Implemented
|
| 20 |
+
|
| 21 |
+
### 1. Improved Overview Summary Generation
|
| 22 |
+
```python
|
| 23 |
+
def _generate_overview_summary(content: str, sections: List[Dict[str, str]]) -> str:
|
| 24 |
+
"""Generate a concise overview summary."""
|
| 25 |
+
# Now processes ALL meaningful sections (skip empty ones)
|
| 26 |
+
# Increased word limit to 50 words per section
|
| 27 |
+
# Added fallback handling for edge cases
|
| 28 |
+
# Limits to 5 sections to avoid excessive text
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### 2. Enhanced Key Points Extraction
|
| 32 |
+
```python
|
| 33 |
+
def _extract_key_points(content: str, sections: List[Dict[str, str]]) -> str:
|
| 34 |
+
"""Extract key points from content."""
|
| 35 |
+
# Now processes bullet points from ALL sections
|
| 36 |
+
# Better bullet point cleaning and formatting
|
| 37 |
+
# Enhanced fallback with more keywords
|
| 38 |
+
# Increased limit to 15 points
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### 3. Improved Detailed Summary
|
| 42 |
+
```python
|
| 43 |
+
def _generate_detailed_summary(content: str, sections: List[Dict[str, str]]) -> str:
|
| 44 |
+
"""Generate a detailed summary with all sections."""
|
| 45 |
+
# Increased content limit to 400 characters per section
|
| 46 |
+
# Skip empty sections properly
|
| 47 |
+
# Better fallback handling
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
### 4. New Section Extraction Tool
|
| 51 |
+
Added a new MCP tool `extract_section` that allows:
|
| 52 |
+
- Case-insensitive partial matching of section titles
|
| 53 |
+
- Direct extraction of specific document sections
|
| 54 |
+
- Helpful error messages with available sections listed
|
| 55 |
+
- Support for multiple matching sections
|
| 56 |
+
|
| 57 |
+
### 5. Enhanced Error Handling
|
| 58 |
+
- Added `try`/`except` blocks in `intelligent_summarize`
|
| 59 |
+
- Improved error messages with fallback options
|
| 60 |
+
- Better handling of edge cases in document intelligence module
|
| 61 |
+
|
| 62 |
+
## Testing Results
|
| 63 |
+
|
| 64 |
+
The fixes have been tested with various scenarios:
|
| 65 |
+
|
| 66 |
+
✅ **Anti-patterns extraction**: Now correctly extracts and lists the 3 anti-patterns from prompting-guidelines.md
|
| 67 |
+
✅ **Best practices analysis**: Properly summarizes the 4 best practices with full content
|
| 68 |
+
✅ **Section-specific queries**: Can extract specific sections like "Anti-Patterns to Avoid"
|
| 69 |
+
✅ **Complex analysis**: Handles multi-document searches and analysis requests
|
| 70 |
+
✅ **Error recovery**: Graceful handling when sections are empty or missing
|
| 71 |
+
|
| 72 |
+
## Key Improvements
|
| 73 |
+
|
| 74 |
+
1. **Complete Content**: No more truncated summaries - users get full information
|
| 75 |
+
2. **Better Structure**: Proper section detection and processing
|
| 76 |
+
3. **Flexible Extraction**: New tool for extracting specific sections by name
|
| 77 |
+
4. **Robust Error Handling**: Fallback mechanisms prevent tool failures
|
| 78 |
+
5. **Enhanced Readability**: Better formatting and organization of extracted content
|
| 79 |
+
|
| 80 |
+
## Impact
|
| 81 |
+
|
| 82 |
+
Users can now ask complex questions about documentation content and receive complete, accurate responses instead of error messages. The agent can:
|
| 83 |
+
|
| 84 |
+
- Extract specific sections by name (e.g., "What are the anti-patterns?")
|
| 85 |
+
- Provide comprehensive summaries without truncation
|
| 86 |
+
- Handle edge cases gracefully
|
| 87 |
+
- Offer helpful suggestions when content isn't found
|
| 88 |
+
|
| 89 |
+
The fixes maintain backward compatibility while significantly improving the reliability and usefulness of the documentation analysis tools.
|
guides/GETTING_STARTED.md
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Getting Started with Docs Navigator MCP
|
| 2 |
+
|
| 3 |
+
Welcome! This guide will walk you through setting up and using the Docs Navigator MCP system step by step. By the end, you'll have a working AI assistant that can answer questions about your documentation.
|
| 4 |
+
|
| 5 |
+
## 📋 Prerequisites
|
| 6 |
+
|
| 7 |
+
Before you begin, make sure you have:
|
| 8 |
+
|
| 9 |
+
- **Python 3.10+** installed on your system
|
| 10 |
+
- **An Anthropic API key** (sign up at [console.anthropic.com](https://console.anthropic.com))
|
| 11 |
+
- **Command line access** (Terminal on macOS/Linux, Command Prompt on Windows)
|
| 12 |
+
- **UV package manager** (recommended) or pip
|
| 13 |
+
|
| 14 |
+
### Installing UV (Recommended)
|
| 15 |
+
|
| 16 |
+
UV provides faster dependency management than pip:
|
| 17 |
+
|
| 18 |
+
**macOS/Linux:**
|
| 19 |
+
```bash
|
| 20 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
**Windows:**
|
| 24 |
+
```powershell
|
| 25 |
+
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## 🛠️ Step-by-Step Setup
|
| 29 |
+
|
| 30 |
+
### Step 1: Get the Code
|
| 31 |
+
|
| 32 |
+
Clone or download this repository:
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
git clone <your-repo-url>
|
| 36 |
+
cd docs-navigator
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### Step 2: Set Up Python Environment
|
| 40 |
+
|
| 41 |
+
**Using UV (recommended):**
|
| 42 |
+
```bash
|
| 43 |
+
uv sync
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
**Using pip:**
|
| 47 |
+
```bash
|
| 48 |
+
# Create virtual environment
|
| 49 |
+
python -m venv .venv
|
| 50 |
+
|
| 51 |
+
# Activate it
|
| 52 |
+
# On Windows:
|
| 53 |
+
.venv\Scripts\activate
|
| 54 |
+
# On macOS/Linux:
|
| 55 |
+
source .venv/bin/activate
|
| 56 |
+
|
| 57 |
+
# Install dependencies
|
| 58 |
+
pip install -r requirements.txt
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### Step 3: Configure Your API Key
|
| 62 |
+
|
| 63 |
+
Create a `.env` file in the project root:
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# Create the file
|
| 67 |
+
touch .env # On macOS/Linux
|
| 68 |
+
# Or just create the file manually on Windows
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Add your Anthropic API key to the `.env` file:
|
| 72 |
+
|
| 73 |
+
```
|
| 74 |
+
ANTHROPIC_API_KEY=your-actual-api-key-here
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
**Important**: Never commit your `.env` file to version control!
|
| 78 |
+
|
| 79 |
+
### Step 4: Verify Your API Key
|
| 80 |
+
|
| 81 |
+
Test that your API key works:
|
| 82 |
+
|
| 83 |
+
```bash
|
| 84 |
+
# Using UV
|
| 85 |
+
uv run test_anthropic.py
|
| 86 |
+
|
| 87 |
+
# Using Python directly
|
| 88 |
+
python test_anthropic.py
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
You should see output like:
|
| 92 |
+
```
|
| 93 |
+
Testing model: claude-3-haiku-20240307
|
| 94 |
+
✅ claude-3-haiku-20240307: API working
|
| 95 |
+
Done testing models.
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### Step 5: Add Your Documentation
|
| 99 |
+
|
| 100 |
+
Place your documentation files in the `docs/` folder. The system supports:
|
| 101 |
+
|
| 102 |
+
- **Markdown files** (`.md`)
|
| 103 |
+
- **Text files** (`.txt`)
|
| 104 |
+
- **reStructuredText files** (`.rst`)
|
| 105 |
+
|
| 106 |
+
Example structure:
|
| 107 |
+
```
|
| 108 |
+
docs/
|
| 109 |
+
├── getting-started.md
|
| 110 |
+
├── api-reference.md
|
| 111 |
+
├── troubleshooting.txt
|
| 112 |
+
├── faq.md
|
| 113 |
+
└── installation.rst
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
**Sample content to try**: The project already includes sample docs you can test with:
|
| 117 |
+
- `overview.md`
|
| 118 |
+
- `setup.md`
|
| 119 |
+
- `troubleshooting.md`
|
| 120 |
+
- `prompting-guidelines.md`
|
| 121 |
+
- `auroraai_report.txt`
|
| 122 |
+
|
| 123 |
+
### Step 6: Test the MCP Server
|
| 124 |
+
|
| 125 |
+
Verify that the MCP server can read your docs:
|
| 126 |
+
|
| 127 |
+
```bash
|
| 128 |
+
# Using UV
|
| 129 |
+
uv run test_mcp.py
|
| 130 |
+
|
| 131 |
+
# Using Python directly
|
| 132 |
+
python test_mcp.py
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
Expected output:
|
| 136 |
+
```
|
| 137 |
+
Connecting to MCP server...
|
| 138 |
+
Listing available docs...
|
| 139 |
+
Available tools: ['list_docs', 'search_docs']
|
| 140 |
+
Available docs: [overview.md, setup.md, ...]
|
| 141 |
+
Search results for 'setup': [{"path": "setup.md", "snippet": "..."}]
|
| 142 |
+
✅ MCP connection and tools working correctly!
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### Step 7: Test End-to-End Functionality
|
| 146 |
+
|
| 147 |
+
Run a complete test:
|
| 148 |
+
|
| 149 |
+
```bash
|
| 150 |
+
# Using UV
|
| 151 |
+
uv run test_complete.py
|
| 152 |
+
|
| 153 |
+
# Using Python directly
|
| 154 |
+
python test_complete.py
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
This will ask the AI a question about your docs and show you the response.
|
| 158 |
+
|
| 159 |
+
### Step 8: Launch the Web Interface
|
| 160 |
+
|
| 161 |
+
Start the Gradio app:
|
| 162 |
+
|
| 163 |
+
```bash
|
| 164 |
+
# Using UV
|
| 165 |
+
uv run app_gradio.py
|
| 166 |
+
|
| 167 |
+
# Using Python directly
|
| 168 |
+
python app_gradio.py
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
You'll see:
|
| 172 |
+
```
|
| 173 |
+
* Running on local URL: http://127.0.0.1:7860
|
| 174 |
+
* To create a public link, set `share=True` in `launch()`.
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
Open http://127.0.0.1:7860 in your browser.
|
| 178 |
+
|
| 179 |
+
## 💬 Using the Chat Interface
|
| 180 |
+
|
| 181 |
+
Once the web interface opens, you can:
|
| 182 |
+
|
| 183 |
+
### Example Questions to Try:
|
| 184 |
+
|
| 185 |
+
1. **Discovery**: "What documentation do you have available?"
|
| 186 |
+
|
| 187 |
+
2. **Specific lookup**: "How do I set up authentication?"
|
| 188 |
+
|
| 189 |
+
3. **Troubleshooting**: "What should I do if I get connection errors?"
|
| 190 |
+
|
| 191 |
+
4. **Summarization**: "Give me an overview of the main features"
|
| 192 |
+
|
| 193 |
+
5. **Search**: "Find information about API endpoints"
|
| 194 |
+
|
| 195 |
+
### How It Works:
|
| 196 |
+
|
| 197 |
+
1. **You ask a question** in the chat interface
|
| 198 |
+
2. **The AI agent** receives your question
|
| 199 |
+
3. **MCP tools search** through your documentation files
|
| 200 |
+
4. **Claude AI analyzes** the search results
|
| 201 |
+
5. **You get an answer** with references to source files
|
| 202 |
+
|
| 203 |
+
## 🔍 Understanding the Components
|
| 204 |
+
|
| 205 |
+
### Files You'll Work With:
|
| 206 |
+
|
| 207 |
+
- **`app_gradio.py`**: The web interface (you probably won't need to modify this)
|
| 208 |
+
- **`client_agent.py`**: Connects to Claude AI and MCP server
|
| 209 |
+
- **`server_docs.py`**: Provides document search tools to the AI
|
| 210 |
+
- **`docs/`**: Your documentation files go here
|
| 211 |
+
- **`.env`**: Your API key and other secrets
|
| 212 |
+
|
| 213 |
+
### What Happens When You Ask a Question:
|
| 214 |
+
|
| 215 |
+
```
|
| 216 |
+
Your Question → Gradio → Client Agent → Claude AI
|
| 217 |
+
↓
|
| 218 |
+
"I need to search docs"
|
| 219 |
+
↓
|
| 220 |
+
MCP Server → docs/ folder
|
| 221 |
+
↓
|
| 222 |
+
Search Results
|
| 223 |
+
↓
|
| 224 |
+
Claude AI (generates answer)
|
| 225 |
+
↓
|
| 226 |
+
Gradio → Your Answer
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
## ⚙️ Customization Options
|
| 230 |
+
|
| 231 |
+
### Change the AI Model
|
| 232 |
+
|
| 233 |
+
Edit `client_agent.py` and modify the model name:
|
| 234 |
+
|
| 235 |
+
```python
|
| 236 |
+
model="claude-3-haiku-20240307" # Current model
|
| 237 |
+
model="claude-3-5-sonnet-20241022" # Higher quality (requires API access)
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
### Change the Port
|
| 241 |
+
|
| 242 |
+
Edit `app_gradio.py`:
|
| 243 |
+
|
| 244 |
+
```python
|
| 245 |
+
demo.launch() # Default port 7860
|
| 246 |
+
demo.launch(server_port=8080) # Custom port
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
### Add More File Types
|
| 250 |
+
|
| 251 |
+
Edit `server_docs.py`:
|
| 252 |
+
|
| 253 |
+
```python
|
| 254 |
+
exts = {".md", ".txt", ".rst"} # Current formats
|
| 255 |
+
exts = {".md", ".txt", ".rst", ".pdf", ".docx"} # Add more (requires additional code)
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
## 🐛 Troubleshooting Common Issues
|
| 259 |
+
|
| 260 |
+
### "Model not found" Error
|
| 261 |
+
|
| 262 |
+
**Problem**: Your API key doesn't have access to the specified Claude model.
|
| 263 |
+
|
| 264 |
+
**Solution**: The system will automatically test and find a working model. If this fails, check that your API key is valid.
|
| 265 |
+
|
| 266 |
+
### "No such file or directory" Error
|
| 267 |
+
|
| 268 |
+
**Problem**: Python path or virtual environment issues.
|
| 269 |
+
|
| 270 |
+
**Solution**:
|
| 271 |
+
```bash
|
| 272 |
+
# Make sure you're in the right directory
|
| 273 |
+
pwd # Should show /path/to/docs-navigator
|
| 274 |
+
|
| 275 |
+
# Make sure virtual environment is activated
|
| 276 |
+
which python # Should show .venv path
|
| 277 |
+
```
|
| 278 |
+
|
| 279 |
+
### No Documents Found
|
| 280 |
+
|
| 281 |
+
**Problem**: The system can't find your documentation files.
|
| 282 |
+
|
| 283 |
+
**Solution**:
|
| 284 |
+
- Check that files are in the `docs/` folder
|
| 285 |
+
- Verify file extensions (`.md`, `.txt`, `.rst`)
|
| 286 |
+
- Check file permissions
|
| 287 |
+
|
| 288 |
+
### Port Already in Use
|
| 289 |
+
|
| 290 |
+
**Problem**: Port 7860 is already taken.
|
| 291 |
+
|
| 292 |
+
**Solution**:
|
| 293 |
+
- Stop other applications using the port
|
| 294 |
+
- Or change the port in `app_gradio.py`
|
| 295 |
+
|
| 296 |
+
### Connection Refused
|
| 297 |
+
|
| 298 |
+
**Problem**: MCP server can't start.
|
| 299 |
+
|
| 300 |
+
**Solution**:
|
| 301 |
+
- Check that `server_docs.py` is executable
|
| 302 |
+
- Verify all dependencies are installed
|
| 303 |
+
- Check for Python syntax errors
|
| 304 |
+
|
| 305 |
+
## 📈 Next Steps
|
| 306 |
+
|
| 307 |
+
Once you have the basic system working:
|
| 308 |
+
|
| 309 |
+
1. **Add more documentation**: Populate the `docs/` folder with your content
|
| 310 |
+
|
| 311 |
+
2. **Customize prompts**: Modify the system prompts in `client_agent.py` to better suit your use case
|
| 312 |
+
|
| 313 |
+
3. **Improve search**: Enhance the search functionality in `server_docs.py`
|
| 314 |
+
|
| 315 |
+
4. **Add more tools**: Create additional MCP tools for specific documentation tasks
|
| 316 |
+
|
| 317 |
+
5. **Deploy**: Set up the system on a server for team access
|
| 318 |
+
|
| 319 |
+
## 💡 Tips for Better Results
|
| 320 |
+
|
| 321 |
+
### Organizing Your Docs:
|
| 322 |
+
|
| 323 |
+
- Use clear, descriptive filenames
|
| 324 |
+
- Include section headings in markdown
|
| 325 |
+
- Keep related information in the same file
|
| 326 |
+
- Use consistent terminology
|
| 327 |
+
|
| 328 |
+
### Writing Good Questions:
|
| 329 |
+
|
| 330 |
+
- Be specific about what you need
|
| 331 |
+
- Reference topics from your documentation
|
| 332 |
+
- Ask for examples when appropriate
|
| 333 |
+
- Request sources for verification
|
| 334 |
+
|
| 335 |
+
### Optimizing Performance:
|
| 336 |
+
|
| 337 |
+
- Keep individual doc files reasonably sized
|
| 338 |
+
- Use markdown headers for better structure
|
| 339 |
+
- Remove irrelevant or outdated content
|
| 340 |
+
- Test questions regularly to improve prompts
|
| 341 |
+
|
| 342 |
+
## 🆘 Getting Help
|
| 343 |
+
|
| 344 |
+
If you run into issues:
|
| 345 |
+
|
| 346 |
+
1. **Check the test scripts**: Run `test_mcp.py` and `test_anthropic.py`
|
| 347 |
+
2. **Review the logs**: Look for error messages in the terminal
|
| 348 |
+
3. **Verify your setup**: Double-check API keys and file paths
|
| 349 |
+
4. **Start fresh**: Create a new virtual environment if needed
|
| 350 |
+
|
| 351 |
+
## 🎉 Success!
|
| 352 |
+
|
| 353 |
+
You should now have a working documentation assistant! The AI can search through your docs and provide intelligent answers to your questions.
|
| 354 |
+
|
| 355 |
+
Try asking: "What can you help me with?" to get started!
|
guides/INTELLIGENT_TOOLS_GUIDE.md
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Intelligent Documentation Tools Guide
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
Your docs-navigator project has been enhanced with powerful intelligent tools for document analysis, summarization, and knowledge extraction. These tools use advanced text processing algorithms to provide insights that go far beyond simple search and retrieval.
|
| 6 |
+
|
| 7 |
+
## 🧠 New Intelligent Tools
|
| 8 |
+
|
| 9 |
+
### 1. **Intelligent Summarization** (`intelligent_summarize`)
|
| 10 |
+
Creates context-aware summaries using sentence scoring and key concept analysis.
|
| 11 |
+
|
| 12 |
+
**Features:**
|
| 13 |
+
- Multiple summary lengths (short, medium, long)
|
| 14 |
+
- Key concept extraction
|
| 15 |
+
- Readability analysis
|
| 16 |
+
- Focus keyword highlighting
|
| 17 |
+
|
| 18 |
+
**Example Usage:**
|
| 19 |
+
```python
|
| 20 |
+
# Through the chat interface:
|
| 21 |
+
"Create a medium-length summary of the setup guide with focus on configuration"
|
| 22 |
+
|
| 23 |
+
# Direct tool call:
|
| 24 |
+
{
|
| 25 |
+
"relative_path": "setup.md",
|
| 26 |
+
"summary_type": "medium",
|
| 27 |
+
"focus_keywords": "configuration, installation"
|
| 28 |
+
}
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### 2. **Document Structure Analysis** (`analyze_document_structure`)
|
| 32 |
+
Provides comprehensive structural analysis of documents.
|
| 33 |
+
|
| 34 |
+
**Features:**
|
| 35 |
+
- Header hierarchy and outline generation
|
| 36 |
+
- Content statistics (words, lines, sections)
|
| 37 |
+
- Code block and link detection
|
| 38 |
+
- Table and image identification
|
| 39 |
+
|
| 40 |
+
### 3. **Q&A Extraction** (`extract_qa_pairs`)
|
| 41 |
+
Automatically extracts question-answer pairs for FAQ generation.
|
| 42 |
+
|
| 43 |
+
**Features:**
|
| 44 |
+
- Pattern-based question detection
|
| 45 |
+
- Context-aware answer extraction
|
| 46 |
+
- Support for multiple Q&A formats
|
| 47 |
+
- Bulk extraction from all documents
|
| 48 |
+
|
| 49 |
+
### 4. **Semantic Document Search** (`semantic_search`)
|
| 50 |
+
Advanced search using relevance scoring and context analysis.
|
| 51 |
+
|
| 52 |
+
**Features:**
|
| 53 |
+
- Keyword relevance scoring
|
| 54 |
+
- Context snippet extraction
|
| 55 |
+
- Document ranking by similarity
|
| 56 |
+
- Word frequency analysis
|
| 57 |
+
|
| 58 |
+
### 5. **Document Comparison** (`compare_documents`)
|
| 59 |
+
Side-by-side analysis of document similarities and differences.
|
| 60 |
+
|
| 61 |
+
**Features:**
|
| 62 |
+
- Statistical comparison
|
| 63 |
+
- Word overlap analysis
|
| 64 |
+
- Structure comparison
|
| 65 |
+
- Unique content identification
|
| 66 |
+
|
| 67 |
+
### 6. **Definition Extraction** (`extract_definitions`)
|
| 68 |
+
Identifies and extracts definitions, terms, and explanations.
|
| 69 |
+
|
| 70 |
+
**Features:**
|
| 71 |
+
- Multiple definition pattern recognition
|
| 72 |
+
- Glossary section detection
|
| 73 |
+
- Technical term identification
|
| 74 |
+
- Definition density analysis
|
| 75 |
+
|
| 76 |
+
### 7. **Table of Contents Generation** (`generate_table_of_contents`)
|
| 77 |
+
Creates hierarchical TOCs for documents or entire documentation sets.
|
| 78 |
+
|
| 79 |
+
**Features:**
|
| 80 |
+
- Header-based outline generation
|
| 81 |
+
- Multi-level hierarchy support
|
| 82 |
+
- Cross-document TOC creation
|
| 83 |
+
- Depth analysis
|
| 84 |
+
|
| 85 |
+
### 8. **Related Document Discovery** (`find_related_documents`)
|
| 86 |
+
Finds documents related to queries using TF-IDF-like scoring.
|
| 87 |
+
|
| 88 |
+
**Features:**
|
| 89 |
+
- Advanced similarity algorithms
|
| 90 |
+
- Relevance scoring
|
| 91 |
+
- Context snippet extraction
|
| 92 |
+
- Cross-reference suggestions
|
| 93 |
+
|
| 94 |
+
### 9. **Documentation Gap Analysis** (`analyze_document_gaps`)
|
| 95 |
+
Identifies missing content and improvement opportunities.
|
| 96 |
+
|
| 97 |
+
**Features:**
|
| 98 |
+
- Content completeness analysis
|
| 99 |
+
- Section coverage assessment
|
| 100 |
+
- Readability evaluation
|
| 101 |
+
- Improvement recommendations
|
| 102 |
+
|
| 103 |
+
### 10. **Documentation Index Generation** (`generate_documentation_index`)
|
| 104 |
+
Creates comprehensive searchable indexes of all content.
|
| 105 |
+
|
| 106 |
+
**Features:**
|
| 107 |
+
- Concept clustering
|
| 108 |
+
- Cross-reference mapping
|
| 109 |
+
- Topic categorization
|
| 110 |
+
- Metadata extraction
|
| 111 |
+
|
| 112 |
+
## 🚀 Advanced Features
|
| 113 |
+
|
| 114 |
+
### Document Intelligence Engine
|
| 115 |
+
The system includes a sophisticated `DocumentIntelligence` class that provides:
|
| 116 |
+
|
| 117 |
+
- **Key Concept Extraction**: Identifies important terms and phrases
|
| 118 |
+
- **Smart Summarization**: Uses sentence scoring for optimal summaries
|
| 119 |
+
- **Readability Analysis**: Flesch reading ease scoring
|
| 120 |
+
- **Question Detection**: Automatic Q&A pair extraction
|
| 121 |
+
- **Content Similarity**: TF-IDF-based document comparison
|
| 122 |
+
|
| 123 |
+
### Natural Language Processing
|
| 124 |
+
Advanced text processing capabilities:
|
| 125 |
+
|
| 126 |
+
- **Sentence Scoring**: Multi-factor sentence importance evaluation
|
| 127 |
+
- **Phrase Extraction**: N-gram analysis for key phrases
|
| 128 |
+
- **Syllable Counting**: For readability metrics
|
| 129 |
+
- **Pattern Recognition**: Multiple definition and question patterns
|
| 130 |
+
|
| 131 |
+
## 💡 Practical Use Cases
|
| 132 |
+
|
| 133 |
+
### 1. **Documentation Quality Assurance**
|
| 134 |
+
```
|
| 135 |
+
"Analyze the quality and completeness of our documentation"
|
| 136 |
+
"Which documents need improvement in readability?"
|
| 137 |
+
"What sections are missing from our docs?"
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
### 2. **Content Discovery and Organization**
|
| 141 |
+
```
|
| 142 |
+
"Find all documents related to configuration and setup"
|
| 143 |
+
"Generate a comprehensive table of contents"
|
| 144 |
+
"Create an index of all concepts in the documentation"
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
### 3. **Automated FAQ Generation**
|
| 148 |
+
```
|
| 149 |
+
"Extract all questions and answers to create an FAQ"
|
| 150 |
+
"What common questions are addressed in the docs?"
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
### 4. **Content Summarization**
|
| 154 |
+
```
|
| 155 |
+
"Create executive summaries of all technical documents"
|
| 156 |
+
"Summarize the troubleshooting guide focusing on error handling"
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
### 5. **Documentation Maintenance**
|
| 160 |
+
```
|
| 161 |
+
"Compare the old and new versions of the setup guide"
|
| 162 |
+
"Identify duplicate content across documents"
|
| 163 |
+
"Find outdated or redundant information"
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
## 🔧 Integration Examples
|
| 167 |
+
|
| 168 |
+
### Gradio Chat Interface
|
| 169 |
+
The tools integrate seamlessly with your existing chat interface:
|
| 170 |
+
|
| 171 |
+
```python
|
| 172 |
+
# Users can ask natural language questions like:
|
| 173 |
+
"What are the main topics covered in our documentation?"
|
| 174 |
+
"Create a summary of all configuration-related content"
|
| 175 |
+
"Find documents that need better organization"
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
### Direct API Usage
|
| 179 |
+
For programmatic access:
|
| 180 |
+
|
| 181 |
+
```python
|
| 182 |
+
from client_agent import DocsNavigatorClient
|
| 183 |
+
|
| 184 |
+
client = DocsNavigatorClient()
|
| 185 |
+
await client.connect()
|
| 186 |
+
|
| 187 |
+
# Intelligent analysis
|
| 188 |
+
result = await client.session.call_tool("analyze_document_gaps", {})
|
| 189 |
+
summary = await client.session.call_tool("intelligent_summarize", {
|
| 190 |
+
"relative_path": "overview.md",
|
| 191 |
+
"summary_type": "short"
|
| 192 |
+
})
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
## 📈 Performance Benefits
|
| 196 |
+
|
| 197 |
+
1. **Faster Content Discovery**: Semantic search finds relevant content quickly
|
| 198 |
+
2. **Automated Insights**: Gap analysis identifies improvement areas automatically
|
| 199 |
+
3. **Consistent Quality**: Readability analysis ensures content standards
|
| 200 |
+
4. **User Experience**: Better organization and navigation
|
| 201 |
+
5. **Maintenance Efficiency**: Automated detection of issues and duplicates
|
| 202 |
+
|
| 203 |
+
## 🔮 Future Enhancement Ideas
|
| 204 |
+
|
| 205 |
+
### Content Generation
|
| 206 |
+
- **Auto-completion**: Suggest missing sections based on document type
|
| 207 |
+
- **Template Generation**: Create document templates from existing patterns
|
| 208 |
+
- **Content Recommendations**: Suggest related content to add
|
| 209 |
+
|
| 210 |
+
### Advanced Analytics
|
| 211 |
+
- **User Journey Mapping**: Track how users navigate documentation
|
| 212 |
+
- **Content Performance**: Identify most/least accessed content
|
| 213 |
+
- **Sentiment Analysis**: Analyze tone and user-friendliness
|
| 214 |
+
|
| 215 |
+
### Integration Opportunities
|
| 216 |
+
- **Version Control Integration**: Track documentation changes and improvements
|
| 217 |
+
- **CI/CD Integration**: Automated quality checks in deployment pipeline
|
| 218 |
+
- **Knowledge Base Sync**: Integration with external knowledge systems
|
| 219 |
+
|
| 220 |
+
## 🛠️ Customization Options
|
| 221 |
+
|
| 222 |
+
The system is designed to be easily extensible:
|
| 223 |
+
|
| 224 |
+
1. **Custom Patterns**: Add domain-specific definition patterns
|
| 225 |
+
2. **Scoring Algorithms**: Modify relevance scoring for your content type
|
| 226 |
+
3. **Analysis Metrics**: Add custom quality metrics
|
| 227 |
+
4. **Content Types**: Extend support for additional file formats
|
| 228 |
+
|
| 229 |
+
## 📚 Getting Started
|
| 230 |
+
|
| 231 |
+
1. **Test the tools**: Run `demo_intelligent_features.py` to see examples
|
| 232 |
+
2. **Try the chat interface**: Ask natural language questions about your docs
|
| 233 |
+
3. **Explore specific tools**: Use `test_intelligent_tools.py` for detailed testing
|
| 234 |
+
4. **Customize for your needs**: Modify patterns and scoring in `document_intelligence.py`
|
| 235 |
+
|
| 236 |
+
Your documentation system is now equipped with AI-powered intelligence that can understand, analyze, and improve your content automatically!
|
guides/UI_ENHANCEMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎨 Gradio UI/UX Enhancement Guide
|
| 2 |
+
|
| 3 |
+
This guide covers the various options available to improve the UI/UX of your Docs Navigator Gradio interface.
|
| 4 |
+
|
| 5 |
+
## 🚀 Quick Start
|
| 6 |
+
|
| 7 |
+
### Option 1: Use the Enhanced Default
|
| 8 |
+
```bash
|
| 9 |
+
python app_gradio.py
|
| 10 |
+
```
|
| 11 |
+
The main `app_gradio.py` has been upgraded with professional styling, better error handling, and modern design.
|
| 12 |
+
|
| 13 |
+
### Option 2: Try Different Styles
|
| 14 |
+
```bash
|
| 15 |
+
python launch_ui.py enhanced # Modern professional (default)
|
| 16 |
+
python launch_ui.py minimal # Clean, simple
|
| 17 |
+
python launch_ui.py corporate # Business/enterprise
|
| 18 |
+
python launch_ui.py dark # Dark mode
|
| 19 |
+
python launch_ui.py glassmorphism # Modern glass effect
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Option 3: Explore All Options
|
| 23 |
+
```bash
|
| 24 |
+
python gradio_ui_showcase.py modern
|
| 25 |
+
python gradio_ui_showcase.py dark
|
| 26 |
+
python gradio_ui_showcase.py minimal
|
| 27 |
+
python gradio_ui_showcase.py corporate
|
| 28 |
+
python gradio_ui_showcase.py glassmorphism
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## 🎭 Available UI Styles
|
| 32 |
+
|
| 33 |
+
### 1. **Enhanced Professional** (Default)
|
| 34 |
+
- **File**: `app_gradio.py`
|
| 35 |
+
- **Features**: Modern design, custom CSS, professional theme, avatars, examples
|
| 36 |
+
- **Best for**: General use, professional presentations
|
| 37 |
+
|
| 38 |
+
### 2. **Modern Animated**
|
| 39 |
+
- **File**: `gradio_ui_showcase.py` (launch with `python gradio_ui_showcase.py modern`)
|
| 40 |
+
- **Features**: Gradient backgrounds, animations, glassmorphism effects
|
| 41 |
+
- **Best for**: Impressive demos, modern aesthetics
|
| 42 |
+
|
| 43 |
+
### 3. **Dark Mode Professional**
|
| 44 |
+
- **File**: `gradio_ui_showcase.py` or `launch_ui.py` (launch with `python gradio_ui_showcase.py dark` or `python launch_ui.py dark`)
|
| 45 |
+
- **Features**: Dark theme, reduced eye strain, professional appearance
|
| 46 |
+
- **Best for**: Long usage sessions, developer-focused environments
|
| 47 |
+
|
| 48 |
+
### 4. **Minimal Clean**
|
| 49 |
+
- **File**: `launch_ui.py` (launch with `python launch_ui.py minimal`)
|
| 50 |
+
- **Features**: Distraction-free, simple, fast loading
|
| 51 |
+
- **Best for**: Focus on content, minimal resource usage
|
| 52 |
+
|
| 53 |
+
### 5. **Corporate Enterprise**
|
| 54 |
+
- **File**: `launch_ui.py` (launch with `python launch_ui.py corporate`)
|
| 55 |
+
- **Features**: Business-appropriate styling, professional colors
|
| 56 |
+
- **Best for**: Enterprise environments, formal presentations
|
| 57 |
+
|
| 58 |
+
### 6. **Glassmorphism Modern**
|
| 59 |
+
- **File**: `gradio_ui_showcase.py` (launch with `python gradio_ui_showcase.py glassmorphism`)
|
| 60 |
+
- **Features**: Glass effects, modern transparency, cutting-edge design
|
| 61 |
+
- **Best for**: Showcasing modern design trends
|
| 62 |
+
|
| 63 |
+
## 🛠️ Customization Options
|
| 64 |
+
|
| 65 |
+
### Theme Customization
|
| 66 |
+
You can easily customize themes in `ui_config.py`:
|
| 67 |
+
|
| 68 |
+
```python
|
| 69 |
+
from ui_config import create_theme, create_custom_css
|
| 70 |
+
|
| 71 |
+
# Create custom theme
|
| 72 |
+
my_theme = create_theme("professional", "purple_theme")
|
| 73 |
+
|
| 74 |
+
# Create custom CSS
|
| 75 |
+
my_css = create_custom_css(
|
| 76 |
+
components=["container", "modern_input", "animated_buttons"],
|
| 77 |
+
color_scheme="green_theme"
|
| 78 |
+
)
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### Available Color Schemes
|
| 82 |
+
- `blue_gradient`: Blue to purple gradient (default)
|
| 83 |
+
- `corporate_blue`: Professional blue tones
|
| 84 |
+
- `purple_theme`: Purple-focused palette
|
| 85 |
+
- `green_theme`: Green nature-inspired colors
|
| 86 |
+
|
| 87 |
+
### CSS Components
|
| 88 |
+
You can mix and match these CSS components:
|
| 89 |
+
- `container`: Basic container styling
|
| 90 |
+
- `glass_effect`: Glassmorphism background effects
|
| 91 |
+
- `modern_input`: Enhanced input field styling
|
| 92 |
+
- `animated_buttons`: Button hover animations
|
| 93 |
+
- `chat_bubbles`: Enhanced chat message styling
|
| 94 |
+
|
| 95 |
+
## 🎯 Advanced Customization
|
| 96 |
+
|
| 97 |
+
### 1. Custom Avatars
|
| 98 |
+
Replace the avatar images (local file paths or URLs) in any interface:
|
| 99 |
+
```python
|
| 100 |
+
avatar_images=(
|
| 101 |
+
"path/to/user-avatar.png", # User avatar
|
| 102 |
+
"path/to/bot-avatar.png" # Bot avatar
|
| 103 |
+
)
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### 2. Custom Fonts
|
| 107 |
+
Add Google Fonts or system fonts:
|
| 108 |
+
```python
|
| 109 |
+
theme = gr.themes.Soft(
|
| 110 |
+
font=[
|
| 111 |
+
gr.themes.GoogleFont("Your-Font-Name"),
|
| 112 |
+
"fallback-font",
|
| 113 |
+
"sans-serif"
|
| 114 |
+
]
|
| 115 |
+
)
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
### 3. Custom CSS
|
| 119 |
+
Add your own CSS for complete control:
|
| 120 |
+
```python
|
| 121 |
+
custom_css = """
|
| 122 |
+
.your-custom-class {
|
| 123 |
+
/* Your styles here */
|
| 124 |
+
}
|
| 125 |
+
"""
|
| 126 |
+
|
| 127 |
+
demo = gr.ChatInterface(
|
| 128 |
+
css=custom_css,
|
| 129 |
+
# ... other options
|
| 130 |
+
)
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### 4. Layout Modifications
|
| 134 |
+
Customize the chat interface components:
|
| 135 |
+
```python
|
| 136 |
+
gr.ChatInterface(
|
| 137 |
+
chatbot=gr.Chatbot(
|
| 138 |
+
height=600, # Adjust height
|
| 139 |
+
show_label=False, # Hide labels
|
| 140 |
+
bubble_full_width=False, # Bubble styling (deprecated in Gradio 5; safe to omit)
|
| 141 |
+
show_share_button=False # Hide share button
|
| 142 |
+
),
|
| 143 |
+
textbox=gr.Textbox(
|
| 144 |
+
placeholder="Custom placeholder...",
|
| 145 |
+
container=False, # Remove container
|
| 146 |
+
scale=7 # Adjust width ratio
|
| 147 |
+
),
|
| 148 |
+
submit_btn="Send 🚀", # Gradio 5 expects a label string (or bool), not a gr.Button
|
| 149 |
+
examples=["Custom", "Examples", "Here"]
|
| 150 |
+
)
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
## 🎨 Design Best Practices
|
| 154 |
+
|
| 155 |
+
### 1. **Color Psychology**
|
| 156 |
+
- **Blue**: Trust, professionalism, reliability
|
| 157 |
+
- **Purple**: Creativity, innovation, luxury
|
| 158 |
+
- **Green**: Growth, harmony, freshness
|
| 159 |
+
- **Dark themes**: Reduced eye strain, modern feel
|
| 160 |
+
|
| 161 |
+
### 2. **Typography**
|
| 162 |
+
- Use system fonts for fast loading: `system-ui`, `sans-serif`
|
| 163 |
+
- Google Fonts for custom branding: `Inter`, `Poppins`, `Roboto`
|
| 164 |
+
- Maintain good contrast ratios for accessibility
|
| 165 |
+
|
| 166 |
+
### 3. **Layout**
|
| 167 |
+
- Keep max-width around 1000-1200px for readability
|
| 168 |
+
- Use consistent spacing and border-radius
|
| 169 |
+
- Ensure responsive design for mobile devices
|
| 170 |
+
|
| 171 |
+
### 4. **Animations**
|
| 172 |
+
- Use subtle transitions (0.2-0.3s)
|
| 173 |
+
- Avoid excessive animations that distract
|
| 174 |
+
- Add hover effects for interactive feedback
|
| 175 |
+
|
| 176 |
+
## 🔧 Performance Considerations
|
| 177 |
+
|
| 178 |
+
### Fast Loading Options
|
| 179 |
+
- **Minimal theme**: Fastest loading, least CSS
|
| 180 |
+
- **System fonts**: No external font loading
|
| 181 |
+
- **Reduced animations**: Better performance on slower devices
|
| 182 |
+
|
| 183 |
+
### Rich Experience Options
|
| 184 |
+
- **Custom CSS**: More personalization, slightly slower
|
| 185 |
+
- **Google Fonts**: Better typography, requires internet
|
| 186 |
+
- **Complex animations**: Better UX, more CPU usage
|
| 187 |
+
|
| 188 |
+
## 📱 Mobile Responsiveness
|
| 189 |
+
|
| 190 |
+
All themes include mobile-responsive design with:
|
| 191 |
+
- Adjusted padding and margins for small screens
|
| 192 |
+
- Scalable text and components
|
| 193 |
+
- Touch-friendly button sizes
|
| 194 |
+
- Optimized chat bubble sizing
|
| 195 |
+
|
| 196 |
+
## 🚀 Deployment Tips
|
| 197 |
+
|
| 198 |
+
### For Production
|
| 199 |
+
1. Use the **Corporate** or **Enhanced** themes for professional environments
|
| 200 |
+
2. Test on different screen sizes and devices
|
| 201 |
+
3. Consider loading times for your users
|
| 202 |
+
4. Enable error handling and user feedback
|
| 203 |
+
|
| 204 |
+
### For Demos
|
| 205 |
+
1. Use **Modern** or **Glassmorphism** for visual impact
|
| 206 |
+
2. Include engaging examples and clear descriptions
|
| 207 |
+
3. Consider creating a temporary public link with `demo.launch(share=True)`
|
| 208 |
+
|
| 209 |
+
### For Development
|
| 210 |
+
1. Use **Dark** theme for reduced eye strain
|
| 211 |
+
2. Enable detailed error messages
|
| 212 |
+
3. Use **Minimal** theme for faster iteration
|
| 213 |
+
|
| 214 |
+
## 🔗 Quick Reference
|
| 215 |
+
|
| 216 |
+
| Style | Command | Best For |
|
| 217 |
+
|-------|---------|----------|
|
| 218 |
+
| Enhanced | `python app_gradio.py` | General use |
|
| 219 |
+
| Modern | `python gradio_ui_showcase.py modern` | Demos |
|
| 220 |
+
| Dark | `python launch_ui.py dark` | Development |
|
| 221 |
+
| Minimal | `python launch_ui.py minimal` | Focus |
|
| 222 |
+
| Corporate | `python launch_ui.py corporate` | Business |
|
| 223 |
+
| Glass | `python gradio_ui_showcase.py glassmorphism` | Showcase |
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
🎨 **Pro Tip**: Mix and match elements from different themes to create your perfect UI! Use `ui_config.py` as a starting point for custom configurations.
|
pyproject.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "docs-navigator-mcp"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Docs Navigator MCP — FastMCP server + agent + Gradio UI"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.10"
|
| 7 |
+
|
| 8 |
+
dependencies = [
|
| 9 |
+
"mcp[cli]>=0.1.0",
|
| 10 |
+
"anthropic>=0.36.0",
|
| 11 |
+
"python-dotenv>=1.0.1",
|
| 12 |
+
"gradio>=5.0.0",
|
| 13 |
+
"PyPDF2>=3.0.0",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
[tool.uv]
|
| 17 |
+
# Optional: treat the project as a package so uv builds and installs it into the environment
|
| 18 |
+
package = true
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mcp[cli]>=0.1.0
|
| 2 |
+
anthropic>=0.36.0
|
| 3 |
+
python-dotenv>=1.0.1
|
| 4 |
+
gradio>=5.0.0
|
| 5 |
+
PyPDF2>=3.0.0
|
server.py
ADDED
|
File without changes
|