# ChatBIA 안드로이드 스트리밍 연동 가이드

## 📡 API 엔드포인트

### 1. 일반 채팅 (비스트리밍)

```
POST /chat
```

- **타임아웃 위험**: 긴 응답 시 타임아웃 발생 가능
- **안드로이드에서 권장하지 않음**

### 2. 스트리밍 채팅 ✅ **권장**

```
POST /chat/stream
```

- **타임아웃 방지**: 토큰 단위로 실시간 수신
- **안드로이드에 최적화**
- **SSE (Server-Sent Events)** 방식

---

## 🔧 안드로이드 구현 (Kotlin)

### 1. build.gradle 의존성 추가

```gradle
dependencies {
    // OkHttp for SSE streaming
    implementation("com.squareup.okhttp3:okhttp:4.12.0")
    implementation("com.squareup.okhttp3:okhttp-sse:4.12.0")

    // JSON 파싱
    implementation("com.google.code.gson:gson:2.10.1")

    // Coroutines
    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")
}
```

### 2. 데이터 모델

```kotlin
// ChatRequest.kt
data class ChatRequest(
    val message: String,
    val mode: String = "bsl", // "bsl" or "general"
    val max_tokens: Int = 1024,
    val temperature: Float = 0.7f
)

// StreamingResponse.kt
data class StreamingResponse(
    val token: String = "",
    val done: Boolean = false,
    val token_count: Int = 0,
    val mode: String = "",
    val error: String? = null
)
```

### 3. 
ChatBIA API 클라이언트

```kotlin
// ChatBiaApiClient.kt
import com.google.gson.Gson
import com.google.gson.JsonParseException
import java.io.IOException
import java.util.concurrent.TimeUnit
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.channels.trySendBlocking
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.callbackFlow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.suspendCancellableCoroutine
import okhttp3.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.sse.EventSource
import okhttp3.sse.EventSourceListener
import okhttp3.sse.EventSources

/**
 * HTTP client for the ChatBIA server.
 *
 * @param baseUrl server root without a trailing slash; `/chat` and
 * `/chat/stream` are appended to it.
 */
class ChatBiaApiClient(private val baseUrl: String) {

    private val client = OkHttpClient.Builder()
        .connectTimeout(30, TimeUnit.SECONDS)
        .readTimeout(60, TimeUnit.SECONDS) // long read timeout for streaming
        .writeTimeout(30, TimeUnit.SECONDS)
        .build()

    private val gson = Gson()

    private val jsonMediaType = "application/json".toMediaType()

    /**
     * Streaming chat (recommended).
     *
     * Returns a cold [Flow]: each collection opens its own SSE connection to
     * `/chat/stream` and emits one [StreamingResponse] per received event.
     * The flow completes after an event with `done == true` or when the
     * server closes the stream. It fails with the transport error, or with a
     * [JsonParseException] when an event payload is not valid JSON.
     * Cancelling the collector cancels the underlying connection.
     */
    fun chatStream(request: ChatRequest): Flow<StreamingResponse> = callbackFlow {
        val httpRequest = Request.Builder()
            .url("$baseUrl/chat/stream")
            .post(gson.toJson(request).toRequestBody(jsonMediaType))
            .addHeader("Accept", "text/event-stream")
            .build()

        val listener = object : EventSourceListener() {
            override fun onEvent(
                eventSource: EventSource,
                id: String?,
                type: String?,
                data: String
            ) {
                val chunk: StreamingResponse? = try {
                    gson.fromJson(data, StreamingResponse::class.java)
                } catch (e: JsonParseException) {
                    close(e)
                    return
                }
                // Gson yields null for an empty payload; nothing to emit.
                if (chunk == null) return

                // Blocks the OkHttp reader thread while the buffer is full,
                // so no token is dropped when the collector is slow.
                trySendBlocking(chunk)

                // Closing the channel completes the flow; awaitClose below
                // then releases the connection.
                if (chunk.done) close()
            }

            override fun onFailure(
                eventSource: EventSource,
                t: Throwable?,
                response: Response?
            ) {
                // No-op if the channel is already closed (e.g. the failure is
                // the echo of our own cancel()), so nothing is reported twice.
                close(t ?: IOException("SSE 연결 실패: ${response?.code}"))
            }

            override fun onClosed(eventSource: EventSource) {
                close()
            }
        }

        val eventSource = EventSources.createFactory(client)
            .newEventSource(httpRequest, listener)

        // Runs on normal completion, on failure and on collector cancellation.
        awaitClose { eventSource.cancel() }
    }

    /**
     * Plain (non-streaming) chat.
     *
     * Long answers may exceed the read timeout; prefer [chatStream].
     * Cancelling the calling coroutine cancels the HTTP call.
     *
     * @throws IOException on transport failure, on a non-2xx status, or when
     * the response body is empty.
     */
    suspend fun chat(request: ChatRequest): ChatResponse =
        suspendCancellableCoroutine { continuation ->
            val httpRequest = Request.Builder()
                .url("$baseUrl/chat")
                .post(gson.toJson(request).toRequestBody(jsonMediaType))
                .build()

            val call = client.newCall(httpRequest)
            continuation.invokeOnCancellation { call.cancel() }

            call.enqueue(object : Callback {
                override fun onFailure(call: Call, e: IOException) {
                    continuation.resumeWithException(e)
                }

                override fun onResponse(call: Call, response: Response) {
                    // Parse first, resume afterwards: any parsing error must
                    // reach the caller instead of escaping on OkHttp's thread.
                    val parsed = try {
                        response.use { parseChatResponse(it) }
                    } catch (e: Exception) {
                        continuation.resumeWithException(e)
                        return
                    }
                    continuation.resume(parsed)
                }
            })
        }

    /** Converts an HTTP response into a [ChatResponse] or throws [IOException]. */
    private fun parseChatResponse(response: Response): ChatResponse {
        if (!response.isSuccessful) {
            throw IOException("HTTP ${response.code}: ${response.message}")
        }
        val body = response.body?.string()
        return gson.fromJson(body, ChatResponse::class.java)
            ?: throw IOException("응답 본문이 비어 있습니다")
    }

    data class ChatResponse(
        val response: String,
        val mode: String,
        val tokens: Int
    )
}
```

### 4. 
ViewModel 사용 예제

```kotlin
// ChatViewModel.kt
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.flow.onCompletion
import kotlinx.coroutines.flow.update
import kotlinx.coroutines.launch

/**
 * Holds the text of the answer being streamed and a loading flag for the UI.
 */
class ChatViewModel : ViewModel() {

    private val apiClient = ChatBiaApiClient("https://your-hf-space.hf.space")

    private val _chatState = MutableStateFlow("")

    /** Answer text accumulated so far, or an error message. */
    val chatState: StateFlow<String> = _chatState.asStateFlow()

    private val _isLoading = MutableStateFlow(false)

    /** `true` from the moment a message is sent until its stream terminates. */
    val isLoading: StateFlow<Boolean> = _isLoading.asStateFlow()

    /**
     * Sends [message] through the streaming endpoint and appends every
     * received token to [chatState].
     *
     * @param mode value forwarded as `ChatRequest.mode`.
     */
    fun sendStreamingMessage(message: String, mode: String = "bsl") {
        viewModelScope.launch {
            _isLoading.value = true
            _chatState.value = "" // reset the previous answer

            val request = ChatRequest(
                message = message,
                mode = mode,
                max_tokens = 1024,
                temperature = 0.7f
            )

            apiClient.chatStream(request)
                // Runs on completion, failure and cancellation alike, so the
                // loading flag cannot get stuck when no `done` event arrives.
                .onCompletion { _isLoading.value = false }
                .catch { e -> _chatState.value = "오류: ${e.message}" }
                .collect { response ->
                    when {
                        response.error != null ->
                            _chatState.value = "서버 오류: ${response.error}"
                        // The final `done` event carries no token to append.
                        !response.done ->
                            _chatState.update { it + response.token }
                    }
                }
        }
    }
}
```

### 5. 
Compose UI 예제

```kotlin
// ChatScreen.kt
import androidx.compose.foundation.layout.*
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
// Provides viewModel(); ships in androidx.lifecycle:lifecycle-viewmodel-compose.
import androidx.lifecycle.viewmodel.compose.viewModel

/**
 * Minimal chat screen: a card showing the streamed answer above an input row.
 * Input and button are disabled while a response is being streamed.
 */
@Composable
fun ChatScreen(viewModel: ChatViewModel = viewModel()) {
    val chatState by viewModel.chatState.collectAsState()
    val isLoading by viewModel.isLoading.collectAsState()
    var inputText by remember { mutableStateOf("") }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .padding(16.dp)
    ) {
        // Chat output
        Card(
            modifier = Modifier
                .fillMaxWidth()
                .weight(1f)
        ) {
            Text(
                text = chatState,
                modifier = Modifier.padding(16.dp)
            )
        }

        Spacer(modifier = Modifier.height(16.dp))

        // Input field and send button
        Row(
            modifier = Modifier.fillMaxWidth()
        ) {
            OutlinedTextField(
                value = inputText,
                onValueChange = { inputText = it },
                modifier = Modifier.weight(1f),
                placeholder = { Text("메시지 입력...") },
                enabled = !isLoading
            )

            Spacer(modifier = Modifier.width(8.dp))

            Button(
                onClick = {
                    if (inputText.isNotBlank()) {
                        viewModel.sendStreamingMessage(inputText)
                        inputText = ""
                    }
                },
                enabled = !isLoading
            ) {
                Text(if (isLoading) "전송 중..." else "전송")
            }
        }
    }
}
```

---

## 🧪 테스트 방법

### 1. 로컬 서버 실행

```bash
cd ChatBIA-Server
uvicorn main:app --host 0.0.0.0 --port 8000
```

### 2. Python 테스트

```bash
python test_streaming.py
```

### 3. 안드로이드 앱에서 연결

```kotlin
// 로컬 테스트 (에뮬레이터)
val apiClient = ChatBiaApiClient("http://10.0.2.2:8000")

// 실제 디바이스 (같은 네트워크)
val apiClient = ChatBiaApiClient("http://YOUR_IP:8000")

// Hugging Face Spaces (배포 후)
val apiClient = ChatBiaApiClient("https://your-space.hf.space")
```

> **참고**: Android 9(API 28) 이상에서는 평문 HTTP(`http://`) 통신이 기본적으로 차단됩니다. 로컬 서버로 테스트할 때는 `AndroidManifest.xml`의 `<application>`에 `android:usesCleartextTraffic="true"`를 추가하거나 Network Security Config로 해당 호스트를 허용하세요.

---

## 📊 응답 형식

### 스트리밍 응답 (SSE)

```
data: {"token":"안녕","done":false,"token_count":1}

data: {"token":"하세요","done":false,"token_count":2}

data: {"token":"!","done":false,"token_count":3}

data: {"token":"","done":true,"token_count":3,"mode":"bsl"}
```

### 최종 응답

```json
{
  "token": "",
  "done": true,
  "token_count": 150,
  "mode": "bsl"
}
```

### 오류 응답

```json
{
  "error": "오류 메시지",
  "done": true
}
```

---

## ⚡ 성능 최적화 팁

1. 
**타임아웃 설정**
   - Connect: 30초
   - Read: 60초 (스트리밍)
   - Write: 30초

2. **재연결 로직**

   ```kotlin
   /**
    * Collects the stream, retrying after a failure with exponential backoff.
    *
    * Makes at most [maxRetries] attempts. Cancellation is never retried, and
    * the error of the last attempt is rethrown so the caller can report it.
    */
   suspend fun retryOnFailure(maxRetries: Int = 3) {
       var attempts = 0
       while (attempts < maxRetries) {
           try {
               chatStream(request).collect { }
               return
           } catch (e: CancellationException) {
               // Retrying here would keep a cancelled coroutine alive.
               throw e
           } catch (e: Exception) {
               attempts++
               if (attempts >= maxRetries) throw e
               delay(2000L shl (attempts - 1)) // exponential backoff: 2s, 4s, 8s, ...
           }
       }
   }
   ```

3. **메모리 관리**
   - Flow를 사용하여 메모리 효율적으로 처리
   - UI 업데이트는 StateFlow로 최적화

---

## 🚀 배포 후 사용

Hugging Face Spaces에 배포 후:

```kotlin
val BASE_URL = "https://your-username-chatbia-server.hf.space"
val apiClient = ChatBiaApiClient(BASE_URL)
```

**주의**: Hugging Face Spaces 무료 플랜은 CPU만 제공되므로 응답 속도가 느릴 수 있습니다. 스트리밍 방식이 더욱 중요합니다!

---

## 🔗 관련 문서

- [FastAPI Streaming](https://fastapi.tiangolo.com/advanced/custom-response/#streamingresponse)
- [OkHttp SSE](https://square.github.io/okhttp/recipes/#server-sent-events)
- [Kotlin Flow](https://kotlinlang.org/docs/flow.html)