chatbit-api / ANDROID_STREAMING_GUIDE.md
Seounghyup's picture
Add streaming API endpoint for Android app
9026aea

ChatBIA 안드로이드 스트리밍 연동 가이드

📡 API 엔드포인트

1. 일반 채팅 (비스트리밍)

POST /chat
  • 타임아웃 위험: 긴 응답 시 타임아웃 발생 가능
  • 안드로이드에서 권장하지 않음

2. 스트리밍 채팅 ✅ 권장

POST /chat/stream
  • 타임아웃 방지: 토큰 단위로 실시간 수신
  • 안드로이드에 최적화
  • SSE (Server-Sent Events) 방식

🔧 안드로이드 구현 (Kotlin)

1. build.gradle 의존성 추가

dependencies {
    // OkHttp for SSE streaming
    implementation("com.squareup.okhttp3:okhttp:4.12.0")
    implementation("com.squareup.okhttp3:okhttp-sse:4.12.0")

    // JSON parsing
    implementation("com.google.code.gson:gson:2.10.1")

    // Coroutines
    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")
}

2. 데이터 모델

// ChatRequest.kt
/**
 * JSON body sent to the chat endpoints.
 *
 * Property names are kept in snake_case on purpose: they are serialized as-is
 * and therefore double as the wire-format field names.
 *
 * @property message the user's prompt text
 * @property mode either "bsl" or "general"
 * @property max_tokens generation limit requested from the server
 * @property temperature sampling temperature requested from the server
 */
data class ChatRequest(
    val message: String,
    val mode: String = "bsl",
    val max_tokens: Int = 1024,
    val temperature: Float = 0.7f,
)

// StreamingResponse.kt
/**
 * One event of the streaming chat response.
 *
 * Every property has a default so that events which omit fields (for example
 * an error event carrying only `error` and `done`) still deserialize.
 *
 * @property token the text fragment carried by this event; empty on the final event
 * @property done `true` on the last event of a stream
 * @property token_count token counter reported by the server
 * @property mode chat mode reported by the server
 * @property error server-side error message, or `null` when the event is not an error
 */
data class StreamingResponse(
    val token: String = "",
    val done: Boolean = false,
    val token_count: Int = 0,
    val mode: String = "",
    val error: String? = null,
)

3. ChatBIA API 클라이언트

// ChatBiaApiClient.kt
import com.google.gson.Gson
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.buffer
import kotlinx.coroutines.flow.callbackFlow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.suspendCancellableCoroutine
import okhttp3.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.sse.EventSource
import okhttp3.sse.EventSourceListener
import okhttp3.sse.EventSources
import java.io.IOException
import java.util.concurrent.TimeUnit
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine

/**
 * HTTP client for the ChatBIA server.
 *
 * Exposes the streaming endpoint (`POST /chat/stream`, Server-Sent Events) as a
 * cold [Flow] and the plain endpoint (`POST /chat`) as a suspend function.
 *
 * @param baseUrl server root without a trailing slash, e.g. `http://10.0.2.2:8000`
 */
class ChatBiaApiClient(private val baseUrl: String) {

    private val client = OkHttpClient.Builder()
        .connectTimeout(30, TimeUnit.SECONDS)
        // The read timeout applies per read, so it bounds the gap between streamed events.
        .readTimeout(60, TimeUnit.SECONDS)
        .writeTimeout(30, TimeUnit.SECONDS)
        .build()

    private val gson = Gson()

    /**
     * Streaming chat (recommended): emits one [StreamingResponse] per SSE event.
     *
     * The flow is cold: every collection opens a new connection. It completes
     * normally after the event whose `done` flag is set, or when the server
     * closes the stream. It fails with the transport error (or an [IOException]
     * carrying the HTTP status code) when the connection fails, and with the
     * parser's exception when an event payload is not valid JSON. Cancelling
     * the collector cancels the underlying HTTP call.
     *
     * @param request chat parameters, serialized as the JSON request body
     */
    fun chatStream(request: ChatRequest): Flow<StreamingResponse> = callbackFlow<StreamingResponse> {
        val httpRequest = Request.Builder()
            .url("$baseUrl/chat/stream")
            .post(jsonBody(request))
            .addHeader("Accept", "text/event-stream")
            .build()

        val listener = object : EventSourceListener() {

            override fun onEvent(
                eventSource: EventSource,
                id: String?,
                type: String?,
                data: String
            ) {
                val parsed: StreamingResponse? = try {
                    gson.fromJson(data, StreamingResponse::class.java)
                } catch (e: Exception) {
                    // Malformed payload: fail the flow; awaitClose below tears the call down.
                    close(e)
                    return
                }
                // Gson yields null for an empty or literal "null" payload; nothing to emit.
                val event = parsed ?: return

                trySend(event)

                // Close BEFORE the call is cancelled so that the "canceled" failure
                // OkHttp may report afterwards cannot turn a clean finish into an error.
                if (event.done) close()
            }

            override fun onFailure(
                eventSource: EventSource,
                t: Throwable?,
                response: Response?
            ) {
                // No-op if the channel was already closed by a done event.
                close(t ?: IOException("SSE 연결 실패: ${response?.code}"))
            }

            override fun onClosed(eventSource: EventSource) {
                close()
            }
        }

        val source = EventSources.createFactory(client).newEventSource(httpRequest, listener)

        // Runs on completion, failure and collector cancellation alike.
        awaitClose { source.cancel() }
    }.buffer(Channel.UNLIMITED) // trySend must never drop a token when the collector is slow

    /**
     * Plain (non-streaming) chat. Long generations risk hitting the read timeout;
     * prefer [chatStream] on Android.
     *
     * Cancelling the calling coroutine cancels the HTTP call.
     *
     * @param request chat parameters, serialized as the JSON request body
     * @return the parsed server reply
     * @throws IOException on transport failure, on a non-2xx status
     *   (message `HTTP <code>: <message>`), or when the body is empty
     */
    suspend fun chat(request: ChatRequest): ChatResponse =
        suspendCancellableCoroutine { continuation ->
            val httpRequest = Request.Builder()
                .url("$baseUrl/chat")
                .post(jsonBody(request))
                .build()

            val call = client.newCall(httpRequest)
            continuation.invokeOnCancellation { call.cancel() }

            call.enqueue(object : Callback {
                override fun onFailure(call: Call, e: IOException) {
                    continuation.resumeWithException(e)
                }

                override fun onResponse(call: Call, response: Response) {
                    // Anything thrown here would otherwise be lost inside OkHttp's
                    // dispatcher and leave the coroutine suspended forever.
                    val outcome = runCatching { response.use { parseChatResponse(it) } }
                    outcome.fold(
                        onSuccess = { continuation.resume(it) },
                        onFailure = { continuation.resumeWithException(it) },
                    )
                }
            })
        }

    /** Serializes [request] to a JSON request body. */
    private fun jsonBody(request: ChatRequest): RequestBody =
        gson.toJson(request).toRequestBody("application/json".toMediaType())

    /** Converts a `/chat` HTTP response into a [ChatResponse], throwing on any non-success. */
    private fun parseChatResponse(response: Response): ChatResponse {
        if (!response.isSuccessful) {
            throw IOException("HTTP ${response.code}: ${response.message}")
        }
        val body = response.body?.string()
        return gson.fromJson(body, ChatResponse::class.java)
            ?: throw IOException("Empty response body from /chat")
    }

    /**
     * Reply of the non-streaming `/chat` endpoint.
     *
     * @property response the generated text
     * @property mode chat mode reported by the server
     * @property tokens token count reported by the server
     */
    data class ChatResponse(
        val response: String,
        val mode: String,
        val tokens: Int
    )
}

4. ViewModel 사용 예제

// ChatViewModel.kt
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch

/**
 * Holds the chat screen state and drives one streaming request at a time.
 *
 * [chatState] is the text shown to the user (the reply accumulated so far, or
 * an error message); [isLoading] is `true` while a request is in flight.
 */
class ChatViewModel : ViewModel() {

    private val apiClient = ChatBiaApiClient("https://your-hf-space.hf.space")

    private val _chatState = MutableStateFlow("")
    val chatState: StateFlow<String> = _chatState

    private val _isLoading = MutableStateFlow(false)
    val isLoading: StateFlow<Boolean> = _isLoading

    /**
     * Sends [message] through the streaming endpoint and appends every received
     * token to [chatState].
     *
     * @param message the user's prompt
     * @param mode chat mode forwarded to the server ("bsl" by default)
     */
    fun sendStreamingMessage(message: String, mode: String = "bsl") {
        viewModelScope.launch {
            _isLoading.value = true
            _chatState.value = ""  // start from an empty reply

            val request = ChatRequest(
                message = message,
                mode = mode,
                max_tokens = 1024,
                temperature = 0.7f
            )

            try {
                apiClient.chatStream(request)
                    .catch { e ->
                        _chatState.value = "오류: ${e.message}"
                    }
                    .collect { response ->
                        when {
                            response.error != null -> {
                                _chatState.value = "서버 오류: ${response.error}"
                            }
                            response.done -> {
                                // Final event: nothing to append.
                            }
                            else -> {
                                _chatState.value += response.token
                            }
                        }
                    }
            } finally {
                // Reset on EVERY exit path. A stream that ends without a done
                // event, or a cancelled coroutine, would otherwise leave the UI
                // disabled forever.
                _isLoading.value = false
            }
        }
    }
}

5. Compose UI 예제

// ChatScreen.kt
import androidx.compose.foundation.layout.*
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp

/**
 * Minimal chat screen: a card showing the streamed reply above a text field
 * and a send button. Both inputs are disabled while a request is in flight.
 *
 * @param viewModel state holder; defaults to the [ChatViewModel] scoped to the
 *   current ViewModelStoreOwner. The factory is referenced by its fully
 *   qualified name because the snippet's import list does not include it
 *   (it comes from the `lifecycle-viewmodel-compose` artifact).
 */
@Composable
fun ChatScreen(viewModel: ChatViewModel = androidx.lifecycle.viewmodel.compose.viewModel()) {

    val chatState by viewModel.chatState.collectAsState()
    val isLoading by viewModel.isLoading.collectAsState()
    var inputText by remember { mutableStateOf("") }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .padding(16.dp)
    ) {
        // Reply output; takes all vertical space left over by the input row.
        Card(
            modifier = Modifier
                .fillMaxWidth()
                .weight(1f)
        ) {
            Text(
                text = chatState,
                modifier = Modifier.padding(16.dp)
            )
        }

        Spacer(modifier = Modifier.height(16.dp))

        // Input row
        Row(
            modifier = Modifier.fillMaxWidth()
        ) {
            OutlinedTextField(
                value = inputText,
                onValueChange = { inputText = it },
                modifier = Modifier.weight(1f),
                placeholder = { Text("메시지 입력...") },
                enabled = !isLoading
            )

            Spacer(modifier = Modifier.width(8.dp))

            Button(
                onClick = {
                    // Blank input is ignored; the field is cleared once the message is handed off.
                    if (inputText.isNotBlank()) {
                        viewModel.sendStreamingMessage(inputText)
                        inputText = ""
                    }
                },
                enabled = !isLoading
            ) {
                Text(if (isLoading) "전송 중..." else "전송")
            }
        }
    }
}

🧪 테스트 방법

1. 로컬 서버 실행

cd ChatBIA-Server
uvicorn main:app --host 0.0.0.0 --port 8000

2. Python 테스트

python test_streaming.py

3. 안드로이드 앱에서 연결

// Local test (emulator)
val apiClient = ChatBiaApiClient("http://10.0.2.2:8000")

// Real device (same network)
val apiClient = ChatBiaApiClient("http://YOUR_IP:8000")

// Hugging Face Spaces (after deployment)
val apiClient = ChatBiaApiClient("https://your-space.hf.space")

📊 응답 형식

스트리밍 응답 (SSE)

data: {"token":"안녕","done":false,"token_count":1}

data: {"token":"하세요","done":false,"token_count":2}

data: {"token":"!","done":false,"token_count":3}

data: {"token":"","done":true,"token_count":3,"mode":"bsl"}

최종 응답

{
  "token": "",
  "done": true,
  "token_count": 150,
  "mode": "bsl"
}

오류 응답

{
  "error": "오류 메시지",
  "done": true
}

⚡ 성능 최적화 팁

  1. 타임아웃 설정

    • Connect: 30초
    • Read: 60초 (스트리밍)
    • Write: 30초
  2. 재연결 로직

    /**
     * Collects the chat stream, retrying with exponential backoff
     * (2 s, 4 s, 8 s, … capped at 32 s) whenever the attempt fails.
     *
     * Best-effort: once [maxRetries] attempts have failed the function simply
     * returns. Coroutine cancellation is never treated as a failure and is
     * propagated immediately.
     *
     * Must be `suspend` because it calls the suspending `collect` and `delay`.
     *
     * NOTE(review): `chatStream` and `request` are expected to be in scope
     * where this snippet is pasted — confirm against the enclosing class.
     *
     * @param maxRetries maximum number of attempts; values <= 0 make this a no-op
     */
    suspend fun retryOnFailure(maxRetries: Int = 3) {
        var attempts = 0
        while (attempts < maxRetries) {
            try {
                chatStream(request).collect { }
                return
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Retrying a cancelled coroutine would break structured concurrency.
                throw e
            } catch (e: Exception) {
                attempts++
                // No point in sleeping when there is no attempt left.
                if (attempts >= maxRetries) return
                // Exponential backoff; the shift is capped so the delay cannot overflow.
                delay(2000L shl (attempts - 1).coerceAtMost(4))
            }
        }
    }
    
  3. 메모리 관리

    • Flow를 사용하여 메모리 효율적으로 처리
    • UI 업데이트는 StateFlow로 최적화

🚀 배포 후 사용

Hugging Face Spaces에 배포 후:

// Placeholder host: substitute the URL of your own deployed Space.
val BASE_URL = "https://your-username-chatbia-server.hf.space"
val apiClient = ChatBiaApiClient(BASE_URL)

주의: Hugging Face Spaces 무료 플랜은 CPU만 제공되므로 응답 속도가 느릴 수 있습니다. 스트리밍 방식이 더욱 중요합니다!


🔗 관련 문서