diff --git a/swift/deepgram-transcribe-audio/Constants.swift b/swift/deepgram-transcribe-audio/Constants.swift
new file mode 100644
index 00000000..b1082df0
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/Constants.swift
@@ -0,0 +1,11 @@
+let exampleRequest = """
+{
+  "payload": {
+    "fileUrl": "https://static.deepgram.com/examples/interview_speech-analytics.wav"
+  },
+  "variables": {
+    "DEEPGRAM_API_KEY": ""
+  }
+}
+"""
+let deepgramURL = "https://api.deepgram.com/v1/listen"
\ No newline at end of file
diff --git a/swift/deepgram-transcribe-audio/Index.swift b/swift/deepgram-transcribe-audio/Index.swift
new file mode 100644
index 00000000..1a45ad82
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/Index.swift
@@ -0,0 +1,30 @@
+import Foundation
+
+/// Entry point: decodes the request body, checks that an API key was supplied,
+/// asks `TranscribeAudio` for the transcription and returns the outcome as JSON.
+///
+/// Request-validation failures are reported as `success: false` plus an `error` entry.
+func main(context: RuntimeContext) async throws -> RuntimeOutput {
+  var response: [String: Any] = ["success": false]
+  do {
+    let transcribeRequest = try context.req.bodyRaw.decodeTranscribeRequest()
+    guard !transcribeRequest.variables.DEEPGRAM_API_KEY.isEmpty else {
+      throw TranscribeRequestError.emptyApiKey
+    }
+    response = await TranscribeAudio(
+      url: transcribeRequest.payload.fileUrl, apiKey: transcribeRequest.variables.DEEPGRAM_API_KEY)
+  } catch let TranscribeRequestError.dataCorrupted(message) {
+    response["error"] = message
+  } catch let TranscribeRequestError.keyNotFound(key: key) {
+    response["error"] =
+      "Key \(key.stringValue) not found in the request body. A proper request body for this function looks something like \(exampleRequest)"
+  } catch TranscribeRequestError.emptyApiKey {
+    response["error"] = "Please provide valid apikey"
+  } catch {
+    // Log the error value itself; `localizedDescription` of a plain enum error is only a generic message.
+    print(error)
+    response["error"] =
+      "Please provide valid Request body. A proper request body for this function looks something like \(exampleRequest)"
+  }
+  return try context.res.json(response)
+}
diff --git a/swift/deepgram-transcribe-audio/Package.swift b/swift/deepgram-transcribe-audio/Package.swift
new file mode 100644
index 00000000..be381819
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/Package.swift
@@ -0,0 +1,18 @@
+// swift-tools-version: 5.5
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+
+import PackageDescription
+
+let package = Package(
+  name: "ProllyFinal",
+  dependencies: [
+    .package(url: "https://github.com/swift-server/async-http-client.git", from: "1.9.0"),
+  ],
+  targets: [
+    .target(
+      name: "ProllyFinal",
+      dependencies: [
+        .product(name: "AsyncHTTPClient", package: "async-http-client"),
+      ])
+  ]
+)
diff --git a/swift/deepgram-transcribe-audio/README.MD b/swift/deepgram-transcribe-audio/README.MD
new file mode 100644
index 00000000..7392a0ff
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/README.MD
@@ -0,0 +1,131 @@
+# Transcribe Audio Files
+
+A Swift Cloud Function that transcribes an Audio file with the help of [Deepgram API](https://deepgram.com/).
+
+_Example input 1:_
+
+```json
+{
+  "fileUrl": "https://static.deepgram.com/examples/interview_speech-analytics.wav"
+}
+```
+
+_Example output 1:_
+
+```json
+{
+  "success": true,
+  "deepgramData": {
+    "metadata": {
+      "transaction_key": "deprecated",
+      "channels": 1,
+      "sha256": "",
+      "created": "",
+      "request_id": "",
+      "models": [
+        "96a295ec-6336-43d5-b1cb-1e48b5e6d9a4"
+      ],
+      "duration": 17.56631,
+      "model_info": {
+        "96a295ec-6336-43d5-b1cb-1e48b5e6d9a4": {
+          "version": "2023-02-22.3",
+          "name": "general",
+          "arch": "base"
+        }
+      }
+    },
+    "results": {
+      "channels": [
+        {
+          "alternatives": [
+            {
+              "transcript": "yep i said",
+              "confidence": 0.9955948,
+              "words": [
+                {
+                  "confidence": 0.9886008,
+                  "start": 5.6079545,
+                  "word": "yep",
+                  "end": 5.846591
+                },
+                {
+                  "confidence": 0.94343305,
+                  "start": 7.213404,
+                  "word": "i",
+                  "end": 7.3721275
+                },
+                {
+                  "confidence": 0.92837,
+                  "start": 7.3721275,
+                  "word": "said",
+                  "end": 7.530851
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  }
+}
+```
+
+_Example input 2:_
+
+```json
+{
+
+  "fileUrl": "WRONG_URL"
+}
+```
+
+_Example output 2:_
+
+```json
+{
+  "message": "Please provide a valid audio URL",
+  "success": false
+}
+```
+
+## 📝 Variables
+
+List of variables used by this cloud function:
+
+- **DEEPGRAM_API_KEY** - Deepgram API Key
+ℹ️ Create your Deepgram API key at [console.deepgram.com](https://console.deepgram.com)
+
+## 🚀 Deployment
+
+1. Clone this repository, and enter this function folder:
+
+   ```shell
+   git clone https://github.com/open-runtimes/examples.git && cd examples
+   cd swift/deepgram-transcribe-audio
+   ```
+
+2. Enter this function folder and build the code:
+
+   ```shell
+   docker run --rm --interactive --tty --volume $PWD:/mnt/code openruntimes/swift:v3-5.5 sh helpers/build.sh
+   ```
+
+   As a result, a `code.tar.gz` file will be generated.
+
+3. Start the Open Runtime:
+
+   ```shell
+   docker run -p 3000:3000 -e OPEN_RUNTIMES_SECRET=secret-key --rm --interactive --tty --volume $PWD/code.tar.gz:/mnt/code/code.tar.gz:ro openruntimes/swift:v3-5.5 sh helpers/start.sh "/usr/local/server/src/function/Runtime serve --env production --hostname 0.0.0.0 --port 3000"
+   ```
+
+Your function is now listening on port `3000`, and you can execute it by sending a `POST` request with appropriate authorization headers. To learn more about the runtime, you can visit the Swift runtime [README](https://github.com/open-runtimes/open-runtimes/tree/main/runtimes/swift-5.5).
+
+4. Run the following cURL command to send a request:
+```bash
+curl -g -X POST http://localhost:3000/ -H "x-open-runtimes-secret: secret-key" -H "Content-type: application/json" -d '{"payload":{"fileUrl":"https://static.deepgram.com/examples/interview_speech-analytics.wav"},"variables":{"DEEPGRAM_API_KEY":""}}'
+```
+
+## 📝 Notes
+
+- This function is designed for use with Appwrite Cloud Functions. You can learn more about it in [Appwrite docs](https://appwrite.io/docs/functions).
+- Working fine with both [openruntimes/swift:v3-5.5](https://github.com/open-runtimes/open-runtimes/tree/main/runtimes/swift-5.5) and [openruntimes/swift:v3-5.8](https://github.com/open-runtimes/open-runtimes/tree/main/runtimes/swift-5.8)
\ No newline at end of file
diff --git a/swift/deepgram-transcribe-audio/TranscribeRequest.swift b/swift/deepgram-transcribe-audio/TranscribeRequest.swift
new file mode 100644
index 00000000..7c3704a6
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/TranscribeRequest.swift
@@ -0,0 +1,20 @@
+import Foundation
+
+struct Payload: Codable{
+  var fileUrl: URL
+}
+
+struct Variables: Codable{
+  var DEEPGRAM_API_KEY: String
+}
+struct TranscribeRequest: Codable {
+  var payload: Payload
+  var variables: Variables
+}
+
+enum TranscribeRequestError: Error{
+  case emptyApiKey
+  case dataCorrupted(message:String)
+  case keyNotFound(key: CodingKey)
+  case invalidRequest
+}
\ No newline at end of file
diff --git a/swift/deepgram-transcribe-audio/Utils.swift b/swift/deepgram-transcribe-audio/Utils.swift
new file mode 100644
index 00000000..0c4d08ab
--- /dev/null
+++ b/swift/deepgram-transcribe-audio/Utils.swift
@@ -0,0 +1,102 @@
+import AsyncHTTPClient
+import Foundation
+
+/// Serializes a dictionary into a compact JSON string.
+///
+/// Returns `"{}"` when `json` holds values that `JSONSerialization` cannot encode.
+func DictToString(json: [String: Any]) -> String {
+  guard JSONSerialization.isValidJSONObject(json),
+    let data = try? JSONSerialization.data(withJSONObject: json, options: [])
+  else {
+    return "{}"
+  }
+  return String(decoding: data, as: UTF8.self)
+}
+
+extension String {
+  /// The UTF-8 encoding of the string.
+  func toData() -> Data {
+    // Encoding a Swift string as UTF-8 cannot fail, so no force-unwrap is needed.
+    Data(utf8)
+  }
+
+  /// Parses the string as a JSON object; see `Data.decodeJson()`.
+  func decodeJson() -> [String: Any] {
+    toData().decodeJson()
+  }
+
+  /// Decodes the string as a `TranscribeRequest`.
+  ///
+  /// - Throws: `TranscribeRequestError.dataCorrupted` or `.keyNotFound` for the
+  ///   matching `DecodingError` cases, `.invalidRequest` for any other failure.
+  func decodeTranscribeRequest() throws -> TranscribeRequest {
+    do {
+      return try JSONDecoder().decode(TranscribeRequest.self, from: toData())
+    } catch let DecodingError.dataCorrupted(ctx) {
+      throw TranscribeRequestError.dataCorrupted(message: ctx.debugDescription)
+    } catch let DecodingError.keyNotFound(key, _) {
+      throw TranscribeRequestError.keyNotFound(key: key)
+    } catch {
+      throw TranscribeRequestError.invalidRequest
+    }
+  }
+}
+
+extension Data {
+  /// Parses the data as a JSON object.
+  ///
+  /// Returns an empty dictionary when the data is not valid JSON or its top-level
+  /// value is not an object, so an unexpected body cannot crash the caller.
+  func decodeJson() -> [String: Any] {
+    let parsed = try? JSONSerialization.jsonObject(with: self, options: [])
+    return (parsed as? [String: Any]) ?? [:]
+  }
+}
+
+/// Asks Deepgram to transcribe the audio file at `url`.
+///
+/// - Parameters:
+///   - url: Audio file location, sent to Deepgram as the `url` field of the JSON body.
+///   - apiKey: Deepgram API key, sent in the `Authorization` header.
+/// - Returns: `["success": true, "deepgramData": ...]` for a readable HTTP 200
+///   response, otherwise `["success": false, "message": ...]`.
+func TranscribeAudio(url: URL, apiKey: String) async -> [String: Any] {
+  let httpClient = HTTPClient(eventLoopGroupProvider: .createNew)
+  defer {
+    // Shutdown is guaranteed to work if it's done precisely once (which is the case here).
+    try! httpClient.syncShutdown()
+  }
+  var request = HTTPClientRequest(url: deepgramURL)
+  request.method = .POST
+  request.headers.add(name: "Authorization", value: "Token \(apiKey)")
+  request.headers.add(name: "Content-Type", value: "application/json")
+  request.body = .bytes([UInt8](DictToString(json: ["url": url.absoluteString]).utf8))
+
+  do {
+    let deepgramResponse = try await httpClient.execute(request, timeout: .seconds(30))
+    // `upTo` only caps how many bytes are buffered in memory (1 MB); it is unrelated to the expected body length.
+    // ref: swift-server/async-http-client/issues/363
+    var body = try await deepgramResponse.body.collect(upTo: 1024 * 1024)
+    // A missing or non-JSON body decodes to an empty dictionary instead of crashing.
+    let transcribedJson = (body.readData(length: body.readableBytes) ?? Data()).decodeJson()
+    switch deepgramResponse.status.code {
+    case 200:
+      guard !transcribedJson.isEmpty else {
+        return ["success": false, "message": "Deepgram returned an unreadable response"]
+      }
+      return ["success": true, "deepgramData": transcribedJson]
+    case 401:
+      return ["success": false, "message": "Please provide a valid DEEPGRAM_API_KEY"]
+    case 400:
+      return ["success": false, "message": "Please provide a valid audio URL"]
+    default:
+      return [
+        "success": false,
+        "message": "\(transcribedJson["error"] ?? "encounter error from deepgram"), \(transcribedJson["reason"] ?? "")",
+      ]
+    }
+  } catch {
+    print(error)
+    return ["success": false, "message": error.localizedDescription]
+  }
+}