libkysdk-coreai-1.1.0.1/ 0000775 0001750 0001750 00000000000 15207167112 012574 5 ustar zp zp libkysdk-coreai-1.1.0.1/CMakeLists.txt 0000664 0001750 0001750 00000011555 15207167112 015343 0 ustar zp zp cmake_minimum_required(VERSION 3.5)
# Top-level project for the core AI client libraries; both C and C++ are enabled.
project(kysdk-coreai-speech LANGUAGES CXX C)

# Compile C++ as C++17 and do not fall back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)

# Never embed an RPATH in the produced binaries.
set(CMAKE_SKIP_RPATH ON)

# Write compile_commands.json into the build directory for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
#######################################################################
# Options
#######################################################################
# ENABLE_TEST: when ON, the tests/ subdirectory is added to the build.
option(ENABLE_TEST "Build Test" OFF)

# Project-local header roots (directory scope: they apply to every target
# declared below).
include_directories(
  include
  src
)

# External dependencies.
find_package(PkgConfig REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(KylinAiProto REQUIRED)
include_directories(${OPENSSL_INCLUDE_DIR})
pkg_check_modules(GIO REQUIRED gio-unix-2.0)
pkg_check_modules(AI_COMMON REQUIRED kysdk-ai-common)
find_package(jsoncpp REQUIRED)

# GDBus glue code. Each call stores its result in the named variable, which is
# later passed to add_library().
# NOTE(review): kylin_ai_generate_gdbus_proto_code presumably comes from the
# KylinAiProto package -- confirm.
kylin_ai_generate_gdbus_proto_code(SPEECH_COMMON_PROTO_FILES corespeechservice)
kylin_ai_generate_gdbus_proto_code(VISION_COMMON_PROTO_FILES corevisionservice)
kylin_ai_generate_gdbus_proto_code(
  EMBEDDING_COMMON_PROTO_FILES
  coretextembeddingservice
  coreimageembeddingservice
)

# Third-party header locations.
include_directories(
  ${GIO_INCLUDE_DIRS}
  ${OPENSSL_INCLUDE_DIR}
  ${AI_COMMON_INCLUDE_DIRS}
)

# Public SDK header directories, so sources can include them by bare file name.
include_directories(
  include/kylin-ai/coreai/speech
  include/kylin-ai/coreai/vision
  include/kylin-ai/coreai/embedding
  include/kylin-ai
)
# Absolute location of the implementation sources; the lists below stay
# absolute so they remain valid from any directory scope that reads them.
set(COREAI_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)

# Sources of the speech client library.
set(SPEECH_SERVICE_SOURCES
  ${COREAI_SRC_DIR}/speech/_speechrecognitionresult.cpp
  ${COREAI_SRC_DIR}/speech/_speechrecognitionsession.cpp
  ${COREAI_SRC_DIR}/speech/_speechsynthesisresult.cpp
  ${COREAI_SRC_DIR}/speech/_speechsynthesizersession.cpp
  ${COREAI_SRC_DIR}/speech/_speechmodelconfig.cpp
  ${COREAI_SRC_DIR}/speech/audioconfig.cpp
  ${COREAI_SRC_DIR}/speech/audiodatastream.cpp
  ${COREAI_SRC_DIR}/speech/coreaispeechserviceproxy.cpp
  ${COREAI_SRC_DIR}/speech/coreaispeechserver.cpp
  ${COREAI_SRC_DIR}/logger.cpp
  ${COREAI_SRC_DIR}/speech/recognizer.cpp
  ${COREAI_SRC_DIR}/speech/synthesizer.cpp
  ${COREAI_SRC_DIR}/util.cpp
)

# Sources of the vision (text recognition) client library.
set(VISION_SERVICE_SOURCES
  ${COREAI_SRC_DIR}/vision/_textrecognitionresult.cpp
  ${COREAI_SRC_DIR}/vision/_textrecognitionsession.cpp
  ${COREAI_SRC_DIR}/vision/textrecognition.cpp
  ${COREAI_SRC_DIR}/vision/corevisionserviceproxy.cpp
  ${COREAI_SRC_DIR}/vision/_textrecognitionconfig.cpp
  ${COREAI_SRC_DIR}/util.cpp
  ${COREAI_SRC_DIR}/logger.cpp
)

# Sources of the embedding client library.
set(EMBEDDING_SERVICE_SOURCES
  ${COREAI_SRC_DIR}/embedding/embedding.cpp
  ${COREAI_SRC_DIR}/embedding/imageembeddingprocessorproxy.cpp
  ${COREAI_SRC_DIR}/embedding/imageembeddingsession.cpp
  ${COREAI_SRC_DIR}/embedding/textembeddingprocessorproxy.cpp
  ${COREAI_SRC_DIR}/embedding/textembeddingsession.cpp
  ${COREAI_SRC_DIR}/logger.cpp
)
# Speech client library: public headers (listed so they are attached to the
# target), implementation sources, and the generated speech GDBus glue.
add_library(kysdk-coreai-speech SHARED
include/kylin-ai/coreai/speech/audioconfig.h
include/kylin-ai/coreai/speech/audiodatastream.h
include/kylin-ai/coreai/speech/recognizer.h
include/kylin-ai/coreai/speech/result.h
include/kylin-ai/coreai/speech/synthesizer.h
include/kylin-ai/coreai/speech/error.h
include/kylin-ai/coreai/speech/config.h
${SPEECH_SERVICE_SOURCES}
${SPEECH_COMMON_PROTO_FILES}
)
# Vision (text recognition) client library with its generated GDBus glue.
add_library(kysdk-coreai-vision SHARED
include/kylin-ai/coreai/vision/textrecognitionresult.h
include/kylin-ai/coreai/vision/textrecognition.h
include/kylin-ai/coreai/vision/error.h
include/kylin-ai/coreai/vision/config.h
${VISION_SERVICE_SOURCES}
${VISION_COMMON_PROTO_FILES}
)
# Embedding client library; no public headers are listed for this target.
add_library(kysdk-coreai-embedding SHARED
${EMBEDDING_SERVICE_SOURCES}
${EMBEDDING_COMMON_PROTO_FILES}
)
# Tests are opt-in (-DENABLE_TEST=ON).
if(ENABLE_TEST)
  add_subdirectory(tests)
endif()

# All three shared libraries use the same library version and SONAME version.
set_target_properties(
  kysdk-coreai-speech
  kysdk-coreai-vision
  kysdk-coreai-embedding
  PROPERTIES
    VERSION 1.0.0
    SOVERSION 1
)
# Link every client library against jsoncpp, GIO and OpenSSL.
#
# The visibility keyword is spelled out instead of relying on the legacy
# keyword-less signature. PUBLIC keeps the dependencies transitive, which is
# what the keyword-less form did, so consumers of these targets link as before.
foreach(coreai_target IN ITEMS
    kysdk-coreai-speech
    kysdk-coreai-vision
    kysdk-coreai-embedding)
  target_link_libraries(
    ${coreai_target}
    PUBLIC
      jsoncpp
      ${GIO_LIBRARIES}
      ${OPENSSL_LIBRARIES}
  )
endforeach()
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)

# Shared libraries go to the platform library directory.
install(
  TARGETS
    kysdk-coreai-speech
    kysdk-coreai-vision
    kysdk-coreai-embedding
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
)

# Public headers. CMAKE_INSTALL_INCLUDEDIR defaults to "include", so the default
# layout is unchanged while a packager's override is now honoured, matching the
# CMAKE_INSTALL_LIBDIR usage above.
install(
  DIRECTORY include/kylin-ai
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

# Extra files shipped verbatim from the source tree's usr/ directory.
# NOTE(review): the destination is an absolute path, so it ignores
# CMAKE_INSTALL_PREFIX -- confirm this is intended.
install(DIRECTORY usr/ DESTINATION /usr)
libkysdk-coreai-1.1.0.1/.gitignore 0000664 0001750 0001750 00000001517 15207167112 014570 0 ustar zp zp # This file is used to ignore files which are generated
# ----------------------------------------------------------------------------
*~
*.autosave
*.a
*.core
*.moc
*.o
*.obj
*.orig
*.rej
*.so
*.so.*
*_pch.h.cpp
*_resource.rc
*.qm
.#*
*.*#
core
!core/
tags
.DS_Store
.directory
*.debug
Makefile*
*.prl
*.app
moc_*.cpp
ui_*.h
qrc_*.cpp
Thumbs.db
*.res
*.rc
/.qmake.cache
/.qmake.stash
# qtcreator generated files
*.pro.user*
CMakeLists.txt.user*
# xemacs temporary files
*.flc
# Vim temporary files
.*.swp
# Visual Studio generated files
*.ib_pdb_index
*.idb
*.ilk
*.pdb
*.sln
*.suo
*.vcproj
*vcproj.*.*.user
*.ncb
*.sdf
*.opensdf
*.vcxproj
*vcxproj.*
# MinGW generated files
*.Debug
*.Release
# Python byte code
*.pyc
# Binaries
# --------
*.dll
*.exe
build
.vscode
.cache
.reuse
# third-party
third-party/llama.cpp/common/build-info.cpp libkysdk-coreai-1.1.0.1/src/ 0000775 0001750 0001750 00000000000 15207167112 013363 5 ustar zp zp libkysdk-coreai-1.1.0.1/src/logger.h 0000664 0001750 0001750 00000030554 15207167112 015022 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef UTILS_LOGGER_H
#define UTILS_LOGGER_H
#include
#include
#include
#include
#include
// #define CLR_CLR "\033[0m" // 恢复颜色
// #define CLR_BLACK "\033[30m" // 黑色字
// #define CLR_RED "\033[31m" // 红色字
// #define CLR_GREEN "\033[32m" // 绿色字
// #define CLR_YELLOW "\033[33m" // 黄色字
// #define CLR_BLUE "\033[34m" // 蓝色字
// #define CLR_PURPLE "\033[35m" // 紫色字
// #define CLR_SKYBLUE "\033[36m" // 天蓝字
// #define CLR_WHITE "\033[37m" // 白色字
//
// #define CLR_BLK_WHT "\033[40;37m" // 黑底白字
// #define CLR_RED_WHT "\033[41;37m" // 红底白字
// #define CLR_GREEN_WHT "\033[42;37m" // 绿底白字
// #define CLR_YELLOW_WHT "\033[43;37m" // 黄底白字
// #define CLR_BLUE_WHT "\033[44;37m" // 蓝底白字
// #define CLR_PURPLE_WHT "\033[45;37m" // 紫底白字
// #define CLR_SKYBLUE_WHT "\033[46;37m" // 天蓝底白字
// #define CLR_WHT_BLK "\033[47;30m" // 白底黑字
// Base name of the current source file. The "/" prepended to __FILE__
// guarantees that strrchr() always finds a slash, so the "+ 1" is valid even
// when __FILE__ contains no directory part.
#ifndef __FILENAME__
#define __FILENAME__ (strrchr("/" __FILE__, '/') + 1)
#endif
/**
 * Small console logger shared by the SDK sources.
 *
 * Output goes to stdout when RUN_IN_DEBUG is defined and to stderr otherwise.
 * Every level-filtered entry point holds the logger's mutex while it writes.
 * A message is emitted only when the logger's threshold is not
 * LOG_LEVEL_SILENT and the message level is at least the threshold.
 */
class Logger {
public:
    /** Severity levels in ascending order; a SILENT threshold disables output. */
    enum Level {
        LOG_LEVEL_SILENT = 0,
        LOG_LEVEL_DEBUG,
        LOG_LEVEL_INFO,
        LOG_LEVEL_WARN,
        LOG_LEVEL_ERROR
    };

    /** Recursion terminator for the variadic printLn(); defined out of line. */
    static void printLn();

    /** Streams each argument to std::cout followed by a single space. */
    template <typename T, typename... Args>
    static void printLn(T t, Args... args) {
        std::cout << t << ' ';
        printLn(args...);
    }

    /** Recursion terminator for the variadic printErrorLn(); defined out of line. */
    static void printErrorLn();

    /** Streams each argument to std::cerr followed by a single space. */
    template <typename T, typename... Args>
    static void printErrorLn(T t, Args... args) {
        std::cerr << t << ' ';
        printErrorLn(args...);
    }

    /**
     * Stream-style logging: writes the timestamp, the colored level tag and
     * then every argument, separated by spaces. Arguments are NOT interpreted
     * as a format string.
     */
    template <typename... Args>
    static void printLnLevel(const Logger& logger, Level level, Args... args) {
        std::lock_guard<std::mutex> locker(logger.mutex_);
        const char* tag = activeTag(logger, level);
        if (tag == nullptr) {
            return;
        }
#ifdef RUN_IN_DEBUG
        printLn(currentTime(), tag, args...);
#else
        // Release builds temporarily write to stderr.
        printErrorLn(currentTime(), tag, args...);
#endif
    }

    /**
     * printf-style logging of a plain message. `format` is printed verbatim
     * through "%s", so '%' characters in it are not interpreted.
     */
    static void printfLnLevel(const Logger& logger, Level level,
                              const char* format) {
        std::lock_guard<std::mutex> locker(logger.mutex_);
        const char* tag = activeTag(logger, level);
        if (tag == nullptr) {
            return;
        }
        // A null message is printed as an empty string instead of being
        // handed to "%s".
        std::fprintf(sink(), "%s %s %s\n", currentTime().c_str(), tag,
                     format != nullptr ? format : "");
    }

    /**
     * printf-style logging with arguments. `format` and `args` are forwarded
     * to std::fprintf unchanged, so they must agree exactly as for printf.
     */
    template <typename... Args>
    static void printfLnLevel(const Logger& logger, Level level,
                              const char* format, Args... args) {
        std::lock_guard<std::mutex> locker(logger.mutex_);
        const char* tag = activeTag(logger, level);
        if (tag == nullptr || format == nullptr) {
            return;
        }
        FILE* out = sink();
        std::fprintf(out, "%s %s ", currentTime().c_str(), tag);
        std::fprintf(out, format, args...);
        std::fprintf(out, "\n");
    }

    /** Builds a source-location string; defined out of line. */
    static std::string location(const char* fileName, int line,
                                const char* funcName);

    /** Returns the timestamp placed at the start of every line; defined out of line. */
    static std::string currentTime();

public:
    Logger();

private:
    /** Destination stream for the printf-style entry points. */
    static FILE* sink() {
#ifdef RUN_IN_DEBUG
        return stdout;
#else
        return stderr;
#endif
    }

    /**
     * Returns the colored tag for `level`, or nullptr when the message must be
     * dropped (silent logger, level below the threshold, or a level that has
     * no tag). The threshold is loaded once so a single decision is made.
     */
    static const char* activeTag(const Logger& logger, Level level) {
        const Level threshold = logger.level_.load();
        if (threshold == LOG_LEVEL_SILENT || level < threshold) {
            return nullptr;
        }
        switch (level) {
            case LOG_LEVEL_DEBUG:
                return "\033[36mdebug\033[0m";
            case LOG_LEVEL_INFO:
                return "\033[32minfo \033[0m";
            case LOG_LEVEL_WARN:
                return "\033[33mwarn \033[0m";
            case LOG_LEVEL_ERROR:
                return "\033[31merror\033[0m";
            default:
                return nullptr;
        }
    }

private:
    mutable std::mutex mutex_;
    std::atomic<Level> level_{LOG_LEVEL_INFO};
};
// Logger instance used by the LOG* macros below.
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit that includes this file its own Logger (and its own
// mutex) -- confirm that per-file instances are intended.
static Logger g_Logger{};
// Logging macros, one pair per severity (D = debug, I = info, W = warn,
// E = error):
//   LOGx(...)       stream-style: arguments go to Logger::printLnLevel and
//                   are printed separated by spaces; they are not treated as
//                   a format string.
//   LOGxF(fmt, ...) printf-style: arguments go to Logger::printfLnLevel.
// With RUN_IN_DEBUG defined, every message is additionally prefixed with the
// source location (file name, line, function). In that variant `fmt` must be
// a string literal because it is concatenated with the location prefix.
#ifdef RUN_IN_DEBUG
#define LOGD(...) \
do { \
Logger::printLnLevel( \
g_Logger, Logger::LOG_LEVEL_DEBUG, \
Logger::location(__FILENAME__, __LINE__, __FUNCTION__), \
##__VA_ARGS__); \
} while (0)
#define LOGDF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_DEBUG, \
"[%s:%d:%s] " fmt, __FILENAME__, __LINE__, \
__FUNCTION__, ##__VA_ARGS__); \
} while (0)
#define LOGI(...) \
do { \
Logger::printLnLevel( \
g_Logger, Logger::LOG_LEVEL_INFO, \
Logger::location(__FILENAME__, __LINE__, __FUNCTION__), \
##__VA_ARGS__); \
} while (0)
#define LOGIF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_INFO, \
"[%s:%d:%s] " fmt, __FILENAME__, __LINE__, \
__FUNCTION__, ##__VA_ARGS__); \
} while (0)
#define LOGW(...) \
do { \
Logger::printLnLevel( \
g_Logger, Logger::LOG_LEVEL_WARN, \
Logger::location(__FILENAME__, __LINE__, __FUNCTION__), \
##__VA_ARGS__); \
} while (0)
#define LOGWF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_WARN, \
"[%s:%d:%s] " fmt, __FILENAME__, __LINE__, \
__FUNCTION__, ##__VA_ARGS__); \
} while (0)
#define LOGE(...) \
do { \
Logger::printLnLevel( \
g_Logger, Logger::LOG_LEVEL_ERROR, \
Logger::location(__FILENAME__, __LINE__, __FUNCTION__), \
##__VA_ARGS__); \
} while (0)
#define LOGEF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_ERROR, \
"[%s:%d:%s] " fmt, __FILENAME__, __LINE__, \
__FUNCTION__, ##__VA_ARGS__); \
} while (0)
// Variant without RUN_IN_DEBUG: same macros, no source-location prefix.
#else
#define LOGD(...) \
do { \
Logger::printLnLevel(g_Logger, Logger::LOG_LEVEL_DEBUG, \
##__VA_ARGS__); \
} while (0)
#define LOGDF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_DEBUG, fmt, \
##__VA_ARGS__); \
} while (0)
#define LOGI(...) \
do { \
Logger::printLnLevel(g_Logger, Logger::LOG_LEVEL_INFO, ##__VA_ARGS__); \
} while (0)
#define LOGIF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_INFO, fmt, \
##__VA_ARGS__); \
} while (0)
#define LOGW(...) \
do { \
Logger::printLnLevel(g_Logger, Logger::LOG_LEVEL_WARN, ##__VA_ARGS__); \
} while (0)
#define LOGWF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_WARN, fmt, \
##__VA_ARGS__); \
} while (0)
#define LOGE(...) \
do { \
Logger::printLnLevel(g_Logger, Logger::LOG_LEVEL_ERROR, \
##__VA_ARGS__); \
} while (0)
#define LOGEF(fmt, ...) \
do { \
Logger::printfLnLevel(g_Logger, Logger::LOG_LEVEL_ERROR, fmt, \
## __VA_ARGS__); \
} while (0)
#endif
#endif // UTILS_LOGGER_H
libkysdk-coreai-1.1.0.1/src/vision/ 0000775 0001750 0001750 00000000000 15207167112 014672 5 ustar zp zp libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionresult.cpp 0000664 0001750 0001750 00000004523 15207167112 022225 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_textrecognitionresult.h"
#include "textrecognitionresult.h"
// Returns the text of one recognized line as a NUL-terminated string, or
// nullptr when text_line is null. The pointer refers to storage held by
// text_line itself.
const char *text_line_get_value(TextLine *text_line) {
    return text_line != nullptr ? text_line->text.c_str() : nullptr;
}
// Returns the corner points of one recognized line (the positions of its four
// corners). On success *point_number is always set to 4 and the returned
// pointer refers to the line's own point storage.
//
// Returns nullptr, leaving *point_number untouched, when either argument is
// null or when the line holds fewer than four points. Reporting four points
// for a shorter array would let the caller read past its end.
PixelPoint *text_line_get_corner_points(TextLine *text_line,
                                        int *point_number) {
    constexpr int kCornerCount = 4;

    if (text_line == nullptr || point_number == nullptr) {
        return nullptr;
    }
    if (static_cast<int>(text_line->points.size()) < kCornerCount) {
        return nullptr;
    }

    *point_number = kCornerCount;
    // data() is the portable way to obtain the contiguous storage; the
    // iterator's base() member is a libstdc++ implementation detail.
    return text_line->points.data();
}
// Returns the complete recognized text of a result, without layout
// information, or nullptr when result is null. The pointer refers to storage
// held by result itself.
const char *text_recognition_result_get_value(TextRecognitionResult *result) {
    return result != nullptr ? result->allText.c_str() : nullptr;
}
// Returns the array of line pointers stored in result and writes the number of
// entries to *line_count. Returns nullptr, without touching *line_count, when
// either argument is null. The array is the result's own storage.
TextLine **text_recognition_result_get_text_lines(TextRecognitionResult *result,
                                                  int *line_count) {
    if (result == nullptr || line_count == nullptr) {
        return nullptr;
    }

    auto &lines = result->lines;
    *line_count = static_cast<int>(lines.size());
    return lines.data();
}
// Returns the error code stored in result, or -1 when result is null.
int text_recognition_result_get_error_code(TextRecognitionResult *result) {
    return result != nullptr ? result->errorCode : -1;
}
// Returns the error message stored in result as a NUL-terminated string, or
// nullptr when result is null. The pointer refers to storage held by result.
const char *text_recognition_result_get_error_message(
    TextRecognitionResult *result) {
    return result != nullptr ? result->errorMessage.c_str() : nullptr;
}
libkysdk-coreai-1.1.0.1/src/vision/corevisionserviceproxy.h 0000664 0001750 0001750 00000005405 15207167112 021712 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _CORE_VISION_SERVICE_PROXY_H_
#define _CORE_VISION_SERVICE_PROXY_H_
#include
#include
#include
#include "_textrecognitionconfig.h"
#include "corevisionserviceglue.h"
#include "error.h"
#include "textrecognition.h"
namespace kyai {
namespace core {
namespace vision {
// Hook invoked after the proxy has re-established its server connection.
// It takes no arguments and its result is ignored.
using ReconnectedCallback = std::function<void()>;
// Process-wide holder of the D-Bus connection to the core vision server.
// Obtained through getInstance(); the constructor is private.
class CoreVisionServer {
public:
// Returns the single shared instance.
static CoreVisionServer &getInstance();
~CoreVisionServer();
// Opens a connection to the default server address.
void init();
// True when a connection object is currently held.
bool available() const { return connection_ != nullptr; }
// Returns the held connection (may be nullptr); ownership is not transferred.
GDBusConnection *getConnection() const { return connection_; }
private:
CoreVisionServer() = default;
// Opens a connection to the given D-Bus address.
void init(const std::string &unixPath);
// Drops the reference to the held connection, if any.
void destroy();
private:
GDBusConnection *connection_{nullptr};
};
// Client-side wrapper around the generated AiRuntimeCoreVisionService GDBus
// proxy. It forwards text-recognition requests and delivers replies through a
// user-supplied callback.
class CoreVisionServiceProxy {
public:
~CoreVisionServiceProxy();
// Creates the GDBus proxy; returns false when that fails.
bool connectToServer();
// Retries connectToServer() until it succeeds or maxReconnectAttempts is hit.
bool reconnectServer();
// Sets the callback (and its opaque user data) that receives results.
void setRecognitionResultCallback(TextRecognitionResultCallback callback,
void *userData);
// Sets the hook invoked after a successful reconnect.
void setReconnectedCallback(ReconnectedCallback callback);
// Asks the server to create an engine session from a JSON config string; the
// session id is written to *sessionId and an error code is returned.
int initEngine(const char *config, int *sessionId);
// Asks the server to destroy the given session.
void destroyTextRecognition(int sessionId);
// Starts an asynchronous recognition on an image file path.
void recognizeTextFromImageFileAsync(int sessionId, const char *imageFile);
// Starts an asynchronous recognition on an in-memory image buffer.
void recognizeTextFromImageDataAsync(int sessionId, const char *data,
unsigned int imageDataLength);
private:
// Completion handler for the asynchronous recognition calls; userData is the
// CoreVisionServiceProxy that issued the call.
static void onRecognitionResultCallback(GObject *sourceObject,
GAsyncResult *res,
gpointer userData);
// Dispatches on the GError code of a failed call.
void handleErrorOccurred(int errorCode);
void handleServerClosed();
void handleServerTimeout();
private:
TextRecognitionResultCallback recognitionCallback_{nullptr};
ReconnectedCallback reconnectedCallback_{nullptr};
void *recognitionUserData_{nullptr};
AiRuntimeCoreVisionService *proxy_{nullptr};
// Number of reconnect attempts made since the counter was last reset.
int reconnectAttempts_{0};
static constexpr int maxReconnectAttempts{10};
};
} // namespace vision
} // namespace core
} // namespace kyai
#endif
libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionsession.cpp 0000664 0001750 0001750 00000004607 15207167112 022375 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_textrecognitionsession.h"
#include
namespace kyai {
namespace core {
namespace vision {
/**
 * Registers a reconnect hook on the service proxy so that the engine session
 * is initialised again (initSession()) after the proxy has reconnected. The
 * value returned by initSession() is discarded by the hook.
 */
_TextRecognitionSession::_TextRecognitionSession() {
    // NOTE(review): the hook captures `this`, so it is only valid while this
    // object stays at the same address -- confirm sessions are never copied
    // or moved.
    auto reinitialiseSession = [this]() { initSession(); };
    serviceProxy_.setReconnectedCallback(reinitialiseSession);
}
/**
 * Connects to the server and creates the engine session.
 *
 * @return AISDK_RUNTIME_ERROR when no connection could be established,
 *         otherwise the code returned by initSession().
 */
int _TextRecognitionSession::init() {
    return initServer() ? initSession()
                        : AiSdkCommonErrorCode::AISDK_RUNTIME_ERROR;
}
void _TextRecognitionSession::destroyTextRecognitionSession() {
serviceProxy_.destroyTextRecognition(sessionId_);
}
/**
 * Forwards the result callback and its opaque user data to the service proxy.
 *
 * @param callback function that receives recognition results
 * @param userData pointer handed back to callback unchanged
 */
void _TextRecognitionSession::setRecognitionResultCallback(
    TextRecognitionResultCallback callback,
    void *userData)
{
    serviceProxy_.setRecognitionResultCallback(callback, userData);
}
void _TextRecognitionSession::setRecognizeTextModelConfig(
const TextRecognitionModelConfig &config) {
modelConfig_ = config;
}
void _TextRecognitionSession::recognizeTextFromImageFileAsync(
const char *imageFile) {
serviceProxy_.recognizeTextFromImageFileAsync(sessionId_, imageFile);
}
void _TextRecognitionSession::recognizeTextFromImageDataAsync(
const char *imageData, unsigned int imageDataLength) {
serviceProxy_.recognizeTextFromImageDataAsync(sessionId_, imageData,
imageDataLength);
}
/**
 * Connects the service proxy, falling back to the proxy's retry logic when
 * the first attempt fails.
 *
 * @return true once a connection exists, false when all attempts failed.
 */
bool _TextRecognitionSession::initServer() {
    // Short-circuit: the retry path only runs when the first attempt failed.
    return serviceProxy_.connectToServer() || serviceProxy_.reconnectServer();
}
/**
 * Serialises the stored model configuration to JSON and asks the server to
 * create an engine session from it; the new id is stored in sessionId_.
 *
 * @return the error code reported by the service proxy.
 */
int _TextRecognitionSession::initSession() {
    const std::string configJson = modelConfigToJson(modelConfig_);
    return serviceProxy_.initEngine(configJson.c_str(), &sessionId_);
}
} // namespace vision
} // namespace core
} // namespace kyai libkysdk-coreai-1.1.0.1/src/vision/corevisionserviceproxy.cpp 0000664 0001750 0001750 00000024704 15207167112 022250 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "corevisionserviceproxy.h"
#include
#include
#include
#include
#include
#include
#include "_textrecognitionconfig.h"
#include "_textrecognitionresult.h"
#include "logger.h"
#include "util.h"
namespace kyai {
namespace core {
namespace vision {
// D-Bus address of the core vision server: a unix socket in a directory named
// after the calling user's uid. Built once during static initialisation.
const std::string serverUnixPath = "unix:path=/tmp/.kylin-ai-runtime-unix/" +
std::to_string(getuid()) +
"/core-vision.sock";
// Object path passed when the service proxy is created.
// NOTE(review): the pointer itself is not const and has external linkage --
// confirm nothing reassigns it.
const char *objectPath = "/com/kylin/AiRuntime/CoreVision";
/**
 * Unpacks a recognition reply into recognitionResult.
 *
 * The reply is read as a container with four children, in this order: full
 * text (string), lines (container of "(sa(ii)d)" entries), error code (int32)
 * and error message (string). For every line a TextLine is allocated with
 * `new` and appended to recognitionResult->lines; this function does not free
 * them.
 *
 * On a malformed reply an error is logged and the function returns early;
 * fields filled in up to that point are kept. Every GVariant reference
 * obtained here is released on all paths.
 *
 * @param parameters        reply to unpack; nothing happens when null
 * @param recognitionResult destination; nothing happens when null
 */
void dissectRecognitionResult(GVariant *parameters,
                              TextRecognitionResult *recognitionResult) {
    if (parameters == nullptr || recognitionResult == nullptr) {
        return;
    }

    GVariantIter iter;
    g_variant_iter_init(&iter, parameters);
    // Each call returns a new reference, or nullptr when no child is left.
    GVariant *text = g_variant_iter_next_value(&iter);
    GVariant *lines = g_variant_iter_next_value(&iter);
    GVariant *errorCode = g_variant_iter_next_value(&iter);
    GVariant *errorMessage = g_variant_iter_next_value(&iter);

    // Releases whichever of the four children was actually obtained.
    const auto releaseChildren = [&]() {
        if (text != nullptr) {
            g_variant_unref(text);
        }
        if (lines != nullptr) {
            g_variant_unref(lines);
        }
        if (errorCode != nullptr) {
            g_variant_unref(errorCode);
        }
        if (errorMessage != nullptr) {
            g_variant_unref(errorMessage);
        }
    };

    if (!text || !lines || !errorCode || !errorMessage) {
        LOGE("recognition result error: result is nullptr!");
        releaseChildren();
        return;
    }

    const gchar *resultText = g_variant_get_string(text, nullptr);
    recognitionResult->allText = resultText != nullptr ? resultText : "";

    GVariantIter linesIter;
    g_variant_iter_init(&linesIter, lines);
    while (GVariant *lineVariant = g_variant_iter_next_value(&linesIter)) {
        // Initialised so the null check below is meaningful even if
        // g_variant_get() leaves the outputs untouched.
        gchar *oneTextLine = nullptr;
        GVariant *pointsValue = nullptr;
        double conf = 0.0;  // part of the wire format; not stored anywhere
        g_variant_get(lineVariant, "(s@a(ii)d)", &oneTextLine, &pointsValue,
                      &conf);
        if (!oneTextLine || !pointsValue) {
            LOGE("recognition result error: result is nullptr!");
            g_free(oneTextLine);  // g_free() accepts nullptr
            if (pointsValue != nullptr) {
                g_variant_unref(pointsValue);
            }
            g_variant_unref(lineVariant);
            releaseChildren();
            return;
        }

        TextLine *line = new TextLine{};
        GVariantIter pointsIter;
        g_variant_iter_init(&pointsIter, pointsValue);
        while (GVariant *pointVariant =
                   g_variant_iter_next_value(&pointsIter)) {
            PixelPoint point;
            g_variant_get(pointVariant, "(ii)", &point.x, &point.y);
            if (point.x < 0 || point.y < 0) {
                // A negative coordinate resets the whole point to the origin.
                LOGE("recognition result point error: pixel point < 0!");
                point.x = 0;
                point.y = 0;
            }
            line->points.push_back(point);
            g_variant_unref(pointVariant);
        }
        g_variant_unref(pointsValue);

        line->text = oneTextLine;
        recognitionResult->lines.push_back(line);
        g_free(oneTextLine);
        g_variant_unref(lineVariant);
    }

    recognitionResult->errorCode = g_variant_get_int32(errorCode);
    const gchar *resultErrorMessage =
        g_variant_get_string(errorMessage, nullptr);
    recognitionResult->errorMessage =
        resultErrorMessage != nullptr ? resultErrorMessage : "";

    // The strings above were copied, so the variants can be released now.
    releaseChildren();
}
/** Returns the process-wide instance, constructed on first use. */
CoreVisionServer &CoreVisionServer::getInstance()
{
    static CoreVisionServer sharedServer;
    return sharedServer;
}
/** Releases the held connection, if any. */
CoreVisionServer::~CoreVisionServer()
{
    destroy();
}
/**
 * Drops this object's reference to the connection and forgets the pointer, so
 * that available() reports false afterwards and a second call does nothing.
 */
void CoreVisionServer::destroy() {
    if (connection_ == nullptr) {
        return;
    }
    g_object_unref(connection_);
    connection_ = nullptr;
}
/** Opens a connection to the default server address (serverUnixPath). */
void CoreVisionServer::init()
{
    init(serverUnixPath);
}
void CoreVisionServer::init(const std::string &unixPath) {
GError *error = nullptr;
connection_ = g_dbus_connection_new_for_address_sync(
unixPath.c_str(), G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
nullptr, nullptr, &error);
if (connection_ == nullptr) {
g_printerr("Error connecting to D-Bus address %s: %s\n",
unixPath.c_str(), error->message);
g_error_free(error);
}
}
/**
 * Stores the function that receives recognition results, together with the
 * opaque pointer that is handed back to it on every invocation.
 */
void CoreVisionServiceProxy::setRecognitionResultCallback(
    TextRecognitionResultCallback callback,
    void *userData)
{
    this->recognitionCallback_ = callback;
    this->recognitionUserData_ = userData;
}
/** Stores the hook that is invoked after a successful reconnect. */
void CoreVisionServiceProxy::setReconnectedCallback(
    ReconnectedCallback callback)
{
    // Taken by value, so it can be moved into the member without a copy.
    this->reconnectedCallback_ = std::move(callback);
}
/** Drops the reference to the GDBus proxy, if one was created. */
CoreVisionServiceProxy::~CoreVisionServiceProxy() {
    if (proxy_ == nullptr) {
        return;
    }
    g_object_unref(proxy_);
}
bool CoreVisionServiceProxy::connectToServer() {
CoreVisionServer::getInstance().init();
if (!CoreVisionServer::getInstance().available()) {
g_printerr(
"Error creating core vision server proxy: Server proxy "
"connection is unavailable.\n");
return false;
}
if (proxy_ != nullptr) {
g_object_unref(proxy_);
proxy_ = nullptr;
}
GError *error = nullptr;
auto *connection = CoreVisionServer::getInstance().getConnection();
proxy_ = ai_runtime_core_vision_service_proxy_new_sync(
connection, G_DBUS_PROXY_FLAGS_NONE, nullptr, objectPath, nullptr,
&error);
if (proxy_ == nullptr) {
LOGE("Error creating core vision service proxy: %s\n", error->message);
g_error_free(error);
return false;
}
g_dbus_proxy_set_default_timeout(G_DBUS_PROXY(proxy_), 5 * 60 * 1000);
return true;
}
int CoreVisionServiceProxy::initEngine(const char *config, int *sessionId) {
int errorCode = AISDK_NO_ERROR;
GError *error = nullptr;
bool success = ai_runtime_core_vision_service_call_init_sync(
proxy_, config, sessionId, &errorCode, nullptr, &error);
if (not success) {
if (error) {
LOGE("Error calling init: {}:{}", error->code, error->message);
g_error_free(error);
return AISDK_RUNTIME_ERROR;
}
}
return errorCode;
}
void CoreVisionServiceProxy::destroyTextRecognition(int sessionId) {
ai_runtime_core_vision_service_call_destroy_sync(proxy_, sessionId, nullptr,
nullptr);
}
/**
 * Starts an asynchronous recognition on an in-memory image. The bytes are
 * base64-encoded before being sent; the reply is delivered through
 * onRecognitionResultCallback.
 *
 * Logs and returns without sending anything when no proxy exists or when
 * data is null.
 *
 * @param sessionId       session to run the recognition in
 * @param data            start of the image bytes
 * @param imageDataLength number of bytes at data
 */
void CoreVisionServiceProxy::recognizeTextFromImageDataAsync(
    int sessionId, const char *data, unsigned int imageDataLength) {
    if (proxy_ == nullptr) {
        LOGE("Proxy not exist!");
        return;
    }
    if (data == nullptr) {
        // Constructing a std::string from a null pointer is invalid.
        LOGE("Image data is null!");
        return;
    }

    const std::string imageData(data, imageDataLength);
    const std::string base64ImageData = util::base64Encode(imageData);
    ai_runtime_core_vision_service_call_recognize_text_from_image_data(
        proxy_, sessionId, base64ImageData.c_str(), base64ImageData.size(),
        nullptr, onRecognitionResultCallback, this);
}
/**
 * Starts an asynchronous recognition on the image stored at imageFile; the
 * reply is delivered through onRecognitionResultCallback.
 *
 * Logs and returns without sending anything when no proxy exists or when
 * imageFile is null.
 */
void CoreVisionServiceProxy::recognizeTextFromImageFileAsync(
    int sessionId, const char *imageFile) {
    if (proxy_ == nullptr) {
        LOGE("Proxy not exist!");
        return;
    }
    if (imageFile == nullptr) {
        LOGE("Image file path is null!");
        return;
    }

    ai_runtime_core_vision_service_call_recognize_text_from_image_file(
        proxy_, sessionId, imageFile, nullptr, onRecognitionResultCallback,
        this);
}
/**
 * Completion handler for the asynchronous recognition calls.
 *
 * Finishes the call, and on success unpacks the reply and hands it to the
 * registered result callback. On failure the GError code is passed to
 * handleErrorOccurred(). The reply variant is released on every path,
 * including the one where no result callback is registered.
 *
 * @param sourceObject the GDBus proxy the call was made on
 * @param res          asynchronous result handle
 * @param userData     the CoreVisionServiceProxy that issued the call
 */
void CoreVisionServiceProxy::onRecognitionResultCallback(GObject *sourceObject,
                                                         GAsyncResult *res,
                                                         gpointer userData) {
    auto *proxy = static_cast<CoreVisionServiceProxy *>(userData);
    GVariant *result = nullptr;
    GError *error = nullptr;

    // NOTE(review): this handler is registered for both the image-file and the
    // image-data call, yet always uses the image-file finish function --
    // confirm both replies have the same shape.
    const bool ret =
        ai_runtime_core_vision_service_call_recognize_text_from_image_file_finish(
            (AiRuntimeCoreVisionService *)sourceObject, &result, res, &error);
    if (!ret) {
        if (error != nullptr) {
            // printf-style macro: "{}" placeholders are never substituted.
            LOGEF("Error calling recognizeTextFromImageFile: %d:%s",
                  error->code, error->message);
            proxy->handleErrorOccurred(error->code);
            g_error_free(error);
        }
        if (result != nullptr) {
            g_variant_unref(result);
        }
        return;
    }

    if (proxy->recognitionCallback_ == nullptr) {
        LOGE("Recognizing callback is not set!");
        if (result != nullptr) {
            g_variant_unref(result);
        }
        return;
    }

    // NOTE(review): dissectRecognitionResult() allocates the TextLine entries
    // with new; confirm TextRecognitionResult releases them.
    TextRecognitionResult textRecognitionResult = {};
    dissectRecognitionResult(result, &textRecognitionResult);
    if (result != nullptr) {
        g_variant_unref(result);
    }
    proxy->recognitionCallback_(&textRecognitionResult,
                                proxy->recognitionUserData_);
}
/**
 * Reacts to the GError code of a failed call: a closed connection triggers
 * handleServerClosed(), a timeout triggers handleServerTimeout(); any other
 * code is only logged.
 *
 * NOTE(review): only the numeric code is compared, not the error domain --
 * confirm callers only pass G_IO_ERROR codes.
 */
void CoreVisionServiceProxy::handleErrorOccurred(int errorCode) {
    LOGE("Server disconnected.");

    if (errorCode == G_IO_ERROR_CLOSED) {
        handleServerClosed();
    } else if (errorCode == G_IO_ERROR_TIMED_OUT) {
        handleServerTimeout();
    }
}
void CoreVisionServiceProxy::handleServerClosed() {
LOGE("Server closed unexpectedly.");
if (recognitionCallback_) {
TextRecognitionResult result{
.errorCode = AISDK_RUNTIME_ERROR,
.errorMessage = "Server closed unexpectedly."};
recognitionCallback_(&result, recognitionUserData_);
}
if (reconnectServer()) {
if (reconnectedCallback_) {
std::fprintf(stderr, "Reconnected to server And call back.\n");
reconnectedCallback_();
}
}
reconnectAttempts_ = 0;
}
void CoreVisionServiceProxy::handleServerTimeout() {
if (recognitionCallback_) {
TextRecognitionResult result{.errorCode = AISDK_SERVICE_TIMEOUT,
.errorMessage = "Server timeout."};
recognitionCallback_(&result, recognitionUserData_);
}
}
bool CoreVisionServiceProxy::reconnectServer() {
if (reconnectAttempts_ >= maxReconnectAttempts) {
LOGE("Max reconnect attempts reached");
return false;
}
reconnectAttempts_++;
LOGD("Reconnect attempt {}.", reconnectAttempts_);
if (!connectToServer()) {
LOGE("Failed to reconnect to server");
std::this_thread::sleep_for(std::chrono::seconds(1));
return reconnectServer();
}
LOGI("Successfully reconnected to server.");
return true;
}
} // namespace vision
} // namespace core
} // namespace kyai libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionsession.h 0000664 0001750 0001750 00000003325 15207167112 022036 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _TEXTRECOGNITIONSESSION_
#define _TEXTRECOGNITIONSESSION_
#include "_textrecognitionconfig.h"
#include "corevisionserviceproxy.h"
#include "textrecognition.h"
#include "textrecognitionresult.h"
namespace kyai {
namespace core {
namespace vision {
// One text-recognition session: owns a service proxy, the id of the
// server-side session and the model configuration used to create it.
class _TextRecognitionSession {
public:
_TextRecognitionSession();
// Connects to the server and creates the engine session; returns an error code.
int init();
// Asks the server to destroy the session identified by sessionId_.
void destroyTextRecognitionSession();
// Registers the function (and its opaque user data) that receives results.
void setRecognitionResultCallback(TextRecognitionResultCallback callback,
void *userData);
// Starts an asynchronous recognition on an image file path.
void recognizeTextFromImageFileAsync(const char *imageFile);
// Starts an asynchronous recognition on an in-memory image buffer.
void recognizeTextFromImageDataAsync(const char *imageData,
unsigned int imageDataLength);
// Stores a copy of the model configuration for the next initSession().
void setRecognizeTextModelConfig(const TextRecognitionModelConfig &config);
private:
// Establishes the proxy connection, retrying when the first attempt fails.
bool initServer();
// Creates the engine session from modelConfig_ and stores its id.
int initSession();
private:
CoreVisionServiceProxy serviceProxy_;
int sessionId_{0};
TextRecognitionModelConfig modelConfig_{};
};
} // namespace vision
} // namespace core
} // namespace kyai
#endif libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionconfig.cpp 0000664 0001750 0001750 00000004064 15207167112 022154 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_textrecognitionconfig.h"
#include
// Allocates a value-initialized model configuration on the heap and returns
// it. The object is released by text_recognition_model_config_destroy().
TextRecognitionModelConfig *text_recognition_model_config_create() {
  auto *created = new _TextRecognitionModelConfig();
  return created;
}
// Deletes the configuration referenced by *config and resets the caller's
// pointer to nullptr. A null `config` is ignored; a null *config is harmless
// because deleting a null pointer is a no-op.
void text_recognition_model_config_destroy(
    TextRecognitionModelConfig **config) {
  if (config != nullptr) {
    delete *config;
    *config = nullptr;
  }
}
// Sets the model name on a configuration object.
//
// config: configuration to modify; a null pointer is ignored.
// name:   NUL-terminated model name; a null pointer is ignored and leaves the
//         stored name untouched.
void text_recognition_model_config_set_name(TextRecognitionModelConfig *config,
                                            const char *name) {
  // Assigning a null `const char *` to std::string is undefined behaviour, so
  // a null name is rejected the same way a null config is.
  if (config == nullptr || name == nullptr) {
    return;
  }
  config->name = name;
}
// Stores the requested deployment type in the configuration. A null `config`
// is ignored.
void text_recognition_model_config_set_deploy_type(
    TextRecognitionModelConfig *config, ModelDeployType type) {
  if (config != nullptr) {
    config->type = type;
  }
}
std::string modelConfigToJson(const _TextRecognitionModelConfig &modelconfig) {
Json::Value root;
root["name"] = modelconfig.name;
switch (modelconfig.type) {
case ModelDeployType::OnDevice:
root["type"] = "Local";
break;
case ModelDeployType::PrivateCloud:
root["type"] = "PrivateCloud";
break;
case ModelDeployType::PublicCloud:
root["type"] = "PublicCloud";
break;
default:
root["type"] = "Default";
break;
}
Json::FastWriter writer;
return writer.write(root);
}
libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionresult.h 0000664 0001750 0001750 00000002227 15207167112 021671 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _TEXTRCOGNITIONRESULT_H_
#define _TEXTRCOGNITIONRESULT_H_
#include
#include
#include "textrecognitionresult.h"
// One recognized line of text.
struct _TextLine {
// Text content of the line.
std::string text;
// Points associated with the line (presumably its bounding polygon in image
// coordinates - TODO confirm against the producer of this struct).
std::vector points;
};
// Result of one text-recognition request.
//
// The struct owns the _TextLine objects stored in `lines` and deletes them in
// its destructor.
// NOTE(review): no copy operations are declared, so copying an instance would
// make two objects delete the same lines; pass it by pointer or reference.
struct _TextRecognitionResult {
  // Full recognized text.
  std::string allText;
  // Recognized lines; each pointer is owned by this struct.
  std::vector<_TextLine *> lines;
  // Error code for the request. Initialized so a default-constructed result
  // never exposes an indeterminate value (0 is assumed to mean "no error" -
  // TODO confirm against the public error enum).
  int errorCode{0};
  // Human-readable error description.
  std::string errorMessage;

  ~_TextRecognitionResult() {
    for (auto *line : lines) {
      delete line;
    }
  }
};
#endif libkysdk-coreai-1.1.0.1/src/vision/_textrecognitionconfig.h 0000664 0001750 0001750 00000002022 15207167112 021611 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _TEXTRECOGNITIONCONFIG_H
#define _TEXTRECOGNITIONCONFIG_H
#include
#include "coreai/vision/config.h"
// Model selection for a text-recognition session.
struct _TextRecognitionModelConfig {
  // Model name; empty until set.
  std::string name;
  // Deployment type; starts at the out-of-band value -1, i.e. "not chosen".
  ModelDeployType type = static_cast<ModelDeployType>(-1);
};
std::string modelConfigToJson(const _TextRecognitionModelConfig &modelconfig);
#endif //_TEXTRECOGNITIONCONFIG_H
libkysdk-coreai-1.1.0.1/src/vision/textrecognition.cpp 0000664 0001750 0001750 00000005663 15207167112 020635 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "textrecognition.h"
#include "_textrecognitionsession.h"
// Create a session: allocates the implementation object and hands it out as
// an opaque TextRecognitionSession handle.
TextRecognitionSession *text_recognition_create_session() {
  return (TextRecognitionSession *)new kyai::core::vision::
      _TextRecognitionSession();
}
// Destroy a session: tears down and deletes the implementation object behind
// *session, then resets the caller's handle to nullptr. Null `session` or
// null *session is ignored.
void text_recognition_destroy_session(TextRecognitionSession **session) {
  if (session == nullptr || *session == nullptr) {
    return;
  }
  auto *impl = (kyai::core::vision::_TextRecognitionSession *)*session;
  impl->destroyTextRecognitionSession();
  delete impl;
  *session = nullptr;
}
// Initialize a session. Returns AISDK_INVALID_SESSION for a null handle,
// otherwise whatever the implementation's init() returns.
int text_recognition_init_session(TextRecognitionSession *session) {
  if (session != nullptr) {
    auto *impl = (kyai::core::vision::_TextRecognitionSession *)session;
    return impl->init();
  }
  return AISDK_INVALID_SESSION;
}
// Set the result callback: forwards the callback and its user data to the
// session implementation. A null session is ignored.
void text_recognition_result_set_callback(
    TextRecognitionSession *session, TextRecognitionResultCallback callback,
    void *user_data) {
  if (session != nullptr) {
    auto *impl = (kyai::core::vision::_TextRecognitionSession *)session;
    impl->setRecognitionResultCallback(callback, user_data);
  }
}
// Configure the OCR model: passes *config to the session implementation.
// Nothing happens when either pointer is null.
void text_recognition_set_model_config(TextRecognitionSession *session,
                                       TextRecognitionModelConfig *config) {
  if (session != nullptr && config != nullptr) {
    auto *impl = (kyai::core::vision::_TextRecognitionSession *)session;
    impl->setRecognizeTextModelConfig(*config);
  }
}
// Recognize text from an image file: forwards the path to the session
// implementation's asynchronous file entry point. A null session is ignored.
void text_recognition_recognize_text_from_image_file_async(
    TextRecognitionSession *session, const char *image_file) {
  if (session != nullptr) {
    auto *impl = (kyai::core::vision::_TextRecognitionSession *)session;
    impl->recognizeTextFromImageFileAsync(image_file);
  }
}
// 从图像数据中进行识别
void text_recognition_recognize_text_from_image_data_async(
TextRecognitionSession *session, const char *image_data,
unsigned int image_data_length) {
if (session == nullptr) {
return;
}
((kyai::core::vision::_TextRecognitionSession *)session)
->recognizeTextFromImageDataAsync(image_data, image_data_length);
} libkysdk-coreai-1.1.0.1/src/logger.cpp 0000664 0001750 0001750 00000003223 15207167112 015346 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "logger.h"
#include
#include
// Terminates the current log line on standard output.
void Logger::printLn() {
  std::cout << '\n';
}
// Terminates the current log line on standard error.
void Logger::printErrorLn() {
  std::cerr << '\n';
}
// Formats a source location as "[file:line:function]".
std::string Logger::location(const char* fileName, int line,
                             const char* funcName) {
  std::ostringstream formatted;
  formatted << '[';
  formatted << fileName << ':';
  formatted << line << ':';
  formatted << funcName;
  formatted << ']';
  return formatted.str();
}
// Returns the current wall-clock time as a bracketed log prefix.
//
// With RUN_IN_DEBUG defined the format is "[YYYY-MM-DD hh:mm:ss.mmm]" in local
// time; otherwise (or if the local-time conversion fails) it is
// "[<milliseconds since the Unix epoch>]".
std::string Logger::currentTime() {
  struct timeval now {};
  gettimeofday(&now, nullptr);

  char buffer[64]{};
#ifdef RUN_IN_DEBUG
  // localtime_r writes into a caller-owned struct, so concurrent loggers do
  // not race on the static buffer that localtime() hands out.
  const time_t seconds = now.tv_sec;
  struct tm local {};
  if (localtime_r(&seconds, &local) != nullptr) {
    snprintf(buffer, sizeof(buffer),
             "[%04d-%02d-%02d %02d:%02d:%02d.%03ld]", local.tm_year + 1900,
             local.tm_mon + 1, local.tm_mday, local.tm_hour, local.tm_min,
             local.tm_sec, static_cast<long>(now.tv_usec / 1000));
    return buffer;
  }
#endif
  // Do the arithmetic in 64 bits so the millisecond count cannot overflow on
  // platforms where long is 32 bits wide.
  const long long millis =
      static_cast<long long>(now.tv_sec) * 1000 + now.tv_usec / 1000;
  snprintf(buffer, sizeof(buffer), "[%lld]", millis);
  return buffer;
}
// Default constructor. When RUN_IN_DEBUG is defined at compile time the log
// level is set to LOG_LEVEL_DEBUG; otherwise level_ keeps whatever default it
// is declared with (the declaration is not visible here).
Logger::Logger() {
#ifdef RUN_IN_DEBUG
level_ = LOG_LEVEL_DEBUG;
#endif
}
libkysdk-coreai-1.1.0.1/src/util.h 0000664 0001750 0001750 00000002175 15207167112 014516 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef UTIL_H
#define UTIL_H
#include
#include
#include
// Small string/encoding helpers; only the declarations are visible here.
namespace util {
// Base64-encodes `size` bytes starting at `data`.
std::string base64Encode(const void *data, std::size_t size);
// Base64-encodes the contents of a vector.
std::string base64Encode(const std::vector &input);
// Base64-encodes the bytes of a string.
std::string base64Encode(const std::string &input);
// Decodes a Base64 string into a vector.
std::vector base64Decode(const std::string &input);
// Presumably reports whether `sub` occurs inside `str` - confirm against the
// definition.
bool stringContains(const std::string &str, const std::string &sub);
} // namespace util
#endif // UTIL_H
libkysdk-coreai-1.1.0.1/src/embedding/ 0000775 0001750 0001750 00000000000 15207167112 015301 5 ustar zp zp libkysdk-coreai-1.1.0.1/src/embedding/embeddingcommon.h 0000664 0001750 0001750 00000002520 15207167112 020600 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef EMBEDDINGCOMMON_H
#define EMBEDDINGCOMMON_H
#include "coreai/embedding/error.h"
#include
#include
// Result of an embedding request: the embedding vector plus an error code and
// message.
typedef struct _EmbeddingResult {
// Embedding values; empty by default.
std::vector vector_result = {};
// Error code; defaults to 0.
int error_code = 0;
// Error description; defaults to the empty string.
std::string error_message = "";
// Creates an empty result with the member defaults above.
_EmbeddingResult() {}
// Creates a successful result carrying `vector_result`; the error code is
// COREAI_EMBEDDING_SUCESS and the message is "Sucess".
_EmbeddingResult(std::vector vector_result)
: vector_result(vector_result),
error_code(COREAI_EMBEDDING_SUCESS),
error_message("Sucess") {}
// Creates a failed result with an empty vector and the given code/message.
_EmbeddingResult(int error_code, std::string error_message)
: vector_result({}),
error_code(error_code),
error_message(error_message) {}
} EmbeddingResult;
#endif libkysdk-coreai-1.1.0.1/src/embedding/textembeddingsession.h 0000664 0001750 0001750 00000002422 15207167112 021701 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef TEXTEMBEDDINGSESSION_H
#define TEXTEMBEDDINGSESSION_H
#include
#include "embedding.h"
#include "embeddingcommon.h"
#include "textembeddingprocessorproxy.h"
// Text-embedding session. Each member forwards to the
// TextEmbeddingProcessorProxy it owns.
class _TextEmbeddingSession {
public:
// Connects the proxy to the runtime; returns the proxy's status code.
int connect();
// Initializes the embedding engine through the proxy; returns its status code.
int init();
// Synchronously embeds `text`; returns a heap-allocated result produced by the
// proxy.
EmbeddingResult* embeddingText(const std::string& text);
// Asynchronously embeds `text`; the result is delivered to `callback` together
// with `callback_user_data`.
void embeddingTextAsync(const std::string& text,
TextEmbeddingResultCallback callback,
void* callback_user_data);
// Returns the model information string reported by the proxy.
std::string getModelInfo();
private:
// Proxy that performs the actual calls to the runtime service.
TextEmbeddingProcessorProxy text_embedding_processor_proxy_;
};
#endif
libkysdk-coreai-1.1.0.1/src/embedding/imageembeddingsession.cpp 0000664 0001750 0001750 00000004643 15207167112 022341 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "imageembeddingsession.h"
// First step of session setup: establish the connection with the runtime by
// delegating to the processor proxy. The proxy's status code is returned
// unchanged.
int _ImageEmbeddingSession::connect() {
  const int status = image_embedding_processor_proxy_.connect();
  return status;
}
// Initializes the embedding engine through the processor proxy and returns
// the proxy's status code unchanged.
int _ImageEmbeddingSession::init() {
  const int status = image_embedding_processor_proxy_.initEngine();
  return status;
}
// Synchronously embeds `text` via the processor proxy and returns the
// proxy's result pointer as-is.
EmbeddingResult* _ImageEmbeddingSession::embeddingText(
    const std::string& text) {
  EmbeddingResult* embedded = image_embedding_processor_proxy_.embeddingText(text);
  return embedded;
}
// Synchronously embeds the image at `file_path` via the processor proxy and
// returns the proxy's result pointer as-is.
EmbeddingResult* _ImageEmbeddingSession::embeddingImage(
    const std::string& file_path) {
  EmbeddingResult* embedded =
      image_embedding_processor_proxy_.embeddingImage(file_path);
  return embedded;
}
// Synchronously embeds Base64 image data via the processor proxy and returns
// the proxy's result pointer as-is.
EmbeddingResult* _ImageEmbeddingSession::embeddingBase64Image(
    const std::string& file_data) {
  EmbeddingResult* embedded =
      image_embedding_processor_proxy_.embeddingBase64Image(file_data);
  return embedded;
}
// Asynchronous text embedding: hands the text, callback and user data to the
// processor proxy.
void _ImageEmbeddingSession::embeddingTextAsync(
    const std::string& text, ImageEmbeddingResultCallback callback,
    void* callback_user_data) {
  auto& proxy = image_embedding_processor_proxy_;
  proxy.embeddingTextAsync(text, callback, callback_user_data);
}
// Asynchronous image-file embedding: hands the path, callback and user data
// to the processor proxy.
void _ImageEmbeddingSession::embeddingImageAsync(
    const std::string& file_path, ImageEmbeddingResultCallback callback,
    void* callback_user_data) {
  auto& proxy = image_embedding_processor_proxy_;
  proxy.embeddingImageAsync(file_path, callback, callback_user_data);
}
// Asynchronous Base64-image embedding: hands the data, callback and user data
// to the processor proxy.
void _ImageEmbeddingSession::embeddingBase64ImageAsync(
    const std::string& file_data, ImageEmbeddingResultCallback callback,
    void* callback_user_data) {
  auto& proxy = image_embedding_processor_proxy_;
  proxy.embeddingBase64ImageAsync(file_data, callback, callback_user_data);
}
std::string _ImageEmbeddingSession::getModelInfo() {
return image_embedding_processor_proxy_.getModelInfo();
} libkysdk-coreai-1.1.0.1/src/embedding/imageembeddingprocessorproxy.h 0000664 0001750 0001750 00000005733 15207167112 023445 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef SERVICES_IMAGEEMBEDDINGPROCESSORPROXY_H
#define SERVICES_IMAGEEMBEDDINGPROCESSORPROXY_H
#include
#include
#include
#include
#include
#include "coreimageembeddingserviceglue.h"
#include "embeddingcommon.h"
// Client-side proxy for the core image-embedding D-Bus service.
class ImageEmbeddingProcessorProxy {
public:
using ImageEmbeddingCallback = std::function;
using ImageEmbeddingResultCallback =
std::function;
// Bundles a result callback with the caller's opaque user pointer.
struct EmbeddingRequestContext {
ImageEmbeddingResultCallback callback;
void* callback_user_data;
};
public:
// Releases the D-Bus handles via destroy().
~ImageEmbeddingProcessorProxy();
// Opens the D-Bus connection and creates the service proxy; returns a
// COREAI_EMBEDDING_* status code.
int connect();
// Calls the service's init method and stores the returned session id; returns
// a COREAI_EMBEDDING_* status code.
int initEngine();
// Synchronous embedding calls. Each returns a heap-allocated EmbeddingResult.
EmbeddingResult* embeddingText(const std::string& text);
EmbeddingResult* embeddingImage(const std::string& file_path);
EmbeddingResult* embeddingBase64Image(const std::string& file_data);
// Asynchronous embedding calls taking a result callback and an opaque user
// pointer.
void embeddingTextAsync(const std::string& text,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
void embeddingImageAsync(const std::string& file_path,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
void embeddingBase64ImageAsync(const std::string& file_data,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
// Returns model information as a string.
std::string getModelInfo();
private:
// Unrefs the connection and the service proxy and resets both to nullptr.
void destroy();
// Converts the service's JSON reply into an EmbeddingResult.
static EmbeddingResult parseResult(const std::string& json_string_result);
// Completion handlers for the asynchronous calls above.
static void embeddingTextAsyncCallback(GObject* object, GAsyncResult* res,
gpointer data);
static void embeddingImageAsyncCallback(GObject* object, GAsyncResult* res,
gpointer data);
static void embeddingBase64ImageAsyncCallback(GObject* object,
GAsyncResult* res,
gpointer data);
private:
// Generated D-Bus service proxy; nullptr until connect() creates it.
AiRuntimeCoreImageEmbeddingService* delegate_ = nullptr;
// D-Bus connection to the runtime; nullptr until connect() opens it.
GDBusConnection* connection_ = nullptr;
// Session id filled in by initEngine(); -1 means no session.
int sessionId_ = -1;
int initErrorCode_ = -1; // 0 means initialization succeeded, non-zero means it failed
};
#endif libkysdk-coreai-1.1.0.1/src/embedding/imageembeddingsession.h 0000664 0001750 0001750 00000003451 15207167112 022002 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef IMAGEEMBEDDINGSESSION_H
#define IMAGEEMBEDDINGSESSION_H
#include
#include "embedding.h"
#include "embeddingcommon.h"
#include "imageembeddingprocessorproxy.h"
// Image-embedding session. Each member forwards to the
// ImageEmbeddingProcessorProxy it owns.
class _ImageEmbeddingSession {
public:
// Connects the proxy to the runtime; returns the proxy's status code.
int connect();
// Initializes the embedding engine through the proxy; returns its status code.
int init();
// Synchronous embedding of text, an image file path, or Base64 image data.
// Each returns the result pointer produced by the proxy.
EmbeddingResult* embeddingText(const std::string& text);
EmbeddingResult* embeddingImage(const std::string& file_path);
EmbeddingResult* embeddingBase64Image(const std::string& file_data);
// Asynchronous variants; the callback and user data are passed on to the
// proxy.
void embeddingTextAsync(const std::string& text,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
void embeddingImageAsync(const std::string& file_path,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
void embeddingBase64ImageAsync(const std::string& file_data,
ImageEmbeddingResultCallback callback,
void* callback_user_data);
// Returns the model information string reported by the proxy.
std::string getModelInfo();
private:
// Proxy that performs the actual calls to the runtime service.
ImageEmbeddingProcessorProxy image_embedding_processor_proxy_;
};
#endif
libkysdk-coreai-1.1.0.1/src/embedding/textembeddingprocessorproxy.h 0000664 0001750 0001750 00000004062 15207167112 023341 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef SERVICES_TEXTEMBEDDINGPROCESSORPROXY_H
#define SERVICES_TEXTEMBEDDINGPROCESSORPROXY_H
#include
#include
#include
#include
#include
#include "coretextembeddingserviceglue.h"
#include "embeddingcommon.h"
// Client-side proxy for the core text-embedding D-Bus service.
class TextEmbeddingProcessorProxy {
public:
using TextEmbeddingCallback = std::function;
using TextEmbeddingResultCallback =
std::function;
// Bundles a result callback with the caller's opaque user pointer; allocated
// per asynchronous request and deleted by the completion handler.
struct EmbeddingRequestContext {
TextEmbeddingResultCallback callback;
void* callback_user_data;
};
public:
// Releases the D-Bus handles via destroy().
~TextEmbeddingProcessorProxy();
// Opens the D-Bus connection and creates the service proxy; returns a
// COREAI_EMBEDDING_* status code.
int connect();
// Calls the service's init method and stores the returned session id; returns
// a COREAI_EMBEDDING_* status code.
int initEngine();
// Synchronously embeds `text`; always returns a heap-allocated result, which
// carries an error code/message on failure.
EmbeddingResult* embeddingText(const std::string& text);
// Starts an asynchronous embedding; `callback` is invoked with the result and
// `callback_user_data` when the call completes.
void embeddingTextAsync(const std::string& text,
TextEmbeddingResultCallback callback,
void* callback_user_data);
// Returns the "text_model" part of the service's model information as a JSON
// string, or an empty string on failure.
std::string getModelInfo();
private:
// Unrefs the connection and the service proxy and resets both to nullptr.
void destroy();
// Converts the service's JSON reply into an EmbeddingResult.
static EmbeddingResult parseResult(const std::string& json_string_result);
// Completion handler for embeddingTextAsync().
static void embeddingTextAsyncCallback(GObject* object, GAsyncResult* res,
gpointer data);
private:
// Generated D-Bus service proxy; nullptr until connect() creates it.
AiRuntimeCoreTextEmbeddingService* delegate_ = nullptr;
// D-Bus connection to the runtime; nullptr until connect() opens it.
GDBusConnection* connection_ = nullptr;
// Session id filled in by initEngine(); -1 means no session.
int sessionId_ = -1;
// Init error code filled in by initEngine(); -1 until then.
int initErrorCode_ = -1;
};
#endif libkysdk-coreai-1.1.0.1/src/embedding/textembeddingprocessorproxy.cpp 0000664 0001750 0001750 00000020655 15207167112 023702 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "textembeddingprocessorproxy.h"
#include "error.h"
#include "logger.h"
#include "nlohmann/json.hpp"
namespace {
// D-Bus object path of the core text-embedding service.
const char* textEmbeddingProcessorObjectPath =
    "/com/kylin/AiRuntime/CoreTextEmbeddingService";

// D-Bus address of the per-user runtime socket; the directory component is the
// real user id of the calling process.
const std::string serverUnixPath =
    std::string("unix:path=/tmp/.kylin-ai-runtime-unix/") +
    std::to_string(getuid()) + "/core-textembedding.sock";
}  // namespace
int TextEmbeddingProcessorProxy::initEngine() {
nlohmann::json config;
config["engineName"] = "Embedding";
GError* error = nullptr;
ai_runtime_core_text_embedding_service_call_init_sync(
delegate_, config.dump().c_str(), &sessionId_, &initErrorCode_, nullptr,
&error);
if (error) {
g_printerr("Init engine error: %s\n", error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
if (sessionId_ == -1) {
LOGEF("Proxy init engine error sessionId:%i.", sessionId_);
return COREAI_EMBEDDING_INIT_ERROR;
}
LOGIF("Proxy init engine sessionId:%i.", sessionId_);
return COREAI_EMBEDDING_SUCESS;
}
int TextEmbeddingProcessorProxy::connect() {
// 获得连接
GError* error = nullptr;
connection_ = g_dbus_connection_new_for_address_sync(
serverUnixPath.c_str(), G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
nullptr, /* GDBusAuthObserver */
nullptr, /* GCancellable */
&error);
if (connection_ == nullptr && error != nullptr) {
g_printerr("Error connecting to D-Bus address %s: %s\n",
serverUnixPath.c_str(), error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
// 获得代理
delegate_ = ai_runtime_core_text_embedding_service_proxy_new_sync(
connection_, G_DBUS_PROXY_FLAGS_NONE, nullptr,
textEmbeddingProcessorObjectPath, nullptr, &error);
if (delegate_ == nullptr && error != nullptr) {
g_printerr("Error creating text embedding processor proxy %s: %s\n",
textEmbeddingProcessorObjectPath, error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
return COREAI_EMBEDDING_SUCESS;
}
// Releases the D-Bus connection and service proxy held by this object.
TextEmbeddingProcessorProxy::~TextEmbeddingProcessorProxy() {
  destroy();
}
// Synchronously embeds `text` through the runtime service.
//
// Always returns a heap-allocated EmbeddingResult:
//   - COREAI_EMBEDDING_CONNECTION_ERROR with the GLib message when the D-Bus
//     call fails,
//   - COREAI_EMBEDDING_RUNTIME_ERROR when the service returns no payload,
//   - otherwise whatever parseResult() extracts from the JSON reply.
EmbeddingResult* TextEmbeddingProcessorProxy::embeddingText(
    const std::string& text) {
  GError* error = nullptr;
  nlohmann::json object;
  object["text"] = text;
  object["sessionId"] = sessionId_;
  char* json_result = nullptr;
  ai_runtime_core_text_embedding_service_call_embedding_text_sync(
      delegate_, object.dump().c_str(), &json_result, nullptr, &error);
  if (error) {
    g_printerr("[Dbus-close:aisdk_text]: %s\n", error->message);
    // The runtime may have crashed, which makes the current session unusable;
    // the user is expected to create a new session.
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_CONNECTION_ERROR, error->message);
    g_error_free(error);
    return res;
  }
  if (!json_result) {
    LOGE("Te-embedding text result error:json_result is nullptr!");
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_RUNTIME_ERROR, "Embedding reulst is nullptr!");
    return res;
  }
  // Copy the reply and free the GLib buffer before parsing, so an exception
  // thrown while extracting JSON fields cannot leak json_result.
  const std::string payload(json_result);
  g_free(json_result);
  auto result = parseResult(payload);
  EmbeddingResult* res = new EmbeddingResult(result);
  return res;
}
void TextEmbeddingProcessorProxy::embeddingTextAsync(
const std::string& text, TextEmbeddingResultCallback callback,
void* callback_user_data) {
nlohmann::json object;
object["text"] = text;
object["sessionId"] = sessionId_;
EmbeddingRequestContext* context =
new EmbeddingRequestContext{callback, callback_user_data};
ai_runtime_core_text_embedding_service_call_embedding_text(
delegate_, object.dump().c_str(), nullptr,
(GAsyncReadyCallback)embeddingTextAsyncCallback, context);
}
// Drops this object's references to the D-Bus connection and the service
// proxy and resets both members, so a later reconnect starts from a clean
// state.
void TextEmbeddingProcessorProxy::destroy() {
  if (connection_ != nullptr) {
    g_object_unref(connection_);
    connection_ = nullptr;
  }

  if (delegate_ != nullptr) {
    g_object_unref(delegate_);
    delegate_ = nullptr;
  }
}
// Converts the service's JSON reply into an EmbeddingResult.
//
// The reply must be a JSON object with an array "vector_result" plus
// "errorMessage" and "errorCode" members; anything else (including text that
// fails to parse) yields a COREAI_EMBEDDING_RUNTIME_ERROR result with the
// message "Json format error.".
EmbeddingResult TextEmbeddingProcessorProxy::parseResult(
const std::string& json_string_result) {
EmbeddingResult result;
// Parse with exceptions disabled (third argument false), so malformed input
// does not throw from parse() itself.
nlohmann::json root =
nlohmann::json::parse(json_string_result, nullptr, false);
if (root.empty() || !root.contains("vector_result") ||
!root["vector_result"].is_array() || !root.contains("errorMessage") ||
!root.contains("errorCode")) {
LOGE("Json format error.");
return EmbeddingResult(COREAI_EMBEDDING_RUNTIME_ERROR,
"Json format error.");
}
// Copy the embedding values element by element into the result.
auto vector_result = root["vector_result"].get>();
for (const auto& value : vector_result) {
result.vector_result.push_back(static_cast(value));
}
result.error_message = root["errorMessage"].get();
result.error_code = root["errorCode"].get();
return result;
}
// Completion handler for embeddingTextAsync().
//
// `data` is the EmbeddingRequestContext allocated by embeddingTextAsync(). The
// handler finishes the D-Bus call, invokes the stored callback exactly once
// with either an error result or the parsed reply, and deletes the context on
// every path.
void TextEmbeddingProcessorProxy::embeddingTextAsyncCallback(GObject* object,
GAsyncResult* res,
gpointer data) {
auto* context = static_cast(data);
GError* error = NULL;
char* json_result = nullptr;
ai_runtime_core_text_embedding_service_call_embedding_text_finish(
(AiRuntimeCoreTextEmbeddingService*)object, &json_result, res, &error);
if (error) {
g_printerr("[Dbus-close:aisdk_embedding_text_async]: %s\n",
error->message);
// The runtime may have crashed, which makes the current session unusable;
// the user is expected to create a new session.
EmbeddingResult result(COREAI_EMBEDDING_CONNECTION_ERROR,
error->message);
context->callback(&result, context->callback_user_data);
g_error_free(error);
delete context;
context = nullptr;
return;
}
if (!json_result) {
LOGE(
"Te-embedding text async result error:json_result is "
"nullptr!");
EmbeddingResult result(COREAI_EMBEDDING_RUNTIME_ERROR,
"Embedding reulst is nullptr!");
context->callback(&result, context->callback_user_data);
delete context;
context = nullptr;
return;
}
// The result lives on this stack frame, so the pointer handed to the callback
// is only valid for the duration of the call.
auto result = parseResult(json_result);
g_free(json_result);
context->callback(&result, context->callback_user_data);
delete context;
context = nullptr;
}
std::string TextEmbeddingProcessorProxy::getModelInfo() {
GError* error = nullptr;
char* model_info = nullptr;
ai_runtime_core_text_embedding_service_call_get_model_info_sync(
delegate_, sessionId_, &model_info, nullptr, &error);
if (error) {
g_printerr("[Dbus-close:aisdk_text]: %s\n", error->message);
g_error_free(error);
return "";
}
if (!model_info) {
LOGE("Te-embedding modelInfo is empty!");
return "";
}
// engine中返回的向量化模型信息包括文本和图像的,在sdk层拆开json字符串
nlohmann::json root = nlohmann::json::parse(model_info, nullptr, false);
if (root.empty() || !root.contains("models") ||
!root["models"].contains("text_model")) {
LOGE(
"Te-embedding parse modelInfo error! text_model not found in "
"json.");
g_free(model_info);
return "";
}
// 提取 text_model 的部分
std::string res = root["models"]["text_model"].dump();
g_free(model_info);
return res;
} libkysdk-coreai-1.1.0.1/src/embedding/textembeddingsession.cpp 0000664 0001750 0001750 00000003003 15207167112 022230 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "textembeddingsession.h"
// First step of session setup: establish the connection with the runtime by
// delegating to the processor proxy. The proxy's status code is returned
// unchanged.
int _TextEmbeddingSession::connect() {
  const int status = text_embedding_processor_proxy_.connect();
  return status;
}
// Second step of session setup: initialize the engine through the processor
// proxy. The proxy's status code is returned unchanged.
int _TextEmbeddingSession::init() {
  const int status = text_embedding_processor_proxy_.initEngine();
  return status;
}
// Synchronously embeds `text` via the processor proxy and returns the proxy's
// result pointer as-is.
EmbeddingResult* _TextEmbeddingSession::embeddingText(const std::string& text) {
  EmbeddingResult* embedded = text_embedding_processor_proxy_.embeddingText(text);
  return embedded;
}
// Asynchronous text embedding: hands the text, callback and user data to the
// processor proxy.
void _TextEmbeddingSession::embeddingTextAsync(
    const std::string& text, TextEmbeddingResultCallback callback,
    void* callback_user_data) {
  auto& proxy = text_embedding_processor_proxy_;
  proxy.embeddingTextAsync(text, callback, callback_user_data);
}
std::string _TextEmbeddingSession::getModelInfo() {
return text_embedding_processor_proxy_.getModelInfo();
} libkysdk-coreai-1.1.0.1/src/embedding/imageembeddingprocessorproxy.cpp 0000664 0001750 0001750 00000035557 15207167112 024007 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "imageembeddingprocessorproxy.h"
#include
#include "error.h"
#include "logger.h"
#include "nlohmann/json.hpp"
namespace {
// D-Bus object path of the core image-embedding service.
const char* imageEmbeddingProcessorObjectPath =
    "/com/kylin/AiRuntime/CoreImageEmbeddingService";

// D-Bus address of the per-user runtime socket; the directory component is the
// real user id of the calling process.
const std::string serverUnixPath =
    std::string("unix:path=/tmp/.kylin-ai-runtime-unix/") +
    std::to_string(getuid()) + "/core-imageembedding.sock";
}  // namespace
// Releases the D-Bus connection and service proxy held by this object.
ImageEmbeddingProcessorProxy::~ImageEmbeddingProcessorProxy() {
  destroy();
}
int ImageEmbeddingProcessorProxy::connect() {
GError* error = nullptr;
connection_ = g_dbus_connection_new_for_address_sync(
serverUnixPath.c_str(), G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
nullptr, /* GDBusAuthObserver */
nullptr, /* GCancellable */
&error);
if (connection_ == nullptr && error != nullptr) {
g_printerr("Error connecting to D-Bus address %s: %s\n",
serverUnixPath.c_str(), error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
delegate_ = ai_runtime_core_image_embedding_service_proxy_new_sync(
connection_, G_DBUS_PROXY_FLAGS_NONE, nullptr,
imageEmbeddingProcessorObjectPath, nullptr, &error);
if (delegate_ == nullptr && error != nullptr) {
g_printerr("Error creating image embedding processor proxy %s: %s\n",
imageEmbeddingProcessorObjectPath, error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
return COREAI_EMBEDDING_SUCESS;
}
// Drops this object's references to the D-Bus connection and the service
// proxy and resets both members to nullptr.
void ImageEmbeddingProcessorProxy::destroy() {
  if (connection_ != nullptr) {
    g_object_unref(connection_);
    connection_ = nullptr;
  }

  if (delegate_ != nullptr) {
    g_object_unref(delegate_);
    delegate_ = nullptr;
  }
}
int ImageEmbeddingProcessorProxy::initEngine() {
nlohmann::json config;
config["engineName"] = "Embedding";
GError* error = nullptr;
ai_runtime_core_image_embedding_service_call_init_sync(
delegate_, config.dump().c_str(), &sessionId_, &initErrorCode_, nullptr,
&error);
if (error) {
g_printerr("Init engine error: %s\n", error->message);
g_error_free(error);
return COREAI_EMBEDDING_CONNECTION_ERROR;
}
if (sessionId_ == -1) {
LOGEF("Proxy init engine error sessionId:%i.", sessionId_);
return COREAI_EMBEDDING_INIT_ERROR;
}
LOGIF("Proxy init engine sessionId:%i.\n", sessionId_);
return COREAI_EMBEDDING_SUCESS;
}
/**
 * Synchronously asks the service to embed `text` with the image model.
 *
 * @param text Text forwarded to the service as the "text" field of the
 *             request.
 * @return An EmbeddingResult allocated with new; the caller is responsible
 *         for deleting it. It carries COREAI_EMBEDDING_CONNECTION_ERROR when
 *         the D-Bus call fails, COREAI_EMBEDDING_RUNTIME_ERROR when the reply
 *         is null, and otherwise whatever parseResult() produced.
 */
EmbeddingResult* ImageEmbeddingProcessorProxy::embeddingText(
    const std::string& text) {
  nlohmann::json request;
  request["sessionId"] = sessionId_;
  request["text"] = text;
  const std::string payload = request.dump();

  GError* callError = nullptr;
  char* rawReply = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_text_sync(
      delegate_, payload.c_str(), &rawReply, nullptr, &callError);

  if (callError != nullptr) {
    g_printerr("[Dbus-close:aisdk_image_embedding_text]: %s\n",
               callError->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult* failure = new EmbeddingResult(
        COREAI_EMBEDDING_CONNECTION_ERROR, callError->message);
    g_error_free(callError);
    return failure;
  }

  if (rawReply == nullptr) {
    LOGE("Im-embedding text result error:json_result is nullptr!");
    return new EmbeddingResult(COREAI_EMBEDDING_RUNTIME_ERROR,
                               "Embedding reulst is nullptr!");
  }

  // Copy the reply into a std::string so the GLib buffer can be released
  // before parsing.
  std::string reply(rawReply);
  g_free(rawReply);
  auto parsed = parseResult(reply);
  return new EmbeddingResult(parsed);
}
/**
 * Synchronously asks the service to embed the image file at `file_path`.
 *
 * @param file_path Path forwarded to the service as the "file_path" field of
 *                  the request.
 * @return An EmbeddingResult allocated with new; the caller is responsible
 *         for deleting it. It carries COREAI_EMBEDDING_CONNECTION_ERROR when
 *         the D-Bus call fails, COREAI_EMBEDDING_RUNTIME_ERROR when the reply
 *         is null, and otherwise whatever parseResult() produced.
 */
EmbeddingResult* ImageEmbeddingProcessorProxy::embeddingImage(
    const std::string& file_path) {
  GError* error = nullptr;
  nlohmann::json object;
  object["file_path"] = file_path;
  object["sessionId"] = sessionId_;
  char* json_result = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_image_file_sync(
      delegate_, object.dump().c_str(), &json_result, nullptr, &error);
  if (error) {
    g_printerr("[Dbus-close:aisdk_image_embedding_image]: %s\n",
               error->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_CONNECTION_ERROR, error->message);
    g_error_free(error);
    return res;
  }
  if (!json_result) {
    LOGE("Im-embedding image result error:json_result is nullptr!");
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_RUNTIME_ERROR, "Embedding reulst is nullptr!");
    return res;
  }
  // Copy and free the GLib buffer before parsing (as embeddingText does) so
  // the buffer cannot leak if parsing throws.
  std::string string_json_result(json_result);
  g_free(json_result);
  auto result = parseResult(string_json_result);
  EmbeddingResult* res = new EmbeddingResult(result);
  return res;
}
/**
 * Synchronously asks the service to embed an image supplied as base64 data.
 *
 * @param file_data Image data forwarded to the service as the
 *                  "base64ImageData" field of the request.
 * @return An EmbeddingResult allocated with new; the caller is responsible
 *         for deleting it. It carries COREAI_EMBEDDING_CONNECTION_ERROR when
 *         the D-Bus call fails, COREAI_EMBEDDING_RUNTIME_ERROR when the reply
 *         is null, and otherwise whatever parseResult() produced.
 */
EmbeddingResult* ImageEmbeddingProcessorProxy::embeddingBase64Image(
    const std::string& file_data) {
  GError* error = nullptr;
  nlohmann::json object;
  object["base64ImageData"] = file_data;
  object["sessionId"] = sessionId_;
  char* json_result = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_base64_image_sync(
      delegate_, object.dump().c_str(), &json_result, nullptr, &error);
  if (error) {
    g_printerr("[Dbus-close:aisdk_image_embedding_base64_image]: %s\n",
               error->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_CONNECTION_ERROR, error->message);
    g_error_free(error);
    return res;
  }
  if (!json_result) {
    LOGE("Im-embedding base64 image result error:json_result is nullptr!");
    EmbeddingResult* res = new EmbeddingResult(
        COREAI_EMBEDDING_RUNTIME_ERROR, "Embedding reulst is nullptr!");
    return res;
  }
  // Copy and free the GLib buffer before parsing (as embeddingText does) so
  // the buffer cannot leak if parsing throws.
  std::string string_json_result(json_result);
  g_free(json_result);
  auto result = parseResult(string_json_result);
  EmbeddingResult* res = new EmbeddingResult(result);
  return res;
}
void ImageEmbeddingProcessorProxy::embeddingTextAsync(
const std::string& text, ImageEmbeddingResultCallback callback,
void* callback_user_data) {
nlohmann::json object;
object["text"] = text;
object["sessionId"] = sessionId_;
EmbeddingRequestContext* context =
new EmbeddingRequestContext{callback, callback_user_data};
ai_runtime_core_image_embedding_service_call_embedding_text(
delegate_, object.dump().c_str(), nullptr,
(GAsyncReadyCallback)embeddingTextAsyncCallback, context);
}
void ImageEmbeddingProcessorProxy::embeddingImageAsync(
const std::string& file_path, ImageEmbeddingResultCallback callback,
void* callback_user_data) {
nlohmann::json object;
object["file_path"] = file_path;
object["sessionId"] = sessionId_;
EmbeddingRequestContext* context =
new EmbeddingRequestContext{callback, callback_user_data};
ai_runtime_core_image_embedding_service_call_embedding_image_file(
delegate_, object.dump().c_str(), nullptr,
(GAsyncReadyCallback)embeddingImageAsyncCallback, context);
}
void ImageEmbeddingProcessorProxy::embeddingBase64ImageAsync(
const std::string& file_data, ImageEmbeddingResultCallback callback,
void* callback_user_data) {
nlohmann::json object;
object["base64ImageData"] = file_data;
object["sessionId"] = sessionId_;
EmbeddingRequestContext* context =
new EmbeddingRequestContext{callback, callback_user_data};
ai_runtime_core_image_embedding_service_call_embedding_base64_image(
delegate_, object.dump().c_str(), nullptr,
(GAsyncReadyCallback)embeddingBase64ImageAsyncCallback, context);
}
/**
 * Completion handler for embeddingTextAsync().
 *
 * Finishes the D-Bus call, converts the outcome into an EmbeddingResult and
 * passes it to the user callback stored in the EmbeddingRequestContext. The
 * result object lives on the stack and is only valid during the callback.
 * The context is deleted on every path.
 *
 * @param object The service proxy the call was issued on.
 * @param res    Async result handed to the *_finish function.
 * @param data   The EmbeddingRequestContext allocated by embeddingTextAsync().
 */
void ImageEmbeddingProcessorProxy::embeddingTextAsyncCallback(GObject* object,
                                                              GAsyncResult* res,
                                                              gpointer data) {
  auto* context = static_cast<EmbeddingRequestContext*>(data);
  GError* error = NULL;
  char* json_result = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_text_finish(
      (AiRuntimeCoreImageEmbeddingService*)object, &json_result, res, &error);

  // Without a context or a callback there is nobody to notify; just release
  // everything instead of calling through a null pointer.
  if (context == nullptr || context->callback == nullptr) {
    g_clear_error(&error);
    g_free(json_result);
    delete context;
    return;
  }

  if (error) {
    g_printerr("[Dbus-close:aisdk_image_embedding_text_async]: %s\n",
               error->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult result(COREAI_EMBEDDING_CONNECTION_ERROR,
                           error->message);
    context->callback(&result, context->callback_user_data);
    g_error_free(error);
    g_free(json_result);
    delete context;
    return;
  }
  if (!json_result) {
    LOGE("Im-embedding text async result error:json_result is nullptr!");
    EmbeddingResult result(COREAI_EMBEDDING_RUNTIME_ERROR,
                           "Embedding reulst is nullptr!");
    context->callback(&result, context->callback_user_data);
    delete context;
    return;
  }
  // Copy and free the GLib buffer before parsing so it cannot leak.
  std::string string_json_result(json_result);
  g_free(json_result);
  auto result = parseResult(string_json_result);
  context->callback(&result, context->callback_user_data);
  delete context;
}
/**
 * Completion handler for embeddingImageAsync().
 *
 * Finishes the D-Bus call, converts the outcome into an EmbeddingResult and
 * passes it to the user callback stored in the EmbeddingRequestContext. The
 * result object lives on the stack and is only valid during the callback.
 * The context is deleted on every path.
 *
 * @param object The service proxy the call was issued on.
 * @param res    Async result handed to the *_finish function.
 * @param data   The EmbeddingRequestContext allocated by embeddingImageAsync().
 */
void ImageEmbeddingProcessorProxy::embeddingImageAsyncCallback(
    GObject* object, GAsyncResult* res, gpointer data) {
  auto* context = static_cast<EmbeddingRequestContext*>(data);
  GError* error = NULL;
  char* json_result = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_image_file_finish(
      (AiRuntimeCoreImageEmbeddingService*)object, &json_result, res, &error);

  // Without a context or a callback there is nobody to notify; just release
  // everything instead of calling through a null pointer.
  if (context == nullptr || context->callback == nullptr) {
    g_clear_error(&error);
    g_free(json_result);
    delete context;
    return;
  }

  if (error) {
    g_printerr("[Dbus-close:aisdk_image_embedding_image_async]: %s\n",
               error->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult result(COREAI_EMBEDDING_CONNECTION_ERROR,
                           error->message);
    context->callback(&result, context->callback_user_data);
    g_error_free(error);
    g_free(json_result);
    delete context;
    return;
  }
  if (!json_result) {
    LOGE("Im-embedding image async result error:json_result is nullptr!");
    EmbeddingResult result(COREAI_EMBEDDING_RUNTIME_ERROR,
                           "Embedding reulst is nullptr!");
    context->callback(&result, context->callback_user_data);
    delete context;
    return;
  }
  // Copy and free the GLib buffer before parsing so it cannot leak.
  std::string string_json_result(json_result);
  g_free(json_result);
  auto result = parseResult(string_json_result);
  context->callback(&result, context->callback_user_data);
  delete context;
}
/**
 * Completion handler for embeddingBase64ImageAsync().
 *
 * Finishes the D-Bus call, converts the outcome into an EmbeddingResult and
 * passes it to the user callback stored in the EmbeddingRequestContext. The
 * result object lives on the stack and is only valid during the callback.
 * The context is deleted on every path.
 *
 * @param object The service proxy the call was issued on.
 * @param res    Async result handed to the *_finish function.
 * @param data   The EmbeddingRequestContext allocated by
 *               embeddingBase64ImageAsync().
 */
void ImageEmbeddingProcessorProxy::embeddingBase64ImageAsyncCallback(
    GObject* object, GAsyncResult* res, gpointer data) {
  auto* context = static_cast<EmbeddingRequestContext*>(data);
  GError* error = NULL;
  char* json_result = nullptr;
  ai_runtime_core_image_embedding_service_call_embedding_base64_image_finish(
      (AiRuntimeCoreImageEmbeddingService*)object, &json_result, res, &error);

  // Without a context or a callback there is nobody to notify; just release
  // everything instead of calling through a null pointer.
  if (context == nullptr || context->callback == nullptr) {
    g_clear_error(&error);
    g_free(json_result);
    delete context;
    return;
  }

  if (error) {
    g_printerr(
        "[Dbus-close:aisdk_image_embedding_base64_image_async]: %s\n",
        error->message);
    // The runtime may have crashed, in which case this session is no longer
    // usable; report the error so the user can create a new session.
    EmbeddingResult result(COREAI_EMBEDDING_CONNECTION_ERROR,
                           error->message);
    context->callback(&result, context->callback_user_data);
    g_error_free(error);
    g_free(json_result);
    delete context;
    return;
  }
  if (!json_result) {
    LOGE(
        "Im-embedding base64 image async result error:json_result is "
        "nullptr!");
    EmbeddingResult result(COREAI_EMBEDDING_RUNTIME_ERROR,
                           "Embedding reulst is nullptr!");
    context->callback(&result, context->callback_user_data);
    delete context;
    return;
  }
  // Copy and free the GLib buffer before parsing so it cannot leak.
  std::string string_json_result(json_result);
  g_free(json_result);
  auto result = parseResult(string_json_result);
  context->callback(&result, context->callback_user_data);
  delete context;
}
/**
 * Converts the JSON reply of an embedding call into an EmbeddingResult.
 *
 * The reply must be a JSON object with an array "vector_result" whose
 * elements are numbers, a string "errorMessage" and a numeric "errorCode".
 * Anything else (including text that is not valid JSON) yields a
 * COREAI_EMBEDDING_RUNTIME_ERROR result instead of throwing, because this
 * function is also reached from GLib completion callbacks.
 *
 * @param json_string_result Raw JSON text returned by the service.
 * @return The decoded result, or a "Json format error." result.
 */
EmbeddingResult ImageEmbeddingProcessorProxy::parseResult(
    const std::string& json_string_result) {
  // allow_exceptions = false: invalid JSON produces a discarded value rather
  // than an exception; a discarded value fails the is_object() test below.
  const nlohmann::json root =
      nlohmann::json::parse(json_string_result, nullptr, false);

  bool well_formed = root.is_object() && root.contains("vector_result") &&
                     root["vector_result"].is_array() &&
                     root.contains("errorMessage") &&
                     root["errorMessage"].is_string() &&
                     root.contains("errorCode") &&
                     root["errorCode"].is_number();
  if (well_formed) {
    // Every element has to be numeric, otherwise get<double>() would throw.
    for (const auto& value : root["vector_result"]) {
      if (!value.is_number()) {
        well_formed = false;
        break;
      }
    }
  }
  if (!well_formed) {
    LOGE("Json format error.");
    return EmbeddingResult(COREAI_EMBEDDING_RUNTIME_ERROR,
                           "Json format error.");
  }

  EmbeddingResult result;
  const nlohmann::json& vector_result = root["vector_result"];
  result.vector_result.reserve(vector_result.size());
  for (const auto& value : vector_result) {
    result.vector_result.push_back(static_cast<float>(value.get<double>()));
  }
  result.error_message = root["errorMessage"].get<std::string>();
  result.error_code = root["errorCode"].get<int>();
  return result;
}
/**
 * Queries the service for model information of the current session and
 * returns the "image_model" entry of the reply.
 *
 * @return The JSON text of root["models"]["image_model"], or an empty string
 *         when the D-Bus call fails, the reply is null, or the reply does not
 *         contain models.image_model.
 */
std::string ImageEmbeddingProcessorProxy::getModelInfo() {
GError* error = nullptr;
char* model_info = nullptr;
ai_runtime_core_image_embedding_service_call_get_model_info_sync(
delegate_, sessionId_, &model_info, nullptr, &error);
if (error) {
g_printerr("[Dbus-close:aisdk_text]: %s\n", error->message);
g_error_free(error);
return "";
}
if (!model_info) {
LOGE("Te-embedding modelInfo is empty!");
return "";
}
// The engine returns embedding model information for both the text and the
// image model; the SDK layer splits the JSON string here.
// Parsing uses allow_exceptions = false, so invalid JSON does not throw.
nlohmann::json root = nlohmann::json::parse(model_info, nullptr, false);
if (root.empty() || !root.contains("models") ||
!root["models"].contains("image_model")) {
LOGE(
"Te-embedding parse modelInfo error! image_model not found in "
"json.");
g_free(model_info);
return "";
}
// Extract the image_model part.
std::string res = root["models"]["image_model"].dump();
g_free(model_info);
return res;
} libkysdk-coreai-1.1.0.1/src/embedding/embedding.cpp 0000664 0001750 0001750 00000020641 15207167112 017726 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "embedding.h"
#include
#include "embeddingcommon.h"
#include "error.h"
#include "imageembeddingsession.h"
#include "textembeddingsession.h"
// Returns a pointer to the first element of the result's embedding vector,
// or nullptr when `result` is null. The pointer refers to storage owned by
// `result`.
float *embedding_result_get_vector_data(EmbeddingResult *result) {
  return result ? result->vector_result.data() : nullptr;
}
// Returns the number of elements in the result's embedding vector, or 0 when
// `result` is null. The size is narrowed to int.
int embedding_result_get_vector_length(EmbeddingResult *result) {
  if (result == nullptr) {
    return 0;
  }
  return static_cast<int>(result->vector_result.size());
}
// Returns the error code stored in `result`, or
// COREAI_EMBEDDING_INPUT_ERROR when `result` is null.
int embedding_result_get_error_code(EmbeddingResult *result) {
  if (result == nullptr) {
    return COREAI_EMBEDDING_INPUT_ERROR;
  }
  return result->error_code;
}
// Returns the error message stored in `result` as a C string owned by
// `result`, or nullptr when `result` is null.
const char *embedding_result_get_error_message(EmbeddingResult *result) {
  return result ? result->error_message.c_str() : nullptr;
}
// Deletes the EmbeddingResult that `*result` points to and resets `*result`
// to nullptr. Does nothing when `result` or `*result` is null.
void embedding_result_destroy(EmbeddingResult **result) {
  // Check both the handle and the object it refers to.
  if (result == nullptr || *result == nullptr) {
    return;
  }
  delete *result;
  *result = nullptr;
}
// Releases a model-info string with delete[]. Passing nullptr is a no-op.
void embedding_model_info_destroy(char *result) {
  delete[] result;
}
// Allocates a new text embedding session with new; release it with
// text_embedding_destroy_session().
_TextEmbeddingSession *text_embedding_create_session() {
  return new _TextEmbeddingSession();
}
// Deletes the session that `*session` points to and resets `*session` to
// nullptr. Does nothing when `session` or `*session` is null.
void text_embedding_destroy_session(_TextEmbeddingSession **session) {
  // Check both the handle and the object it refers to.
  if (session == nullptr || *session == nullptr) {
    return;
  }
  delete *session;
  *session = nullptr;
}
// Connects the session to the runtime and then initialises it.
// Returns COREAI_EMBEDDING_INPUT_ERROR for a null session,
// COREAI_EMBEDDING_CONNECTION_ERROR when connect() returns non-zero, and the
// value of session->init() otherwise.
int text_embedding_init_session(_TextEmbeddingSession *session) {
  if (session == nullptr) {
    return COREAI_EMBEDDING_INPUT_ERROR;
  }

  // Step 1: establish the connection to the runtime.
  const bool connected = (session->connect() == 0);
  if (!connected) {
    return COREAI_EMBEDDING_CONNECTION_ERROR;
  }

  // Step 2: load the model.
  return session->init();
}
/**
 * Copies the session's model information into a newly allocated C string.
 *
 * @param session    Session to query; a null session yields false.
 * @param model_info Output parameter. On success it receives a buffer
 *                   allocated with new[] that must be released with
 *                   embedding_model_info_destroy(). It is left untouched on
 *                   failure. A null pointer yields false.
 * @return true when a non-empty model description was copied, false
 *         otherwise.
 */
bool text_embedding_get_model_info(_TextEmbeddingSession *session,
                                   char **model_info) {
  // Validate the output pointer as well; it is dereferenced below.
  if (!session || !model_info) return false;
  std::string res;
  res = session->getModelInfo();
  if (!res.empty()) {
    size_t length = res.size() + 1;  // +1 for the terminating NUL
    *model_info = new char[length];
    std::strcpy(*model_info, res.c_str());
    return true;
  }
  return false;
}
/**
 * Synchronously embeds `text` with the text embedding session.
 *
 * @param session Session to use.
 * @param text    NUL-terminated text to embed.
 * @param result  Output parameter receiving a heap-allocated EmbeddingResult
 *                that the caller must release with
 *                embedding_result_destroy(). When `session` or `text` is
 *                null it receives a COREAI_EMBEDDING_INPUT_ERROR result.
 * @return false when `result` is null, when an input is null, or when the
 *         session returned no result; true otherwise.
 */
bool text_embedding(TextEmbeddingSession *session, const char *text,
                    EmbeddingResult **result) {
  // The output pointer is written on every path below, so check it first.
  if (!result) return false;
  if (!session || !text) {
    *result = new EmbeddingResult(COREAI_EMBEDDING_INPUT_ERROR,
                                  "Incorrect input parameter");
    return false;
  }
  // The result is heap-allocated by the session, so the caller has to call
  // the destroy function.
  *result = session->embeddingText(text);
  return *result != nullptr;
}
/**
 * Asynchronously embeds `text` with the text embedding session.
 *
 * @param session            Session to use.
 * @param text               NUL-terminated text to embed.
 * @param callback           Receives the result. When `session` or `text` is
 *                           null it is invoked immediately with a
 *                           COREAI_EMBEDDING_INPUT_ERROR result that is only
 *                           valid during the call. A null callback makes the
 *                           function return without doing anything.
 * @param callback_user_data Opaque pointer passed back to `callback`.
 */
void text_embedding_async(_TextEmbeddingSession *session, const char *text,
                          TextEmbeddingResultCallback callback,
                          void *callback_user_data) {
  // Without a callback the result could not be delivered, and invoking a
  // null callback would crash.
  if (!callback) return;
  if (!session || !text) {
    EmbeddingResult result(COREAI_EMBEDDING_INPUT_ERROR,
                           "Incorrect input parameter");
    callback(&result, callback_user_data);
    return;
  }
  session->embeddingTextAsync(text, callback, callback_user_data);
}
// Allocates a new image embedding session with new; release it with
// image_embedding_destroy_session().
_ImageEmbeddingSession *image_embedding_create_session() {
  return new _ImageEmbeddingSession();
}
// Deletes the session that `*session` points to and resets `*session` to
// nullptr. Does nothing when `session` or `*session` is null.
void image_embedding_destroy_session(_ImageEmbeddingSession **session) {
  if (session == nullptr || *session == nullptr) {
    return;
  }
  delete *session;
  *session = nullptr;
}
// Connects the session to the runtime and then initialises it.
// Returns COREAI_EMBEDDING_INPUT_ERROR for a null session,
// COREAI_EMBEDDING_CONNECTION_ERROR when connect() returns non-zero, and the
// value of session->init() otherwise.
int image_embedding_init_session(_ImageEmbeddingSession *session) {
  if (session == nullptr) {
    return COREAI_EMBEDDING_INPUT_ERROR;
  }

  // Step 1: establish the connection to the runtime.
  const bool connected = (session->connect() == 0);
  if (!connected) {
    return COREAI_EMBEDDING_CONNECTION_ERROR;
  }

  // Step 2: load the model.
  return session->init();
}
/**
 * Copies the session's model information into a newly allocated C string.
 *
 * @param session    Session to query; a null session yields false.
 * @param model_info Output parameter. On success it receives a buffer
 *                   allocated with new[] that must be released with
 *                   embedding_model_info_destroy(). It is left untouched on
 *                   failure. A null pointer yields false.
 * @return true when a non-empty model description was copied, false
 *         otherwise.
 */
bool image_embedding_get_model_info(_ImageEmbeddingSession *session,
                                    char **model_info) {
  // Validate the output pointer as well; it is dereferenced below.
  if (!session || !model_info) return false;
  std::string res;
  res = session->getModelInfo();
  if (!res.empty()) {
    size_t length = res.size() + 1;  // +1 for the terminating NUL
    *model_info = new char[length];
    std::strcpy(*model_info, res.c_str());
    return true;
  }
  return false;
}
/**
 * Synchronously embeds `text` with the image embedding session.
 *
 * @param session Session to use.
 * @param text    NUL-terminated text to embed.
 * @param result  Output parameter receiving a heap-allocated EmbeddingResult
 *                that the caller must release with
 *                embedding_result_destroy(). When `session` or `text` is
 *                null it receives a COREAI_EMBEDDING_INPUT_ERROR result.
 * @return false when `result` is null, when an input is null, or when the
 *         session returned no result; true otherwise.
 */
bool text_embedding_by_image_model(_ImageEmbeddingSession *session,
                                   const char *text, EmbeddingResult **result) {
  // The output pointer is written on every path below, so check it first.
  if (!result) return false;
  if (!session || !text) {
    *result = new EmbeddingResult(COREAI_EMBEDDING_INPUT_ERROR,
                                  "Incorrect input parameter");
    return false;
  }
  // The result is heap-allocated by the session, so the caller has to call
  // the destroy function.
  *result = session->embeddingText(text);
  return *result != nullptr;
}
/**
 * Synchronously embeds the image stored in `image_file`.
 *
 * @param session    Session to use.
 * @param image_file NUL-terminated path of the image file.
 * @param result     Output parameter receiving a heap-allocated
 *                   EmbeddingResult that the caller must release with
 *                   embedding_result_destroy(). When `session` or
 *                   `image_file` is null it receives a
 *                   COREAI_EMBEDDING_INPUT_ERROR result.
 * @return false when `result` is null, when an input is null, or when the
 *         session returned no result; true otherwise.
 */
bool image_embedding_by_image_file(_ImageEmbeddingSession *session,
                                   const char *image_file,
                                   EmbeddingResult **result) {
  // The output pointer is written on every path below, so check it first.
  if (!result) return false;
  if (!session || !image_file) {
    *result = new EmbeddingResult(COREAI_EMBEDDING_INPUT_ERROR,
                                  "Incorrect input parameter");
    return false;
  }
  // The result is heap-allocated by the session, so the caller has to call
  // the destroy function.
  *result = session->embeddingImage(image_file);
  return *result != nullptr;
}
/**
 * Synchronously embeds an image passed as base64 data.
 *
 * @param session           Session to use.
 * @param image_data        Buffer holding the base64 image data; it does not
 *                          have to be NUL-terminated.
 * @param image_data_length Number of bytes to read from `image_data`.
 * @param result            Output parameter receiving a heap-allocated
 *                          EmbeddingResult that the caller must release with
 *                          embedding_result_destroy(). When `session` or
 *                          `image_data` is null it receives a
 *                          COREAI_EMBEDDING_INPUT_ERROR result.
 * @return false when `result` is null, when an input is null, or when the
 *         session returned no result; true otherwise.
 */
bool image_embedding_by_base64_image_data(_ImageEmbeddingSession *session,
                                          const unsigned char *image_data,
                                          unsigned int image_data_length,
                                          EmbeddingResult **result) {
  // The output pointer is written on every path below, so check it first.
  if (!result) return false;
  if (!session || !image_data) {
    *result = new EmbeddingResult(COREAI_EMBEDDING_INPUT_ERROR,
                                  "Incorrect input parameter");
    return false;
  }
  // Copy exactly image_data_length bytes into a std::string.
  std::string base64_str(reinterpret_cast<const char *>(image_data),
                         image_data_length);
  // The result is heap-allocated by the session, so the caller has to call
  // the destroy function.
  *result = session->embeddingBase64Image(base64_str);
  return *result != nullptr;
}
/**
 * Asynchronously embeds `text` with the image embedding session.
 *
 * @param session            Session to use.
 * @param text               NUL-terminated text to embed.
 * @param callback           Receives the result. When `session` or `text` is
 *                           null it is invoked immediately with a
 *                           COREAI_EMBEDDING_INPUT_ERROR result that is only
 *                           valid during the call. A null callback makes the
 *                           function return without doing anything.
 * @param callback_user_data Opaque pointer passed back to `callback`.
 */
void text_embedding_by_image_model_async(_ImageEmbeddingSession *session,
                                         const char *text,
                                         ImageEmbeddingResultCallback callback,
                                         void *callback_user_data) {
  // Without a callback the result could not be delivered, and invoking a
  // null callback would crash.
  if (!callback) return;
  if (!session || !text) {
    EmbeddingResult result(COREAI_EMBEDDING_INPUT_ERROR,
                           "Incorrect input parameter");
    callback(&result, callback_user_data);
    return;
  }
  session->embeddingTextAsync(text, callback, callback_user_data);
}
/**
 * Asynchronously embeds the image stored in `file_path`.
 *
 * @param session            Session to use.
 * @param file_path          NUL-terminated path of the image file.
 * @param callback           Receives the result. When `session` or
 *                           `file_path` is null it is invoked immediately
 *                           with a COREAI_EMBEDDING_INPUT_ERROR result that
 *                           is only valid during the call. A null callback
 *                           makes the function return without doing anything.
 * @param callback_user_data Opaque pointer passed back to `callback`.
 */
void image_embedding_from_by_file_async(_ImageEmbeddingSession *session,
                                        const char *file_path,
                                        ImageEmbeddingResultCallback callback,
                                        void *callback_user_data) {
  // Without a callback the result could not be delivered, and invoking a
  // null callback would crash.
  if (!callback) return;
  if (!session || !file_path) {
    EmbeddingResult result(COREAI_EMBEDDING_INPUT_ERROR,
                           "Incorrect input parameter");
    callback(&result, callback_user_data);
    return;
  }
  session->embeddingImageAsync(file_path, callback, callback_user_data);
}
/**
 * Asynchronously embeds an image passed as base64 data.
 *
 * @param session            Session to use.
 * @param image_data         Buffer holding the base64 image data; it does not
 *                           have to be NUL-terminated.
 * @param image_data_length  Number of bytes to read from `image_data`.
 * @param callback           Receives the result. When `session` or
 *                           `image_data` is null it is invoked immediately
 *                           with a COREAI_EMBEDDING_INPUT_ERROR result that
 *                           is only valid during the call. A null callback
 *                           makes the function return without doing anything.
 * @param callback_user_data Opaque pointer passed back to `callback`.
 */
void image_embedding_by_base64_image_data_async(
    _ImageEmbeddingSession *session, const unsigned char *image_data,
    unsigned int image_data_length, ImageEmbeddingResultCallback callback,
    void *callback_user_data) {
  // Without a callback the result could not be delivered, and invoking a
  // null callback would crash.
  if (!callback) return;
  if (!session || !image_data) {
    EmbeddingResult result(COREAI_EMBEDDING_INPUT_ERROR,
                           "Incorrect input parameter");
    callback(&result, callback_user_data);
    return;
  }
  // Copy exactly image_data_length bytes into a std::string.
  std::string base64_image_str(reinterpret_cast<const char *>(image_data),
                               image_data_length);
  session->embeddingBase64ImageAsync(base64_image_str, callback,
                                     callback_user_data);
} libkysdk-coreai-1.1.0.1/src/speech/ 0000775 0001750 0001750 00000000000 15207167112 014632 5 ustar zp zp libkysdk-coreai-1.1.0.1/src/speech/_speechrecognitionsession.cpp 0000664 0001750 0001750 00000006024 15207167112 022613 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_speechrecognitionsession.h"
#include "logger.h"
namespace kyai {
namespace core {
namespace speech {
_SpeechRecognitionSession::_SpeechRecognitionSession() {
serviceProxy_.setReconnectedCallback([this] { initSession(); });
}
_SpeechRecognitionSession::~_SpeechRecognitionSession() {
serviceProxy_.setReconnectedCallback(nullptr);
}
int _SpeechRecognitionSession::init() {
if (!initServer()) {
return AiSdkCommonErrorCode::AISDK_RUNTIME_ERROR;
}
return initSession();
}
void _SpeechRecognitionSession::destroyRecognizer() {
serviceProxy_.destroyRecognizer(sessionId_);
}
void _SpeechRecognitionSession::setRecognizerAudioConfig(_AudioConfig *config) {
if (config == nullptr) {
return;
}
audioConfig_ = config;
serviceProxy_.setRecognizerAudioConfig(sessionId_, config->toJsonString());
}
_AudioConfig *_SpeechRecognitionSession::getRecognizerAudioConfig() const {
return audioConfig_;
}
void _SpeechRecognitionSession::setRecognitionResultCallback(
SpeechRecognitionResultCallback callback, void *userData) {
serviceProxy_.setRecognitionResultCallback(callback, userData);
}
void _SpeechRecognitionSession::startContinuousRecognition() {
serviceProxy_.startContinuousRecognition(sessionId_);
}
void _SpeechRecognitionSession::stopContinuousRecognition() {
serviceProxy_.stopContinuousRecognition(sessionId_);
}
void _SpeechRecognitionSession::continuousRecognitionWriteAudioData(
const uint8_t *data, uint32_t length) {
serviceProxy_.continuousRecognitionWriteAudioData(sessionId_, data, length);
}
void _SpeechRecognitionSession::recognizeOnce() {
serviceProxy_.recognizeOnce(sessionId_);
}
void _SpeechRecognitionSession::setRecognitionModelConfig(
const SpeechModelConfig &config) {
modelConfig_ = config;
}
bool _SpeechRecognitionSession::initServer() {
if (!serviceProxy_.connectToServer()) {
return serviceProxy_.reconnectServer();
}
return true;
}
int _SpeechRecognitionSession::initSession() {
const std::string jsonStringConfig = modelConfigToJson(modelConfig_);
int errorCode =
serviceProxy_.initRecognizer(jsonStringConfig.c_str(), sessionId_);
if (errorCode != AISDK_NO_ERROR) {
LOGE("Init session error: ", errorCode);
}
return errorCode;
}
} // namespace speech
} // namespace core
} // namespace kyai
libkysdk-coreai-1.1.0.1/src/speech/_speechsynthesizersession.h 0000664 0001750 0001750 00000003417 15207167112 022332 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _SPEECHSYNTHESIZER_
#define _SPEECHSYNTHESIZER_
#include "_audioconfig.h"
#include "_speechmodelconfig.h"
#include "coreaispeechserviceproxy.h"
#include "result.h"
namespace kyai {
namespace core {
namespace speech {
// Session object behind the speech synthesizer C API. It owns a
// CoreAiSpeechServiceProxy and forwards requests to it together with the
// session id.
class _SpeechSynthesizerSession {
public:
_SpeechSynthesizerSession();
~_SpeechSynthesizerSession();
// Connects to the server and initialises the synthesizer session; returns an
// error code.
int init();
// Asks the service to destroy the synthesizer of this session.
void destroySynthesizer();
// Stores the (non-owned) audio config pointer and forwards it to the service.
void setSynthesizerAudioConfig(_AudioConfig *config);
// Registers the callback that receives synthesis results.
void setSynthesizerResultCallback(SpeechSynthesisResultCallback callback,
void *userData);
// Requests synthesis of `textLength` bytes starting at `text`.
void synthesizeTextOnce(const char *text, uint32_t textLength);
void synthesizeTextContinuous(const char *text, uint32_t textLength);
// Forwards a stop request to the service and returns its result code.
int stopSpeaking();
// Stores a copy of the model configuration used when the session is
// initialised.
void setSynthesizerModelConfig(const SpeechModelConfig &config);
private:
// Connects (or reconnects) the proxy to the server.
bool initServer();
// Initialises the synthesizer on the service and obtains the session id.
int initSession();
private:
// Not owned; only the pointer passed to setSynthesizerAudioConfig() is kept.
_AudioConfig *audioConfig_{nullptr};
CoreAiSpeechServiceProxy serviceProxy_;
// -1 until the service assigns a session id.
int sessionId_{-1};
SpeechModelConfig modelConfig_{};
};
} // namespace speech
} // namespace core
} // namespace kyai
#endif
libkysdk-coreai-1.1.0.1/src/speech/synthesizer.cpp 0000664 0001750 0001750 00000006232 15207167112 017730 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "synthesizer.h"
#include "_speechsynthesizersession.h"
// Allocates a new synthesizer session and returns it as the opaque
// SpeechSynthesizerSession handle used by the C API.
SpeechSynthesizerSession *speech_synthesizer_create_session() {
  return (SpeechSynthesizerSession *)
      new kyai::core::speech::_SpeechSynthesizerSession();
}
// Destroys the synthesizer on the service, deletes the session object and
// resets `*session` to nullptr. Does nothing when `session` or `*session` is
// null.
void speech_synthesizer_destroy_session(SpeechSynthesizerSession **session) {
  if (session == nullptr || *session == nullptr) {
    return;
  }
  auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)*session;
  impl->destroySynthesizer();
  delete impl;
  *session = nullptr;
}
// Initialises the session. Returns AISDK_INVALID_SESSION for a null handle,
// otherwise the value of the session's init().
int speech_synthesizer_init_session(SpeechSynthesizerSession *session) {
  if (session == nullptr) {
    return AISDK_INVALID_SESSION;
  }
  auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)session;
  return impl->init();
}
// Forwards the audio configuration to the session. Does nothing when either
// argument is null.
void speech_synthesizer_set_audio_config(SpeechSynthesizerSession *session,
                                         AudioConfig *audio_config) {
  if (session == nullptr || audio_config == nullptr) {
    return;
  }
  auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)session;
  impl->setSynthesizerAudioConfig((_AudioConfig *)audio_config);
}
// Registers the synthesis result callback and its user data on the session.
// Does nothing when `session` is null.
void speech_synthesizer_result_set_callback(
    SpeechSynthesizerSession *session, SpeechSynthesisResultCallback callback,
    void *user_data) {
  if (session != nullptr) {
    auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)session;
    impl->setSynthesizerResultCallback(callback, user_data);
  }
}
// Passes `*config` to the session by const reference. Does nothing when
// either argument is null.
void speech_synthesizer_set_model_config(SpeechSynthesizerSession *session,
                                         SpeechModelConfig *config) {
  if (session == nullptr || config == nullptr) {
    return;
  }
  auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)session;
  impl->setSynthesizerModelConfig(*config);
}
/**
 * Requests synthesis of `text` on the given session.
 *
 * @param session     Session handle; a null handle is ignored.
 * @param text        Buffer holding the text to synthesize; a null pointer is
 *                    ignored because the session builds a std::string from
 *                    it.
 * @param text_length Number of bytes to read from `text`.
 */
void speech_synthesizer_synthesize_text_async(SpeechSynthesizerSession *session,
                                              const char *text,
                                              uint32_t text_length) {
  // Reject a null text as well: the session constructs
  // std::string(text, text_length), which is invalid for a null pointer.
  if (session == nullptr || text == nullptr) {
    return;
  }
  ((kyai::core::speech::_SpeechSynthesizerSession *)session)
      ->synthesizeTextOnce(text, text_length);
}
// Asks the session to stop speaking. Returns SPEECH_PARAM_INVALID for a null
// handle, otherwise the value of the session's stopSpeaking().
int speech_synthesizer_stop_speaking(SpeechSynthesizerSession *session) {
  if (session == nullptr) {
    return SPEECH_PARAM_INVALID;
  }
  auto *impl = (kyai::core::speech::_SpeechSynthesizerSession *)session;
  return impl->stopSpeaking();
}
libkysdk-coreai-1.1.0.1/src/speech/_speechmodelconfig.cpp 0000664 0001750 0001750 00000003705 15207167112 021160 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_speechmodelconfig.h"
#include
// Allocates a value-initialised model configuration with new; release it
// with speech_model_config_destroy().
SpeechModelConfig *speech_model_config_create() {
  auto *config = new _SpeechModelConfig();
  return config;
}
// Deletes the configuration that `*config` points to and resets `*config` to
// nullptr. A null `config` is ignored; a null `*config` is harmless because
// delete accepts nullptr.
void speech_model_config_destroy(SpeechModelConfig **config) {
  if (config != nullptr) {
    delete *config;
    *config = nullptr;
  }
}
/**
 * Sets the model name of the configuration.
 *
 * @param config Configuration to modify; a null pointer is ignored.
 * @param name   NUL-terminated model name; a null pointer is ignored so that
 *               a null pointer never ends up in the name field.
 */
void speech_model_config_set_name(SpeechModelConfig *config, const char *name) {
  if (config == nullptr || name == nullptr) {
    return;
  }
  config->name = name;
}
// Stores the deployment type in the configuration. A null `config` is
// ignored.
void speech_model_config_set_deploy_type(SpeechModelConfig *config,
                                         ModelDeployType type) {
  if (config != nullptr) {
    config->type = type;
  }
}
// Serialises a model configuration with Json::FastWriter into a JSON object
// with the keys "name" and "type". The deployment type is mapped to
// "Local" (OnDevice), "PrivateCloud", "PublicCloud", or "Default" for any
// other value.
std::string modelConfigToJson(const _SpeechModelConfig &modelconfig) {
  const char *deployType = "Default";
  switch (modelconfig.type) {
    case ModelDeployType::OnDevice:
      deployType = "Local";
      break;
    case ModelDeployType::PrivateCloud:
      deployType = "PrivateCloud";
      break;
    case ModelDeployType::PublicCloud:
      deployType = "PublicCloud";
      break;
    default:
      break;
  }

  Json::Value root;
  root["name"] = modelconfig.name;
  root["type"] = deployType;
  return Json::FastWriter().write(root);
}
libkysdk-coreai-1.1.0.1/src/speech/coreaispeechserviceproxy.h 0000664 0001750 0001750 00000011344 15207167112 022123 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _CORE_SPEECH_SERVICE_PROXY_H_
#define _CORE_SPEECH_SERVICE_PROXY_H_
#include
#include
#include
#include "_speechmodelconfig.h"
#include "corespeechserviceglue.h"
#include "error.h"
#include "result.h"
namespace kyai {
namespace core {
namespace speech {
// Hook invoked without arguments; the session classes install a lambda that
// calls their initSession().
using ReconnectedCallback = std::function<void()>;

// Client-side wrapper around the generated AiRuntimeCoreSpeechService GDBus
// proxy. It is shared by the recognition and the synthesis session classes,
// which pass their session id to every request.
class CoreAiSpeechServiceProxy {
public:
  ~CoreAiSpeechServiceProxy();

  // Connection management; both return true on success.
  bool connectToServer();
  bool reconnectServer();

  // Callback registration. The user data pointer is stored next to the
  // callback.
  void setRecognitionResultCallback(SpeechRecognitionResultCallback callback,
                                    void *userData);
  void setSynthesizerCallback(SpeechSynthesisResultCallback callback,
                              void *userData);
  void setReconnectedCallback(const ReconnectedCallback &callback);

  // Session setup/teardown. `config` is a JSON string; `sessionId` is an
  // output parameter. The return value is an error code.
  int initRecognizer(const char *config, int &sessionId);
  int initSynthesizer(const char *config, int &sessionId);
  void destroyRecognizer(int sessionId);
  void destroySynthesizer(int sessionId);

  // Recognition requests.
  void setRecognizerAudioConfig(int sessionId,
                                const std::string &audioConfig);
  void startContinuousRecognition(int sessionId);
  void stopContinuousRecognition(int sessionId);
  void continuousRecognitionWriteAudioData(int sessionId, const uint8_t *data,
                                           uint32_t length);
  void recognizeOnce(int sessionId);

  // Synthesis requests.
  void setSynthesizerAudioConfig(int sessionId,
                                 const std::string &audioConfig);
  void synthesizeTextOnce(int sessionId, const char *text,
                          uint32_t textLength);
  void synthesizeTextContinuous(int sessionId, const char *text,
                                uint32_t textLength);
  int stopSpeaking(int sessionId);

private:
  void connectSpeechRecognitionResultSignals(int sessionId);
  void connectSpeechSynthesisResultSignals(int sessionId);

  // Handlers with the GDBus signal-callback parameter list.
  static void onRecognitionResultCallback(
      GDBusConnection *connection, const gchar *senderName,
      const gchar *objectPath, const gchar *interfaceName,
      const gchar *signalName, GVariant *parameters, gpointer userData);
  static void onSynthesisResult(GDBusConnection *connection,
                                const gchar *senderName,
                                const gchar *objectPath,
                                const gchar *interfaceName,
                                const gchar *signalName, GVariant *parameters,
                                gpointer userData);

  // Completion handler (GAsyncReadyCallback parameter list) and error
  // helpers for one-shot recognition.
  static void onOnceRecognitionFinishedCallback(GObject *sourceObject,
                                                GAsyncResult *res,
                                                gpointer userData);
  void handleOnceRecognitionError(int errorCode);
  void handleOnceRecognitionServerClosed();
  void handleOnceRecognitionResultTimeout();
  void handleOnceRecognitionGeneralError();

  // Completion handler (GAsyncReadyCallback parameter list) and error
  // helpers for one-shot synthesis.
  static void onOnceSynthesisFinishedCallback(GObject *sourceObject,
                                              GAsyncResult *res,
                                              gpointer userData);
  void handleOnceSynthesisErrorOccurred(int errorCode);
  void handleOnceSynthesisServerClosed();
  void handleOnceSynthesisResultTimeout();
  void handleOnceSynthesisGeneralError();

private:
  SpeechRecognitionResultCallback recognitionCallback_{nullptr};
  void *recognitionUserData_{nullptr};
  SpeechSynthesisResultCallback synthesizerCallback_{nullptr};
  void *synthesizerUserData_{nullptr};
  guint recognitionSubscriptionId_{0};
  guint synthesizerSubscriptionId_{0};
  AiRuntimeCoreSpeechService *proxy_{nullptr};
  // The following members support reconnecting.
  ReconnectedCallback reconnectedCallback_{nullptr};
  int reconnectAttempts_{0};
  // NOTE(review): presumably the upper bound for reconnectAttempts_; the
  // implementation is not visible here, so confirm.
  static constexpr int maxReconnectAttempts = 10;
  bool recognitionResultSignalConnected_{false};
  bool synthesisResultSignalConnected_{false};
  bool continuousRecognitionStarted_{false};
};
}  // namespace speech
}  // namespace core
}  // namespace kyai
#endif
libkysdk-coreai-1.1.0.1/src/speech/_speechsynthesizersession.cpp 0000664 0001750 0001750 00000006001 15207167112 022655 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_speechsynthesizersession.h"
#include "logger.h"
namespace kyai {
namespace core {
namespace speech {
_SpeechSynthesizerSession::_SpeechSynthesizerSession() {
serviceProxy_.setReconnectedCallback([this] { initSession(); });
}
_SpeechSynthesizerSession::~_SpeechSynthesizerSession() {
serviceProxy_.setReconnectedCallback(nullptr);
}
int _SpeechSynthesizerSession::init() {
if (!initServer()) {
return AiSdkCommonErrorCode::AISDK_RUNTIME_ERROR;
}
return initSession();
}
void _SpeechSynthesizerSession::destroySynthesizer() {
serviceProxy_.destroySynthesizer(sessionId_);
}
void _SpeechSynthesizerSession::setSynthesizerAudioConfig(
_AudioConfig *config) {
if (config == nullptr) {
return;
}
audioConfig_ = config;
serviceProxy_.setSynthesizerAudioConfig(sessionId_, config->toJsonString());
}
// Registers the callback (and its opaque user pointer) on the service proxy.
void _SpeechSynthesizerSession::setSynthesizerResultCallback(
    SpeechSynthesisResultCallback callback, void *userData) {
    auto &proxy = serviceProxy_;
    proxy.setSynthesizerCallback(callback, userData);
}
// Logs the request and forwards a one-shot synthesis call to the service.
// `text` is read for `textLength` bytes when building the log entry.
void _SpeechSynthesizerSession::synthesizeTextOnce(const char *text,
                                                   uint32_t textLength) {
    const std::string loggedText(text, textLength);
    LOGD(sessionId_, loggedText);
    serviceProxy_.synthesizeTextOnce(sessionId_, text, textLength);
}
// Forwards a continuous (streaming) synthesis request for this session.
void _SpeechSynthesizerSession::synthesizeTextContinuous(const char *text,
                                                         uint32_t textLength) {
    auto &proxy = serviceProxy_;
    proxy.synthesizeTextContinuous(sessionId_, text, textLength);
}
// Asks the service to stop speaking for this session and returns its status.
int _SpeechSynthesizerSession::stopSpeaking() {
    const int status = serviceProxy_.stopSpeaking(sessionId_);
    return status;
}
void _SpeechSynthesizerSession::setSynthesizerModelConfig(
const SpeechModelConfig &config) {
modelConfig_ = config;
}
// Tries a direct connection first and falls back to the proxy's reconnect
// logic only when that fails. Returns true once a connection is available.
bool _SpeechSynthesizerSession::initServer() {
    return serviceProxy_.connectToServer() || serviceProxy_.reconnectServer();
}
int _SpeechSynthesizerSession::initSession() {
const std::string jsonStringConfig = modelConfigToJson(modelConfig_);
int errorCode =
serviceProxy_.initSynthesizer(jsonStringConfig.c_str(), sessionId_);
LOGD(sessionId_);
if (errorCode != AISDK_NO_ERROR) {
LOGE("Init session error: ", errorCode);
}
return errorCode;
}
} // namespace speech
} // namespace core
} // namespace kyai
libkysdk-coreai-1.1.0.1/src/speech/coreaispeechserver.cpp 0000664 0001750 0001750 00000005607 15207167112 021227 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "coreaispeechserver.h"
#include
namespace kyai::core::speech {
// D-Bus address of the core speech socket; the path contains the uid of the
// calling process.
const std::string serverUnixPath =
    std::string("unix:path=/tmp/.kylin-ai-runtime-unix/")
        .append(std::to_string(getuid()))
        .append("/core-speech.sock");
// Returns the single shared instance, constructed on first use as a
// function-local static.
CoreAiSpeechServer &CoreAiSpeechServer::getInstance() {
    static CoreAiSpeechServer sharedServer;
    return sharedServer;
}
// Nothing to set up here: the connection is opened lazily by init().
CoreAiSpeechServer::CoreAiSpeechServer() = default;
// Tears down the connection and drops the close callback via destroy().
CoreAiSpeechServer::~CoreAiSpeechServer() {
    destroy();
}
// Opens the connection using the default per-user socket address.
void CoreAiSpeechServer::init() {
    init(serverUnixPath);
}
void CoreAiSpeechServer::init(const std::string &unixPath) {
std::lock_guard locker(mutex_);
if (connection_ != nullptr && !g_dbus_connection_is_closed(connection_)) {
return;
}
GError *error = nullptr;
connection_ = g_dbus_connection_new_for_address_sync(
unixPath.c_str(), G_DBUS_CONNECTION_FLAGS_AUTHENTICATION_CLIENT,
nullptr, /* GDBusAuthObserver */
nullptr, /* GCancellable */
&error);
if (connection_ == nullptr) {
g_printerr("Error connecting to D-Bus address %s: %s\n",
unixPath.c_str(), error->message);
g_error_free(error);
return;
}
handlerId_ = g_signal_connect(connection_, "closed",
G_CALLBACK(onConnectionClosed), this);
}
// Disconnects the "closed" handler, closes and releases the connection, and
// drops the user callback.
//
// The members are reset after release so that available()/getConnection()
// never expose a dangling pointer and a second call is harmless.
void CoreAiSpeechServer::destroy() {
    std::lock_guard<std::mutex> locker(mutex_);
    if (connection_ != nullptr) {
        // Disconnect the signal handler first so closing does not re-enter us.
        if (handlerId_ != 0) {
            g_signal_handler_disconnect(connection_, handlerId_);
            handlerId_ = 0;
        }
        // Close and release the connection.
        g_dbus_connection_close_sync(connection_, nullptr, nullptr);
        g_object_unref(connection_);
        connection_ = nullptr;
    }
    onConnectionClosed_ = nullptr;
}
// GObject "closed" signal handler. `userData` is the CoreAiSpeechServer that
// registered the handler; the other arguments are not used. Invokes the
// user-supplied close callback when one is set.
void CoreAiSpeechServer::onConnectionClosed(GDBusConnection *connection,
                                            gboolean remotePeerVanished,
                                            GError *error, gpointer userData) {
    LOGE("On connection closed.");
    auto *self = static_cast<CoreAiSpeechServer *>(userData);
    if (!self->onConnectionClosed_) {
        return;
    }
    self->onConnectionClosed_();
}
} // namespace kyai::core::speech
libkysdk-coreai-1.1.0.1/src/speech/_audioconfig.h 0000664 0001750 0001750 00000003737 15207167112 017443 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef SRC_AUDIO_CONFIG_H
#define SRC_AUDIO_CONFIG_H
#include
#include
#include "audiodatastream.h"
#include "util.h"
// Describes where recognition audio comes from and how it is encoded.
struct InputAudioInfo {
    // Origin of the audio handed to the recognizer.
    enum class InputSource { DEFAULT_MIC, STREAM, PCM_FILE, RAW_DATA };
    // Container / file format.
    enum class Format { PCM, MP3 };
    // Sample storage format (some libraries call this "format").
    enum class Encode {
        SAMPLE_S16LE,  // Signed 16 Bit PCM, little endian
    };

    InputSource inputSource{InputSource::DEFAULT_MIC};
    Format format{Format::PCM};
    Encode encode{Encode::SAMPLE_S16LE};
    int rate{16000};
    int channel{1};
    // Raw audio bytes (RAW_DATA source). The element type was missing from the
    // declaration; uint8_t matches the const uint8_t* buffers it is filled from.
    std::vector<uint8_t> audioData;
    // File to read (PCM_FILE source).
    std::filesystem::path inputFilePath{};
    // Stream to read from (STREAM source); only the pointer is stored here.
    AudioDataStream* audioDataStream{nullptr};
};
// Describes where synthesized audio is delivered and the voice parameters
// sent with it.
struct OutputAudioInfo {
// Destination of the synthesized audio.
enum class OutputTarget { PCM_DATA, DEFAULT_SPEAKER, PCM_OUTPUT_FILE };
OutputTarget outputTarget{OutputTarget::DEFAULT_SPEAKER};
int speed{50}; // Speaking rate, allowed values [0-100], default 50
int volume{50}; // Volume, allowed values [0-100], default 50
int pitch{50}; // Pitch, allowed values [0-100], default 50
// Output file path; serialized only for the PCM_OUTPUT_FILE target.
std::filesystem::path outputFilePath{};
};
// Bundles the input-side and output-side audio settings of one configuration
// object.
struct _AudioConfig {
// Serializes the settings into a JSON string.
std::string toJsonString() const;
InputAudioInfo inputAudioInfo{};
OutputAudioInfo outputAudioInfo{};
};
#endif
libkysdk-coreai-1.1.0.1/src/speech/audiodatastream.cpp 0000664 0001750 0001750 00000003147 15207167112 020512 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "audiodatastream.h"
#include
#include "_audiodatastream.h"
// Allocates a new stream object with no write function and no user data.
// The caller releases it with audio_data_stream_destroy().
AudioDataStream *audio_data_stream_create() {
    auto *stream = new _AudioDataStream();
    stream->p_user_data = nullptr;
    stream->audio_data_to_stream_write_func = nullptr;
    return stream;
}
// Deletes the stream that `stream` points to and resets the caller's pointer
// to null. A null `stream` argument is ignored.
void audio_data_stream_destroy(AudioDataStream **stream) {
    if (stream != nullptr) {
        AudioDataStream *doomed = *stream;
        *stream = nullptr;
        delete doomed;
    }
}
// Hands `data_length` bytes of `data` to the write function installed on the
// stream, together with the stream's user-data pointer.
// Returns -1 when `stream` or `data` is null, `data_length` is 0, or no write
// function is installed; otherwise returns the write function's result.
int audio_data_stream_write(AudioDataStream *stream, const uint8_t *data,
uint32_t data_length) {
if (stream == nullptr) {
return -1;
}
if (data == nullptr) {
return -1;
}
if (data_length == 0) {
return -1;
}
// No consumer has been attached to this stream yet.
if (stream->audio_data_to_stream_write_func == nullptr) {
return -1;
}
return stream->audio_data_to_stream_write_func(data, data_length,
stream->p_user_data);
} libkysdk-coreai-1.1.0.1/src/speech/coreaispeechserver.h 0000664 0001750 0001750 00000003502 15207167112 020664 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef SRC_CORE_AI_SPEECH_SERVER_H
#define SRC_CORE_AI_SPEECH_SERVER_H
#include
#include
#include
#include
namespace kyai::core::speech {
// Callback invoked, without arguments, when the D-Bus connection is closed.
// The signature was missing from the alias; void() matches how the callback
// is stored and called.
using OnConnectionClosed = std::function<void()>;

// Owns the D-Bus connection to the core speech runtime and forwards its
// "closed" signal to a user callback. Accessed through getInstance(); the
// constructor is private.
class CoreAiSpeechServer {
public:
    // Returns the single shared instance.
    static CoreAiSpeechServer &getInstance();
    ~CoreAiSpeechServer();

    // Opens the connection at the default socket address if needed.
    void init();

    // True when a connection object exists. Does not check whether it has
    // been closed and does not take mutex_.
    bool available() const { return connection_ != nullptr; }

    // Returns the raw connection pointer (may be null); no reference is added.
    GDBusConnection *getConnection() const { return connection_; }

    // Replaces the close callback; pass nullptr to clear it. Does not take
    // mutex_.
    void setOnConnectionClosed(const OnConnectionClosed &onConnectionClosed) {
        onConnectionClosed_ = onConnectionClosed;
    }

private:
    CoreAiSpeechServer();
    void init(const std::string &unixPath);
    void destroy();
    // GObject handler for the connection's "closed" signal; userData is the
    // CoreAiSpeechServer instance.
    static void onConnectionClosed(GDBusConnection *connection,
                                   gboolean remotePeerVanished, GError *error,
                                   gpointer userData);

private:
    GDBusConnection *connection_{nullptr};
    gulong handlerId_{};  // id of the "closed" handler, 0 when none
    OnConnectionClosed onConnectionClosed_;
    mutable std::mutex mutex_;  // held by init(unixPath) and destroy()
};
} // namespace kyai::core::speech
#endif // SRC_CORE_AI_SPEECH_SERVER_H
libkysdk-coreai-1.1.0.1/src/speech/_speechsynthesisresult.cpp 0000664 0001750 0001750 00000003354 15207167112 022162 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_speechsynthesisresult.h"
#include "result.h"
// Returns the reason code stored in the result. `result` must not be null.
SpeechResultReason speech_synthesis_result_get_reason(
    SpeechSynthesisResult *result) {
    const SpeechResultReason reason = result->reason;
    return reason;
}
// Returns the audio format as a C string owned by the result object; the
// pointer is only valid while the result is alive. `result` must not be null.
const char *speech_synthesis_result_get_audio_format(
    SpeechSynthesisResult *result) {
    const std::string &format = result->audioFormat;
    return format.c_str();
}
// Returns the audio rate stored in the result. `result` must not be null.
int speech_synthesis_result_get_audio_rate(SpeechSynthesisResult *result) {
    const int rate = result->audioRate;
    return rate;
}
// Returns the channel count stored in the result. `result` must not be null.
int speech_synthesis_result_get_audio_channel(SpeechSynthesisResult *result) {
    const int channels = result->audioChannel;
    return channels;
}
// Returns a pointer to the synthesized audio bytes and writes their count to
// `*data_length`.
//
// The buffer is owned by `result` and is valid only while it is alive.
// When `result` is null the function returns nullptr and reports a length of
// 0, so callers never read an unset length. `data_length` may be null, in
// which case the length is simply not reported.
const uint8_t *speech_synthesis_result_get_data(SpeechSynthesisResult *result,
                                                uint32_t *data_length) {
    if (result == nullptr) {
        if (data_length != nullptr) {
            *data_length = 0;
        }
        return nullptr;
    }
    if (data_length != nullptr) {
        *data_length = static_cast<uint32_t>(result->audioData.size());
    }
    return result->audioData.data();
}
// Returns the error code stored in the result. `result` must not be null.
int speech_synthesis_result_get_error_code(SpeechSynthesisResult *result) {
    const int code = result->errorCode;
    return code;
}
// Returns the error message as a C string owned by the result object; the
// pointer is only valid while the result is alive. `result` must not be null.
const char *speech_synthesis_result_get_error_message(
    SpeechSynthesisResult *result) {
    const std::string &message = result->errorMessage;
    return message.c_str();
}
libkysdk-coreai-1.1.0.1/src/speech/_audiodatastream.h 0000664 0001750 0001750 00000002063 15207167112 020312 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _AUDIODATASTREAM_H
#define _AUDIODATASTREAM_H
// Backing object of the audio data stream handle: a write function plus an
// opaque pointer that is passed back to it.
struct _AudioDataStream {
// Signature of the write function: receives the audio bytes, their length and
// the stream's user-data pointer, and returns an int status.
typedef int (*AudioDataToStreamWriteFunc)(const uint8_t *data,
uint32_t data_length,
void *p_user_data);
// Function that audio_data_stream_write() forwards to; null until installed.
AudioDataToStreamWriteFunc audio_data_to_stream_write_func;
// Opaque pointer handed to the write function on every call.
void *p_user_data;
};
#endif
libkysdk-coreai-1.1.0.1/src/speech/audioconfig.cpp 0000664 0001750 0001750 00000013540 15207167112 017630 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "audioconfig.h"
#include
#include
#include "_audioconfig.h"
#include "logger.h"
std::string _AudioConfig::toJsonString() const {
Json::Value root;
// 设置输入来源类型
switch (inputAudioInfo.inputSource) {
case InputAudioInfo::InputSource::DEFAULT_MIC:
root["input_source"] =
int(InputAudioInfo::InputSource::DEFAULT_MIC);
break;
case InputAudioInfo::InputSource::STREAM:
root["input_source"] = int(InputAudioInfo::InputSource::STREAM);
break;
case InputAudioInfo::InputSource::PCM_FILE:
root["input_source"] = int(InputAudioInfo::InputSource::PCM_FILE);
root["pcm_file_path"] = inputAudioInfo.inputFilePath.string();
break;
case InputAudioInfo::InputSource::RAW_DATA:
root["input_source"] = int(InputAudioInfo::InputSource::RAW_DATA);
root["raw_data"] =
util::base64Encode(inputAudioInfo.audioData.data(),
inputAudioInfo.audioData.size());
break;
default:
LOGE("未知输入源");
break;
}
// 设置输出目标类型
switch (outputAudioInfo.outputTarget) {
case OutputAudioInfo::OutputTarget::DEFAULT_SPEAKER:
root["output_target"] =
int(OutputAudioInfo::OutputTarget::DEFAULT_SPEAKER);
break;
case OutputAudioInfo::OutputTarget::PCM_OUTPUT_FILE:
root["output_target"] =
int(OutputAudioInfo::OutputTarget::PCM_OUTPUT_FILE);
root["output_pcm_file_path"] =
outputAudioInfo.outputFilePath.string();
break;
case OutputAudioInfo::OutputTarget::PCM_DATA:
root["output_target"] =
int(OutputAudioInfo::OutputTarget::PCM_DATA);
break;
default:
LOGE("未知输出源");
break;
}
// 设置采样率
root["sample_rate"] = inputAudioInfo.rate;
// 设置输出音频格式
root["speed"] = outputAudioInfo.speed;
root["volume"] = outputAudioInfo.volume;
root["pitch"] = outputAudioInfo.pitch;
// 序列化 JSON 对象
Json::FastWriter writer;
return writer.write(root);
}
// Creates a configuration whose input source is the default microphone.
// Release it with audio_config_destroy().
AudioConfig *
audio_config_create_continuous_audio_input_from_default_microphone() {
    auto *created = new AudioConfig{};
    created->inputAudioInfo.inputSource =
        InputAudioInfo::InputSource::DEFAULT_MIC;
    return created;
}
// Creates a configuration that reads input from `audio_data_stream`. Only the
// pointer is stored. Release the configuration with audio_config_destroy().
AudioConfig *audio_config_create_continuous_audio_input_from_audio_data_stream(
    AudioDataStream *audio_data_stream) {
    auto *created = new AudioConfig{};
    InputAudioInfo &input = created->inputAudioInfo;
    input.inputSource = InputAudioInfo::InputSource::STREAM;
    input.audioDataStream = audio_data_stream;
    return created;
}
// Creates a configuration that reads input from the PCM file `pcm_file`.
//
// A null `pcm_file` used to be assigned straight into std::filesystem::path,
// which is undefined behaviour; it is now logged and the path is left empty.
// Release the configuration with audio_config_destroy().
AudioConfig *audio_config_create_once_audio_input_from_pcm_file(
    const char *pcm_file) {
    AudioConfig *config = new AudioConfig{};
    config->inputAudioInfo.inputSource = InputAudioInfo::InputSource::PCM_FILE;
    if (pcm_file != nullptr) {
        config->inputAudioInfo.inputFilePath = pcm_file;
    } else {
        LOGE("audio_config_create_once_audio_input_from_pcm_file: "
             "pcm_file is null");
    }
    return config;
}
// Creates a configuration carrying a private copy of `data_length` bytes from
// `audio_data` as raw input.
//
// A null `audio_data` with a non-zero length would form an invalid pointer
// range; in that case the buffer is left empty and the problem is logged.
// Release the configuration with audio_config_destroy().
AudioConfig *audio_config_create_once_audio_input_from_pcm_data(
    const uint8_t *audio_data, uint32_t data_length) {
    AudioConfig *config = new AudioConfig{};
    config->inputAudioInfo.inputSource = InputAudioInfo::InputSource::RAW_DATA;
    if (audio_data != nullptr) {
        config->inputAudioInfo.audioData.assign(audio_data,
                                                audio_data + data_length);
    } else if (data_length != 0) {
        LOGE("audio_config_create_once_audio_input_from_pcm_data: "
             "audio_data is null");
    }
    return config;
}
// Creates a configuration whose output target is PCM data.
// Release it with audio_config_destroy().
AudioConfig *audio_config_create_audio_output_from_pcm_data() {
    auto *created = new AudioConfig{};
    created->outputAudioInfo.outputTarget =
        OutputAudioInfo::OutputTarget::PCM_DATA;
    return created;
}
// Creates a configuration that plays output on the system default speaker.
// Release it with audio_config_destroy().
AudioConfig *audio_config_create_audio_output_from_default_speaker() {
    auto *created = new AudioConfig{};
    created->outputAudioInfo.outputTarget =
        OutputAudioInfo::OutputTarget::DEFAULT_SPEAKER;
    return created;
}
// Creates a configuration that writes output to the PCM file `pcm_file`.
//
// A null `pcm_file` used to be assigned straight into std::filesystem::path,
// which is undefined behaviour; it is now logged and the path is left empty.
// Release the configuration with audio_config_destroy().
AudioConfig *audio_config_create_audio_output_from_pcm_file_name(
    const char *pcm_file) {
    AudioConfig *config = new AudioConfig{};
    config->outputAudioInfo.outputTarget =
        OutputAudioInfo::OutputTarget::PCM_OUTPUT_FILE;
    if (pcm_file != nullptr) {
        config->outputAudioInfo.outputFilePath = pcm_file;
    } else {
        LOGE("audio_config_create_audio_output_from_pcm_file_name: "
             "pcm_file is null");
    }
    return config;
}
// Deletes the configuration that `config` points to and resets the caller's
// pointer to null. A null `config` argument is ignored.
void audio_config_destroy(AudioConfig **config) {
    if (config != nullptr) {
        delete *config;
        *config = nullptr;
    }
}
// Sets the input sample rate. A null `config` is ignored.
void audio_config_set_input_audio_rate(AudioConfig *config, int rate) {
    if (config != nullptr) {
        config->inputAudioInfo.rate = rate;
    }
}
// Sets the output speaking rate. The value is stored as given, without range
// checking. A null `config` is ignored.
void audio_config_set_output_audio_speed(AudioConfig *config, int speed) {
    if (config != nullptr) {
        config->outputAudioInfo.speed = speed;
    }
}
// Sets the output volume. The value is stored as given, without range
// checking. A null `config` is ignored.
void audio_config_set_output_audio_volume(AudioConfig *config, int volume) {
    if (config != nullptr) {
        config->outputAudioInfo.volume = volume;
    }
}
// Sets the output pitch. The value is stored as given, without range
// checking. A null `config` is ignored.
void audio_config_set_output_audio_pitch(AudioConfig *config, int pitch) {
    if (config != nullptr) {
        config->outputAudioInfo.pitch = pitch;
    }
}
libkysdk-coreai-1.1.0.1/src/speech/_speechsynthesisresult.h 0000664 0001750 0001750 00000002032 15207167112 021617 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _SPEECHSYNTHESISRESULT_H_
#define _SPEECHSYNTHESISRESULT_H_
#include
#include
#include "result.h"
// One synthesis result as delivered to the user's callback.
struct _SpeechSynthesisResult {
    SpeechResultReason reason;  // why this result was produced
    std::string audioFormat;    // audio format name
    int audioRate;              // audio rate
    int audioChannel;           // channel count
    // Decoded audio bytes. The element type was missing from the declaration;
    // uint8_t is required because the C getter returns audioData.data() as a
    // const uint8_t *.
    std::vector<uint8_t> audioData;
    int errorCode;             // error code reported with the result
    std::string errorMessage;  // error description reported with the result
};
#endif
libkysdk-coreai-1.1.0.1/src/speech/coreaispeechserviceproxy.cpp 0000664 0001750 0001750 00000056052 15207167112 022463 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "coreaispeechserviceproxy.h"
#include
#include
#include
#include "_speechmodelconfig.h"
#include "_speechrecognitionresult.h"
#include "_speechsynthesisresult.h"
#include "coreaispeechserver.h"
#include "logger.h"
#include "util.h"
namespace kyai {
namespace core {
namespace speech {
// D-Bus object path used both for creating the service proxy and for the
// result-signal subscriptions.
const char *objectPath = "/com/kylin/AiRuntime/CoreSpeech";
// Interface name prefix; the session id is appended to it to form the
// interface name used in the signal subscriptions.
const char *interface = "com.kylin.AiRuntime.CoreSpeech";
// Name of the signal subscribed to for recognition results.
const char *recognitionSpeechResultSignal = "RecognitionSpeechResult";
// Name of the signal subscribed to for synthesis results.
const char *synthesisResultSignal = "SynthesisResult";
_SpeechRecognitionResult dissectRecognitionResult(GVariant *parameters) {
_SpeechRecognitionResult result{};
GVariantIter iter;
g_variant_iter_init(&iter, parameters);
GVariant *reason = g_variant_iter_next_value(&iter);
if (reason != nullptr) {
int *pReason = (int *)g_variant_get_data(reason);
if (pReason != nullptr) {
result.reason = SpeechResultReason(*pReason);
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(reason);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *text = g_variant_iter_next_value(&iter);
if (text != nullptr) {
char *pText = (char *)g_variant_get_data(text);
if (pText != nullptr) {
result.text = pText;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(text);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *speakerId = g_variant_iter_next_value(&iter);
if (speakerId != nullptr) {
int *pSpeakerId = (int *)g_variant_get_data(speakerId);
if (pSpeakerId != nullptr) {
result.speakerId = *pSpeakerId;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(speakerId);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *errorCode = g_variant_iter_next_value(&iter);
if (errorCode != nullptr) {
int *pErrorCode = (int *)g_variant_get_data(errorCode);
if (pErrorCode != nullptr) {
result.errorCode = *pErrorCode;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(errorCode);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *errorMessage = g_variant_iter_next_value(&iter);
if (errorMessage != nullptr) {
char *pErrorMessage = (char *)g_variant_get_data(errorMessage);
if (pErrorMessage != nullptr) {
result.errorMessage = pErrorMessage;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(errorMessage);
} else {
LOGE("recognition result error: result is nullptr!");
}
return result;
}
_SpeechSynthesisResult dissectSynthesisResult(GVariant *parameters) {
_SpeechSynthesisResult result{};
GVariantIter iter;
g_variant_iter_init(&iter, parameters);
// 获取字节数组
GVariant *audioData = g_variant_iter_next_value(&iter);
if (audioData != nullptr) {
gchar *pAudioData = (gchar *)g_variant_get_data(audioData);
if (pAudioData != nullptr) {
std::string encode = pAudioData;
result.audioData = util::base64Decode(encode);
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(audioData);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *reason = g_variant_iter_next_value(&iter);
if (reason != nullptr) {
int *pReason = (int *)g_variant_get_data(reason);
if (pReason != nullptr) {
result.reason = SpeechResultReason(*pReason);
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(reason);
} else {
LOGE("recognition result error: result is nullptr!");
}
// 获取整数
GVariant *audioFormat = g_variant_iter_next_value(&iter);
if (audioFormat != nullptr) {
char *pAudioFormat = (char *)g_variant_get_data(audioFormat);
if (pAudioFormat != nullptr) {
result.audioFormat = pAudioFormat;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(audioFormat);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *audioRate = g_variant_iter_next_value(&iter);
if (audioRate != nullptr) {
int *pAudioRate = (int *)g_variant_get_data(audioRate);
if (pAudioRate != nullptr) {
result.audioRate = *pAudioRate;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(audioRate);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *audioChannel = g_variant_iter_next_value(&iter);
if (audioChannel != nullptr) {
int *pAudioChannel = (int *)g_variant_get_data(audioChannel);
if (pAudioChannel != nullptr) {
result.audioChannel = *pAudioChannel;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(audioChannel);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *errorCode = g_variant_iter_next_value(&iter);
if (errorCode != nullptr) {
int *pErrorCode = (int *)g_variant_get_data(errorCode);
if (pErrorCode != nullptr) {
result.errorCode = *pErrorCode;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(errorCode);
} else {
LOGE("recognition result error: result is nullptr!");
}
GVariant *errorMessage = g_variant_iter_next_value(&iter);
if (errorMessage != nullptr) {
char *pErrorMessage = (char *)g_variant_get_data(errorMessage);
if (pErrorMessage != nullptr) {
result.errorMessage = pErrorMessage;
} else {
LOGE("recognition result error: result is nullptr!");
}
g_variant_unref(errorMessage);
} else {
LOGE("recognition result error: result is nullptr!");
}
return result;
}
// Stores the recognition callback together with the opaque pointer that is
// handed back to it on every invocation.
void CoreAiSpeechServiceProxy::setRecognitionResultCallback(
    SpeechRecognitionResultCallback callback, void *userData) {
    this->recognitionUserData_ = userData;
    this->recognitionCallback_ = callback;
}
// Stores the synthesis callback together with the opaque pointer that is
// handed back to it on every invocation.
void CoreAiSpeechServiceProxy::setSynthesizerCallback(
    SpeechSynthesisResultCallback callback, void *userData) {
    this->synthesizerUserData_ = userData;
    this->synthesizerCallback_ = callback;
}
void CoreAiSpeechServiceProxy::setReconnectedCallback(
const ReconnectedCallback &callback) {
reconnectedCallback_ = callback;
}
// Detaches this proxy from the shared connection and releases the GDBus
// proxy object.
//
// The close callback registered in connectToServer() captures `this`, so it
// is cleared unconditionally; the previous early return on an unavailable
// connection left it dangling. The proxy object is unreffed here because
// nothing else releases the last one created.
CoreAiSpeechServiceProxy::~CoreAiSpeechServiceProxy() {
    auto &server = CoreAiSpeechServer::getInstance();
    server.setOnConnectionClosed(nullptr);

    if (server.available()) {
        auto *connection = server.getConnection();
        if (recognitionSubscriptionId_ != 0) {
            // Unsubscribe from recognition results.
            g_dbus_connection_signal_unsubscribe(connection,
                                                 recognitionSubscriptionId_);
            recognitionSubscriptionId_ = 0;
        }
        if (synthesizerSubscriptionId_ != 0) {
            // Unsubscribe from synthesis results.
            g_dbus_connection_signal_unsubscribe(connection,
                                                 synthesizerSubscriptionId_);
            synthesizerSubscriptionId_ = 0;
        }
    }

    if (proxy_ != nullptr) {
        g_object_unref(proxy_);
        proxy_ = nullptr;
    }
}
bool CoreAiSpeechServiceProxy::connectToServer() {
CoreAiSpeechServer::getInstance().init();
if (!CoreAiSpeechServer::getInstance().available()) {
g_printerr(
"Error creating core speech server proxy: Server proxy "
"connection is unavailable.\n");
return false;
}
if (proxy_ != nullptr) {
g_object_unref(proxy_);
proxy_ = nullptr;
}
GError *error = nullptr;
auto *connection = CoreAiSpeechServer::getInstance().getConnection();
proxy_ = ai_runtime_core_speech_service_proxy_new_sync(
connection, G_DBUS_PROXY_FLAGS_NONE, nullptr, objectPath, nullptr,
&error);
if (proxy_ == nullptr) {
if (error) {
g_printerr("Error creating text processor proxy %s: %s\n",
objectPath, error->message);
g_error_free(error);
}
return false;
}
g_dbus_proxy_set_default_timeout(G_DBUS_PROXY(proxy_), 60 * 60 * 1000);
CoreAiSpeechServer::getInstance().setOnConnectionClosed([this] {
// 流式语音识别报错
if (continuousRecognitionStarted_ && recognitionCallback_) {
SpeechRecognitionResult result{};
result.reason = SPEECH_ERROR_OCCURRED;
result.text = "";
result.speakerId = -1;
result.errorCode = AISDK_RUNTIME_ERROR;
result.errorMessage = "Server closed unexpectedly.";
recognitionCallback_(&result, recognitionUserData_);
}
continuousRecognitionStarted_ = false;
if (reconnectServer()) {
if (reconnectedCallback_) {
reconnectedCallback_();
}
}
});
return true;
}
// Retries connectToServer() until it succeeds or the attempt counter reaches
// maxReconnectAttempts, sleeping one second after each failed attempt.
// Returns true on success. The counter is not reset here.
bool CoreAiSpeechServiceProxy::reconnectServer() {
    while (reconnectAttempts_ < maxReconnectAttempts) {
        ++reconnectAttempts_;
        LOGD("Reconnect attempt {}.", reconnectAttempts_);
        if (connectToServer()) {
            LOGI("Successfully reconnected to server.");
            return true;
        }
        LOGE("Failed to reconnect to server");
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    LOGE("Max reconnect attempts reached");
    return false;
}
int CoreAiSpeechServiceProxy::initRecognizer(const char *config,
int &sessionId) {
int errorCode = AISDK_NO_ERROR;
GError *error = nullptr;
bool success = ai_runtime_core_speech_service_call_init_recognizer_sync(
proxy_, config, &sessionId, &errorCode, nullptr, &error);
if (!success) {
if (error != nullptr) {
LOGE("Error calling init:", error->code, error->message);
g_error_free(error);
return AISDK_RUNTIME_ERROR;
}
}
if (sessionId != -1) {
if (!recognitionResultSignalConnected_) {
// 只能调用一次,否则回调函数会被调用两次,关闭连接不行
connectSpeechRecognitionResultSignals(sessionId);
}
recognitionResultSignalConnected_ = true;
reconnectAttempts_ = 0;
}
return errorCode;
}
int CoreAiSpeechServiceProxy::initSynthesizer(const char *config,
int &sessionId) {
int errorCode = AISDK_NO_ERROR;
GError *error = nullptr;
LOGD(sessionId);
bool success = ai_runtime_core_speech_service_call_init_synthesizer_sync(
proxy_, config, &sessionId, &errorCode, nullptr, &error);
LOGD(sessionId);
if (!success) {
if (error != nullptr) {
LOGE("Error calling init:", error->code, error->message);
g_error_free(error);
return AISDK_RUNTIME_ERROR;
}
}
if (sessionId != -1) {
if (!synthesisResultSignalConnected_) {
// 只能调用一次,否则回调函数会被调用两次,关闭连接不行
connectSpeechSynthesisResultSignals(sessionId);
}
synthesisResultSignalConnected_ = true;
reconnectAttempts_ = 0;
}
return errorCode;
}
void CoreAiSpeechServiceProxy::destroyRecognizer(int sessionId) {
ai_runtime_core_speech_service_call_destroy_recognizer_sync(
proxy_, sessionId, nullptr, nullptr);
reconnectAttempts_ = 0;
}
void CoreAiSpeechServiceProxy::destroySynthesizer(int sessionId) {
ai_runtime_core_speech_service_call_destroy_synthesizer_sync(
proxy_, sessionId, nullptr, nullptr);
reconnectAttempts_ = 0;
}
// Synchronously sends the JSON audio configuration for the recognizer of
// `sessionId`; the call's result and error are not inspected.
void CoreAiSpeechServiceProxy::setRecognizerAudioConfig(
    int sessionId, const std::string &audioConfig) {
    const char *configJson = audioConfig.c_str();
    ai_runtime_core_speech_service_call_set_recognizer_audio_config_sync(
        proxy_, sessionId, configJson, nullptr, nullptr);
    LOGD("setRecognizerAudioConfig:");
}
void CoreAiSpeechServiceProxy::startContinuousRecognition(int sessionId) {
ai_runtime_core_speech_service_call_start_continuous_recognition(
proxy_, sessionId, nullptr, nullptr, nullptr);
LOGD("startContinuousRecognition.");
continuousRecognitionStarted_ = true;
}
// Synchronously sends the JSON audio configuration for the synthesizer of
// `sessionId`; a failed call is reported on stderr.
void CoreAiSpeechServiceProxy::setSynthesizerAudioConfig(
    int sessionId, const std::string &audioConfig) {
    const bool applied =
        ai_runtime_core_speech_service_call_set_synthesizer_audio_config_sync(
            proxy_, sessionId, audioConfig.c_str(), nullptr, nullptr);
    if (applied) {
        return;
    }
    std::fprintf(
        stderr,
        "CoreSpeechServiceProxy: error to set synthesizer audio config\n");
}
void CoreAiSpeechServiceProxy::synthesizeTextOnce(int sessionId,
const char *text,
uint32_t textLength) {
ai_runtime_core_speech_service_call_synthesize_text_once(
proxy_, sessionId, text, textLength, nullptr,
onOnceSynthesisFinishedCallback, this);
}
void CoreAiSpeechServiceProxy::synthesizeTextContinuous(int sessionId,
const char *text,
uint32_t textLength) {
ai_runtime_core_speech_service_call_synthesize_text_continuous(
proxy_, sessionId, text, textLength, nullptr, nullptr, nullptr);
}
int CoreAiSpeechServiceProxy::stopSpeaking(int sessionId) {
int errorCode;
GError *error = nullptr;
bool ret = ai_runtime_core_speech_service_call_stop_speaking_sync(
proxy_, sessionId, &errorCode, nullptr, &error);
if (!ret) {
LOGE("Error to stop speaking.", error->message);
return AiSdkCommonErrorCode::AISDK_RUNTIME_ERROR;
}
return (int)errorCode;
}
void CoreAiSpeechServiceProxy::stopContinuousRecognition(int sessionId) {
ai_runtime_core_speech_service_call_stop_continuous_recognition(
proxy_, sessionId, nullptr, nullptr, nullptr);
LOGI("stopContinuousRecognition.");
continuousRecognitionStarted_ = false;
}
void CoreAiSpeechServiceProxy::continuousRecognitionWriteAudioData(
int sessionId, const uint8_t *data, uint32_t length) {
const char *ptr = reinterpret_cast(data);
std::string audioData(ptr, length);
std::string encode = util::base64Encode(audioData);
ai_runtime_core_speech_service_call_continuous_recognition_write_audio_data(
proxy_, sessionId, encode.c_str(), nullptr, nullptr, nullptr);
}
void CoreAiSpeechServiceProxy::recognizeOnce(int sessionId) {
ai_runtime_core_speech_service_call_recognize_once(
proxy_, sessionId, nullptr, onOnceRecognitionFinishedCallback, this);
LOGD("recognizeOnce.");
}
void CoreAiSpeechServiceProxy::connectSpeechRecognitionResultSignals(
int sessionId) {
auto *connection = CoreAiSpeechServer::getInstance().getConnection();
std::string interfaceName = interface + std::to_string(sessionId);
recognitionSubscriptionId_ = g_dbus_connection_signal_subscribe(
connection, nullptr, interfaceName.c_str(),
recognitionSpeechResultSignal, objectPath, nullptr,
G_DBUS_SIGNAL_FLAGS_NONE, onRecognitionResultCallback, this, nullptr);
}
void CoreAiSpeechServiceProxy::connectSpeechSynthesisResultSignals(
int sessionId) {
auto *connection = CoreAiSpeechServer::getInstance().getConnection();
std::string interfaceName = interface + std::to_string(sessionId);
synthesizerSubscriptionId_ = g_dbus_connection_signal_subscribe(
connection, nullptr, interfaceName.c_str(), synthesisResultSignal,
objectPath, nullptr, G_DBUS_SIGNAL_FLAGS_NONE, onSynthesisResult, this,
nullptr);
}
// GDBus signal handler for recognition results.
//
// `userData` is the CoreAiSpeechServiceProxy that registered the subscription
// (see connectSpeechRecognitionResultSignals). The signal payload in
// `parameters` is decoded with dissectRecognitionResult and handed to the
// client's recognition callback together with its user data. When no callback
// is registered the result is dropped and an error is logged.
//
// The connection, sender, object path, interface and signal name arguments are
// required by the GDBus signal-callback signature but are not used.
void CoreAiSpeechServiceProxy::onRecognitionResultCallback(
    GDBusConnection *connection, const gchar *senderName,
    const gchar *objectPath, const gchar *interfaceName,
    const gchar *signalName, GVariant *parameters, gpointer userData) {
    // The cast needs an explicit target type; userData was registered as
    // `this` when subscribing.
    auto *proxy = static_cast<CoreAiSpeechServiceProxy *>(userData);

    if (proxy->recognitionCallback_ == nullptr) {
        LOGE("Recognizing callback is not set!");
        return;
    }

    _SpeechRecognitionResult result = dissectRecognitionResult(parameters);
    proxy->recognitionCallback_(&result, proxy->recognitionUserData_);
}
// GDBus signal handler for synthesis results.
//
// `userData` is the CoreAiSpeechServiceProxy that registered the subscription
// (see connectSpeechSynthesisResultSignals). The signal payload in
// `parameters` is decoded with dissectSynthesisResult and handed to the
// client's synthesizer callback together with its user data. When no callback
// is registered the result is dropped and a message is written to stderr.
//
// The connection, sender, object path, interface and signal name arguments are
// required by the GDBus signal-callback signature but are not used.
void CoreAiSpeechServiceProxy::onSynthesisResult(
    GDBusConnection *connection, const gchar *senderName,
    const gchar *objectPath, const gchar *interfaceName,
    const gchar *signalName, GVariant *parameters, gpointer userData) {
    // The cast needs an explicit target type; userData was registered as
    // `this` when subscribing.
    auto *proxy = static_cast<CoreAiSpeechServiceProxy *>(userData);

    if (!proxy->synthesizerCallback_) {
        std::fprintf(
            stderr,
            "CoreSpeechServiceProxy: Synthesizer callback is not set!\n");
        return;
    }

    auto result = dissectSynthesisResult(parameters);
    proxy->synthesizerCallback_(&result, proxy->synthesizerUserData_);
}
// Completion handler for the asynchronous recognize-once D-Bus call started in
// recognizeOnce().
//
// `userData` is the CoreAiSpeechServiceProxy that issued the call. On success
// nothing is done here. On failure the error is printed and translated into an
// error result for the client: by GError code when one is available, or as a
// general error when the call failed without providing a GError, so the client
// is always informed that the request did not succeed.
void CoreAiSpeechServiceProxy::onOnceRecognitionFinishedCallback(
    GObject *sourceObject, GAsyncResult *res, gpointer userData) {
    // The cast needs an explicit target type; userData was passed as `this`.
    auto *proxy = static_cast<CoreAiSpeechServiceProxy *>(userData);

    GError *error{nullptr};
    const bool succeeded =
        ai_runtime_core_speech_service_call_recognize_once_finish(
            (AiRuntimeCoreSpeechService *)sourceObject, res, &error);
    if (succeeded) {
        return;
    }

    if (error == nullptr) {
        g_printerr("Error calling once asr: Unknown error\n");
        // No error details are available; still tell the client it failed.
        proxy->handleOnceRecognitionGeneralError();
        return;
    }

    g_printerr("Error calling once asr: %s\n", error->message);
    proxy->handleOnceRecognitionError(error->code);
    g_error_free(error);
}
// Dispatches a failed recognize-once call to the matching error handler.
//
// `errorCode` is compared against GIO error codes: a closed connection and a
// timeout each have a dedicated handler; every other code is reported as a
// general error.
void CoreAiSpeechServiceProxy::handleOnceRecognitionError(int errorCode) {
    LOGE("Error occurred with code:", errorCode);

    if (errorCode == G_IO_ERROR_CLOSED) {
        handleOnceRecognitionServerClosed();
        return;
    }

    if (errorCode == G_IO_ERROR_TIMED_OUT) {
        handleOnceRecognitionResultTimeout();
        return;
    }

    handleOnceRecognitionGeneralError();
}
void CoreAiSpeechServiceProxy::handleOnceRecognitionServerClosed() {
LOGE("Server closed unexpectedly.");
if (recognitionCallback_) {
SpeechRecognitionResult result{};
result.reason = SPEECH_ERROR_OCCURRED;
result.text = "";
result.speakerId = -1;
result.errorCode = AISDK_RUNTIME_ERROR;
result.errorMessage = "Server closed unexpectedly.";
recognitionCallback_(&result, recognitionUserData_);
}
}
void CoreAiSpeechServiceProxy::handleOnceRecognitionResultTimeout() {
if (recognitionCallback_) {
SpeechRecognitionResult result{};
result.reason = SPEECH_ERROR_OCCURRED;
result.text = "";
result.speakerId = -1;
result.errorCode = AISDK_MODEL_RUN_TIME_OUT;
result.errorMessage = "Run model time out.";
recognitionCallback_(&result, recognitionUserData_);
}
}
void CoreAiSpeechServiceProxy ::handleOnceRecognitionGeneralError() {
if (recognitionCallback_) {
SpeechRecognitionResult result{};
result.reason = SPEECH_ERROR_OCCURRED;
result.text = "";
result.speakerId = -1;
result.errorCode = AISDK_RUNTIME_ERROR;
result.errorMessage = "General error occurred.";
recognitionCallback_(&result, recognitionUserData_);
}
}
// Completion handler for an asynchronous one-shot synthesis D-Bus call.
//
// `userData` is the CoreAiSpeechServiceProxy that issued the call. On success
// nothing is done here. On failure the error is printed and translated into an
// error result for the client: by GError code when one is available, or as a
// general error when the call failed without providing a GError, so the client
// is always informed that the request did not succeed.
void CoreAiSpeechServiceProxy::onOnceSynthesisFinishedCallback(
    GObject *sourceObject, GAsyncResult *res, gpointer userData) {
    // The cast needs an explicit target type; userData is the issuing proxy.
    auto *proxy = static_cast<CoreAiSpeechServiceProxy *>(userData);

    GError *error{nullptr};
    // NOTE(review): this finishes the call with the *recognize_once* finish
    // function although this is the synthesis completion handler. It looks
    // like a copy-paste from onOnceRecognitionFinishedCallback; confirm which
    // generated *_finish function matches the synthesis call and switch to it.
    const bool succeeded =
        ai_runtime_core_speech_service_call_recognize_once_finish(
            (AiRuntimeCoreSpeechService *)sourceObject, res, &error);
    if (succeeded) {
        return;
    }

    if (error == nullptr) {
        g_printerr("Error calling once tts: Unknown error\n");
        // No error details are available; still tell the client it failed.
        proxy->handleOnceSynthesisGeneralError();
        return;
    }

    g_printerr("Error calling once tts: %s\n", error->message);
    proxy->handleOnceSynthesisErrorOccurred(error->code);
    g_error_free(error);
}
// Dispatches a failed one-shot synthesis call to the matching error handler.
//
// `errorCode` is compared against GIO error codes: a closed connection and a
// timeout each have a dedicated handler; every other code is reported as a
// general error.
void CoreAiSpeechServiceProxy::handleOnceSynthesisErrorOccurred(int errorCode) {
    LOGE("Error occurred with code:", errorCode);

    if (errorCode == G_IO_ERROR_CLOSED) {
        handleOnceSynthesisServerClosed();
        return;
    }

    if (errorCode == G_IO_ERROR_TIMED_OUT) {
        handleOnceSynthesisResultTimeout();
        return;
    }

    handleOnceSynthesisGeneralError();
}
// Reports to the client that the server connection closed during a one-shot
// synthesis request.
//
// If a synthesizer callback is registered it is invoked with an error result
// (AISDK_RUNTIME_ERROR) that carries no audio: empty format string and empty
// audio data, with rate 16000 and channel 1 as filler values. Otherwise
// nothing happens.
void CoreAiSpeechServiceProxy::handleOnceSynthesisServerClosed() {
    if (!synthesizerCallback_) {
        return;
    }

    SpeechSynthesisResult result{};
    result.reason = SPEECH_ERROR_OCCURRED;
    result.audioFormat = "";
    result.audioRate = 16000;
    // Assigned once; an earlier `audioChannel = 0` was immediately overwritten
    // by this value and has been removed.
    result.audioChannel = 1;
    result.audioData = {};
    result.errorCode = AISDK_RUNTIME_ERROR;
    result.errorMessage = "Server closed unexpectedly.";

    synthesizerCallback_(&result, synthesizerUserData_);
}
// Reports to the client that a one-shot synthesis request timed out.
//
// If a synthesizer callback is registered it is invoked with an error result
// (AISDK_MODEL_RUN_TIME_OUT) that carries no audio: empty format string and
// empty audio data, with rate 16000 and channel 1 as filler values. Otherwise
// nothing happens.
void CoreAiSpeechServiceProxy::handleOnceSynthesisResultTimeout() {
    if (!synthesizerCallback_) {
        return;
    }

    SpeechSynthesisResult result{};
    result.reason = SPEECH_ERROR_OCCURRED;
    result.audioFormat = "";
    result.audioRate = 16000;
    // Assigned once; an earlier `audioChannel = 0` was immediately overwritten
    // by this value and has been removed.
    result.audioChannel = 1;
    result.audioData = {};
    result.errorCode = AISDK_MODEL_RUN_TIME_OUT;
    result.errorMessage = "Run model time out.";

    synthesizerCallback_(&result, synthesizerUserData_);
}
// Reports to the client that a one-shot synthesis request failed for a reason
// other than a closed connection or a timeout.
//
// If a synthesizer callback is registered it is invoked with an error result
// (AISDK_RUNTIME_ERROR) that carries no audio: empty format string and empty
// audio data, with rate 16000 and channel 1 as filler values. Otherwise
// nothing happens.
void CoreAiSpeechServiceProxy::handleOnceSynthesisGeneralError() {
    if (!synthesizerCallback_) {
        return;
    }

    SpeechSynthesisResult result{};
    result.reason = SPEECH_ERROR_OCCURRED;
    result.audioFormat = "";
    result.audioRate = 16000;
    // Assigned once; an earlier `audioChannel = 0` was immediately overwritten
    // by this value and has been removed.
    result.audioChannel = 1;
    result.audioData = {};
    result.errorCode = AISDK_RUNTIME_ERROR;
    result.errorMessage = "General error occurred.";

    synthesizerCallback_(&result, synthesizerUserData_);
}
} // namespace speech
} // namespace core
} // namespace kyai
libkysdk-coreai-1.1.0.1/src/speech/_speechrecognitionsession.h 0000664 0001750 0001750 00000003623 15207167112 022262 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _SPEECHRECOGNIZER_
#define _SPEECHRECOGNIZER_
#include "_audioconfig.h"
#include "_speechmodelconfig.h"
#include "coreaispeechserviceproxy.h"
#include "result.h"
namespace kyai {
namespace core {
namespace speech {
// Implementation object behind the C API's opaque SpeechRecognitionSession
// handle: the speech_recognizer_* functions cast the handle to this class and
// forward to the member functions below. Only declarations are visible here;
// behaviour notes are limited to what the declarations and those callers show.
class _SpeechRecognitionSession {
public:
_SpeechRecognitionSession();
~_SpeechRecognitionSession();
// Initialises the session and returns an int status code.
// NOTE(review): presumably an AiSdkCommonErrorCode value - confirm in the
// definition.
int init();
// Called by speech_recognizer_destroy_session immediately before the object
// is deleted.
void destroyRecognizer();
// Stores the caller-provided audio configuration pointer.
// NOTE(review): ownership/lifetime of `config` is not visible here - confirm.
void setRecognizerAudioConfig(_AudioConfig *config);
// Returns the audio configuration pointer; callers check it for nullptr.
[[nodiscard]] _AudioConfig *getRecognizerAudioConfig() const;
// Registers the client callback, and the opaque user data passed back to it,
// for recognition results.
void setRecognitionResultCallback(SpeechRecognitionResultCallback callback,
void *userData);
void startContinuousRecognition();
void stopContinuousRecognition();
// Feeds `length` bytes of audio at `data` into a continuous recognition.
void continuousRecognitionWriteAudioData(const uint8_t *data,
uint32_t length);
// Runs a single, non-continuous recognition.
void recognizeOnce();
void setRecognitionModelConfig(const SpeechModelConfig &config);
private:
bool initServer();
int initSession();
private:
// Audio configuration pointer; nullptr until one is set.
_AudioConfig *audioConfig_{nullptr};
// Proxy used to talk to the speech service.
CoreAiSpeechServiceProxy serviceProxy_;
// Session identifier; starts at -1.
// NOTE(review): -1 presumably means "no session yet" - confirm.
int sessionId_{-1};
// Model configuration, value-initialised by default.
SpeechModelConfig modelConfig_{};
};
} // namespace speech
} // namespace core
} // namespace kyai
#endif
libkysdk-coreai-1.1.0.1/src/speech/_speechmodelconfig.h 0000664 0001750 0001750 00000001770 15207167112 020625 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _COREAISPEECHMODELCONFIG_H
#define _COREAISPEECHMODELCONFIG_H
#include
#include "config.h"
// Model selection settings for a speech session.
struct _SpeechModelConfig {
// Model name; empty by default.
std::string name{};
// Deployment type; defaults to the value -1 converted to ModelDeployType.
// NOTE(review): -1 presumably acts as an "unset" sentinel - confirm against
// the ModelDeployType definition in config.h.
ModelDeployType type{ModelDeployType(-1)};
};
// Converts a model configuration into a std::string.
// NOTE(review): by its name the result is presumably JSON text; the exact keys
// and format are set by the definition, which is not visible in this header.
std::string modelConfigToJson(const _SpeechModelConfig &modelconfig);
#endif //_COREAISPEECHMODELCONFIG_H
libkysdk-coreai-1.1.0.1/src/speech/_speechrecognitionresult.h 0000664 0001750 0001750 00000002047 15207167112 022114 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef _SPEECHRCOGNITIONRESULT_H_
#define _SPEECHRCOGNITIONRESULT_H_
#include
#include "result.h"
// Result record passed to the client's recognition callback.
struct _SpeechRecognitionResult {
// Why this result was produced (for example SPEECH_ERROR_OCCURRED on error).
SpeechResultReason reason;
// Recognised text; error results in this library set it to "".
std::string text;
// Speaker identifier; error results in this library set it to -1.
int speakerId;
// Error code (for example AISDK_RUNTIME_ERROR, AISDK_MODEL_RUN_TIME_OUT).
int errorCode;
// Human-readable description accompanying errorCode.
std::string errorMessage;
};
// Returns a C string for the given recognition result.
// NOTE(review): by its name this is presumably a textual form of
// result->reason; ownership of the returned pointer and null handling are
// decided by the definition, which is not visible here.
const char *speech_recognition_result_get_reason_str(
SpeechRecognitionResult *result);
#endif libkysdk-coreai-1.1.0.1/src/speech/recognizer.cpp 0000664 0001750 0001750 00000011224 15207167112 017505 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "recognizer.h"
#include "_audiodatastream.h"
#include "_speechrecognitionsession.h"
// Allocates a new recognition session and returns it as the opaque C handle.
// The handle must later be released with speech_recognizer_destroy_session.
SpeechRecognitionSession *speech_recognizer_create_session() {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    SessionImpl *impl = new SessionImpl();
    return (SpeechRecognitionSession *)impl;
}
// Tears down and frees a recognition session, then clears the caller's handle.
//
// `session` is the address of the handle. A null address or an already-null
// handle is ignored. On return *session is nullptr.
void speech_recognizer_destroy_session(SpeechRecognitionSession **session) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session == nullptr || *session == nullptr) {
        return;
    }

    auto *impl = (SessionImpl *)*session;
    impl->destroyRecognizer();
    delete impl;

    *session = nullptr;
}
// Initialises a recognition session.
//
// Returns AISDK_INVALID_SESSION for a null handle, otherwise the status code
// produced by the session's init().
int speech_recognizer_init_session(SpeechRecognitionSession *session) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session != nullptr) {
        auto *impl = (SessionImpl *)session;
        return impl->init();
    }
    return AISDK_INVALID_SESSION;
}
// Registers the callback that receives recognition results for `session`,
// together with the opaque `user_data` passed back on every invocation.
// A null session handle is ignored.
void speech_recognizer_result_set_callback(
    SpeechRecognitionSession *session, SpeechRecognitionResultCallback callback,
    void *user_data) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session != nullptr) {
        auto *impl = (SessionImpl *)session;
        impl->setRecognitionResultCallback(callback, user_data);
    }
}
// Applies a model configuration to `session`. The configuration is passed on
// by reference to the session object. Does nothing when either pointer is null.
void speech_recognizer_set_model_config(SpeechRecognitionSession *session,
                                        SpeechModelConfig *config) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session == nullptr || config == nullptr) {
        return;
    }

    auto *impl = (SessionImpl *)session;
    impl->setRecognitionModelConfig(*config);
}
static int speech_recognizer_continuous_recognition_write_audio_data(
const uint8_t *audio_data, uint32_t audio_length, void *user_data) {
if (user_data == nullptr) {
return AiSdkCommonErrorCode::AISDK_RUNTIME_ERROR;
}
// 此函数为private
((kyai::core::speech::_SpeechRecognitionSession *)user_data)
->continuousRecognitionWriteAudioData(audio_data, audio_length);
return AiSdkCommonErrorCode::AISDK_NO_ERROR;
}
// Attaches an audio configuration to `session`.
//
// The configuration pointer is handed to the session as-is. When the input
// source is STREAM and a data stream object is present, the stream is also
// wired to this session: its user data becomes the session handle and its
// write hook becomes the file-local forwarding function, so data written to
// the stream reaches the session's continuous recognition.
// Does nothing when either pointer is null.
void speech_recognizer_set_audio_config(SpeechRecognitionSession *session,
                                        AudioConfig *audio_config) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session == nullptr || audio_config == nullptr) {
        return;
    }

    auto *impl = (SessionImpl *)session;
    impl->setRecognizerAudioConfig(audio_config);

    auto &input = audio_config->inputAudioInfo;
    const bool fromStream =
        input.inputSource == InputAudioInfo::InputSource::STREAM;
    if (!fromStream || input.audioDataStream == nullptr) {
        return;
    }

    input.audioDataStream->p_user_data = session;
    input.audioDataStream->audio_data_to_stream_write_func =
        speech_recognizer_continuous_recognition_write_audio_data;
}
// Starts continuous speech recognition on `session`.
// A null session handle is ignored.
void speech_recognizer_start_continuous_recognition_async(
    SpeechRecognitionSession *session) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session != nullptr) {
        auto *impl = (SessionImpl *)session;
        impl->startContinuousRecognition();
    }
}
// Stops continuous speech recognition on `session`.
// A null session handle is ignored.
void speech_recognizer_stop_continuous_recognition_async(
    SpeechRecognitionSession *session) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session != nullptr) {
        auto *impl = (SessionImpl *)session;
        impl->stopContinuousRecognition();
    }
}
// Non-streaming (one-shot) speech recognition.
// Supports file and in-memory audio data.
//
// The request is silently dropped when the session handle is null, when no
// audio configuration has been set, or when the configured input source is the
// default microphone or a stream.
void speech_recognizer_recognize_once_async(SpeechRecognitionSession *session) {
    using SessionImpl = kyai::core::speech::_SpeechRecognitionSession;

    if (session == nullptr) {
        return;
    }
    auto *impl = (SessionImpl *)session;

    AudioConfig *audioConfig = impl->getRecognizerAudioConfig();
    if (audioConfig == nullptr) {
        return;
    }

    const auto source = audioConfig->inputAudioInfo.inputSource;
    if (source == InputAudioInfo::InputSource::DEFAULT_MIC ||
        source == InputAudioInfo::InputSource::STREAM) {
        return;
    }

    impl->recognizeOnce();
}
libkysdk-coreai-1.1.0.1/src/speech/_speechrecognitionresult.cpp 0000664 0001750 0001750 00000004214 15207167112 022445 0 ustar zp zp /*
* Copyright 2024 KylinSoft Co., Ltd.
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "_speechrecognitionresult.h"
#include