R2.2 libtensorflowlite_c.so crashes when running with multiple threads

C35033A 01-04 05:29:57.710 498 12173 F libc : /buildbot/src/android/ndk-release-r18/external/libcxx/…/…/external/libcxxabi/src/abort_message.cpp:73: abort_message: assertion “cannot create thread specific key for __cxa_get_globals()” failed
C350342 01-04 05:29:57.712 498 12173 F libc : Fatal signal 6 (SIGABRT), code -1 (SI_QUEUE) in tid 12173 (Blur), pid 498 (provider@2.4-se)
C350630 01-04 05:29:57.860 12179 12179 F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
C350631 01-04 05:29:57.860 12179 12179 F DEBUG : Native Crash TIME: 16201049
C350632 01-04 05:29:57.860 12179 12179 F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
C350633 01-04 05:29:57.860 12179 12179 F DEBUG : Build fingerprint: ‘realme/RMP2105/RE87CCL1:11/RP1A.201005.001/1640606145:userdebug/test-keys’
C350634 01-04 05:29:57.861 12179 12179 F DEBUG : Revision: ‘0’
C350635 01-04 05:29:57.861 12179 12179 F DEBUG : ABI: ‘arm’
C350636 01-04 05:29:57.861 12179 12179 F DEBUG : Timestamp: 2022-01-04 05:29:57+0800
C350637 01-04 05:29:57.861 12179 12179 F DEBUG : pid: 498, tid: 12173, name: Blur >>> /vendor/bin/hw/android.hardware.camera.provider@2.4-service <<<
C350638 01-04 05:29:57.861 12179 12179 F DEBUG : uid: 1047
C350639 01-04 05:29:57.861 12179 12179 F DEBUG : signal 6 (SIGABRT), code -1 (SI_QUEUE), fault addr --------
C35063A 01-04 05:29:57.861 12179 12179 F DEBUG : Abort message: ‘/buildbot/src/android/ndk-release-r18/external/libcxx/…/…/external/libcxxabi/src/abort_message.cpp:73: abort_message: assertion “cannot create thread specific key for __cxa_get_globals()” failed’
C35063B 01-04 05:29:57.861 12179 12179 F DEBUG : r0 00000000 r1 00002f8d r2 00000006 r3 cc06d010
C35063C 01-04 05:29:57.861 12179 12179 F DEBUG : r4 cc06d024 r5 cc06d008 r6 000001f2 r7 0000016b
C35063D 01-04 05:29:57.861 12179 12179 F DEBUG : r8 cc06d010 r9 cc06d020 r10 cc06d040 r11 cc06d030
C35063E 01-04 05:29:57.861 12179 12179 F DEBUG : ip 00002f8d sp cc06cfe0 lr ee9087cd pc ee9087e0
C35064A 01-04 05:29:57.867 12179 12179 F DEBUG : backtrace:
C35064B 01-04 05:29:57.867 12179 12179 F DEBUG : #00 pc 000387e0 /apex/com.android.runtime/lib/bionic/libc.so (abort+172) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C35064C 01-04 05:29:57.867 12179 12179 F DEBUG : #01 pc 00038a87 /apex/com.android.runtime/lib/bionic/libc.so (__assert2+22) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C35064D 01-04 05:29:57.867 12179 12179 F DEBUG : #02 pc 001fde95 /vendor/lib/libtensorflowlite_c.so
C35064E 01-04 05:29:57.867 12179 12179 F DEBUG : #03 pc 001fc471 /vendor/lib/libtensorflowlite_c.so
C35064F 01-04 05:29:57.867 12179 12179 F DEBUG : #04 pc 00081cb5 /apex/com.android.runtime/lib/bionic/libc.so (pthread_once+76) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C350650 01-04 05:29:57.867 12179 12179 F DEBUG : #05 pc 001fc433 /vendor/lib/libtensorflowlite_c.so
C350651 01-04 05:29:57.867 12179 12179 F DEBUG : #06 pc 001fc3e1 /vendor/lib/libtensorflowlite_c.so
C350652 01-04 05:29:57.867 12179 12179 F DEBUG : #07 pc 001fc15b /vendor/lib/libtensorflowlite_c.so
C350653 01-04 05:29:57.867 12179 12179 F DEBUG : #08 pc 001fbf05 /vendor/lib/libtensorflowlite_c.so
C350654 01-04 05:29:57.867 12179 12179 F DEBUG : #09 pc 001fbfe5 /vendor/lib/libtensorflowlite_c.so
C350655 01-04 05:29:57.867 12179 12179 F DEBUG : #10 pc 001fbfa3 /vendor/lib/libtensorflowlite_c.so
C350656 01-04 05:29:57.867 12179 12179 F DEBUG : #11 pc 000fd855 /vendor/lib/libtensorflowlite_c.so
C350657 01-04 05:29:57.867 12179 12179 F DEBUG : #12 pc 00080973 /apex/com.android.runtime/lib/bionic/libc.so (__pthread_start(void*)+40) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C350658 01-04 05:29:57.867 12179 12179 F DEBUG : #13 pc 00039ce3 /apex/com.android.runtime/lib/bionic/libc.so (__start_thread+30) (BuildId: 724f04e3eb055a58d7517ffbc7210561)

Are you using multiple interpreter instances?

WARNING: This class is not thread-safe. The client is responsible for ensuring serialized interaction to avoid data races and undefined behavior.

#include “tensorflow/lite/c/c_api.h”
#include “tensorflow/lite/c/c_api_experimental.h”
#include “tensorflow/lite/delegates/gpu/delegate.h”
#include “log.h”

class TFLITEObject{
public:
TFLITEObject(){
model = nullptr;
options = nullptr;
interpreter = nullptr;
gpudelegate = nullptr;
}
virtual ~TFLITEObject() {
if(gpudelegate)
TfLiteGpuDelegateV2Delete(gpudelegate);
if(interpreter)
TfLiteInterpreterDelete(interpreter);
if(options)
TfLiteInterpreterOptionsDelete(options);
if(model)
TfLiteModelDelete(model);
model = nullptr;
options = nullptr;
interpreter = nullptr;
gpudelegate = nullptr;
}
public:
void init_network(UNNModel *info) {

    if(info->ld.isFromBuffer)
    {
        LOGD("tflite buffer : 0x%x, size : %d\n", info->ld.model_buffer, info->ld.model_size);
        model = TfLiteModelCreate((const char*)info->ld.model_buffer, info->ld.model_size);
    }
    else
    {
        LOGD("tflite file : %s\n", info->ld.model_name);
        model = TfLiteModelCreateFromFile(info->ld.model_name);
    }

    options = TfLiteInterpreterOptionsCreate();

    if( info->param.number_of_threads > 0)
        TfLiteInterpreterOptionsSetNumThreads(options, info->param.number_of_threads);

    // Create the interpreter.
    interpreter = TfLiteInterpreterCreate(model, options);

    // Allocate tensors and populate the input tensor data.
    TfLiteInterpreterAllocateTensors(interpreter);

}

void deinit_network( ) {

    LOGD("deinit_network \n");

    if(gpudelegate)
        TfLiteGpuDelegateV2Delete(gpudelegate);
    if(interpreter)
        TfLiteInterpreterDelete(interpreter);
    if(options)
        TfLiteInterpreterOptionsDelete(options);
    if(model)
        TfLiteModelDelete(model);

    model = nullptr;
    options = nullptr;
    interpreter = nullptr;
    gpudelegate = nullptr;
}

int run_network( UNNModelInOutBuf *inOut) {
    // SetInput
    LOGD("Set Network Inputs.\n");
    for (int id = 0; id < inOut->inputs_count; id++) {
        TfLiteTensor* input_tensor = TfLiteInterpreterGetInputTensor(interpreter, id);
        TfLiteTensorCopyFromBuffer(input_tensor, (void *)inOut->inputs[id].data, inOut->inputs[id].size);
    }

    // Execute
    TfLiteInterpreterInvoke(interpreter);

    // GetOutput
    LOGD("Get Network Outputs.\n");
    for (int id = 0; id < inOut->outputs_count; id++) {
        const TfLiteTensor* output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, id);
        if(output_tensor->quantization.type == kTfLiteAffineQuantization)
        {
            TfLiteQuantizationParams param = TfLiteTensorQuantizationParams(output_tensor);
            inOut->outputs[id].isQuant     = 1;
            inOut->outputs[id].quant_zero  = param.zero_point;
            inOut->outputs[id].quant_scale = param.scale;
        }
        TfLiteTensorCopyToBuffer(output_tensor, (void *)inOut->outputs[id].data, inOut->outputs[id].size);
    }
    return 0;
}

public:
TfLiteModel *model;
TfLiteInterpreterOptions *options;
TfLiteInterpreter *interpreter;
TfLiteDelegate *gpudelegate;
};

/**
 * Creates a new engine object and stores it in *handle.
 *
 * @param handle out-parameter receiving the opaque engine handle; set to
 *               NULL on failure. Must not be NULL itself.
 * @return 0 on success, -1 on failure.
 */
int TFLITEEngineInit(void **handle) {
    if (handle == NULL) {
        LOGE("TFLITEEngineInit Failed!");
        return -1;
    }

    int ire = -1;
    TFLITEObject *magic = new TFLITEObject();
    if (magic != NULL) {
        ire = 0;
        *handle = magic;
        LOGD("TFLITEEngineInit OK");
    } else {
        *handle = NULL;
        LOGE("TFLITEEngineInit Failed!");
    }

    return ire;
}

int TFLITEEngineDeInit(void handle){
int ire = -1;
if(handle != NULL){
TFLITEObject
pTFLITE = static_cast<TFLITEObject *>(handle);
delete pTFLITE;
pTFLITE = NULL;
ire = 0;
}
return ire;
}

int TFLITEEngineCreateNetwork(void *handle, UNNModel info)
{
int ire = -1;
if (handle != NULL)
{
TFLITEObject
pTFLITE = static_cast<TFLITEObject *>(handle);
pTFLITE->init_network(info);
ire = 0;
}
return ire;
}

int TFLITEEngineDestroyNetwork(void handle)
{
int ire = -1;
if(handle != NULL){
TFLITEObject
pTFLITE = static_cast<TFLITEObject *>(handle);
pTFLITE->deinit_network();
ire = 0;
}
return ire;
}

static bool TFLITEEngineOK(void handle){
bool b = false;
if(handle != NULL){
TFLITEObject
pTFLITE = static_cast<TFLITEObject *>(handle);
if (pTFLITE->model && pTFLITE->interpreter){
b = true;
}
}
return b;
}

int TFLITEEngineRunSession(void *handle, UNNModelInOutBuf inOut){
int ire = -99;
if(TFLITEEngineOK(handle)){
TFLITEObject
pTFLITE = static_cast<TFLITEObject *>(handle);
LOGD(“Start Invoke \n”);
if (ire = (int)pTFLITE->run_network( inOut ) != 0) {
LOGE(“Failed to invoke TFLITE Runtime!\n”);
}
ire = 0;
LOGD(“Invoke is OK \n”);
}
return ire;
}

There is only one interpreter instance; the code is shown above, and the thread count is set to 4 (threadnum=4).

Does it work if you set the number of threads to 0?

Can you also try with the latest TFLite version?

1 Like

The crash occurs with low probability. When I set the number of threads to 0, it works.
Has anyone reported the same problem? And does the latest TFLite version fix this multithreading bug?

I don’t know, you could try.

I tried 2.6 and it still crashes with low probability.

As we do not backport anything other than security fixes, I suggest you always test the latest released version (2.7 or 2.8rc) to check whether the problem has been fixed.

You can also make a debug build of TFLite to inspect the crash:

https://groups.google.com/a/tensorflow.org/g/tflite/c/c4xNf_TrDrY

ok,i will test the 2.8rc.

But regarding debugging:
I can build a “dbg” .so with symbols using the command below, but it probably cannot reproduce the crash because it runs very slowly:
bazel build -c dbg --strip=never --cxxopt=--std=c++11 --config=android_arm //tensorflow/lite/c:tensorflowlite_c

But when I change “-c dbg” to “-c opt --copt='-g'”, the resulting libtensorflowlite_c.so has no symbols.
(bazel build -c opt --copt='-g' --strip=never --cxxopt=--std=c++11 --config=android_arm //tensorflow/lite/c:tensorflowlite_c)

How can I get an opt .so with symbols so that I can debug further?

I tested the 2.8rc version and it still crashes with low probability.

@mihaimaruseac Do you know someone working on TFlite build scripts?

Not directly, but tagging a few more people @ycling @Karim_Nosseir @MeghnaNatraj

1 Like

By default we omit the symbols for non-debug builds. If you want to keep them, remove this line

3 Likes

Thank you very much. It works and can compile an opt so with symbols.

Symbolizing the crash backtrace with the 2.8rc opt .so that has symbols gives the following result:

#02 pc 002fa09b /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/abort_message.cpp:72
#03 pc 002f8619 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/cxa_exception_storage.cpp:70
#05 pc 002f85db /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/…/libcxx/include/__threading_support:312
#06 pc 002f8589 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/cxa_exception_storage.cpp:77
#07 pc 002f8317 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/cxa_exception.cpp:259
#08 pc 002f7e11 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/src/system_error.cpp:287
#09 pc 002f7f01 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/include/thread:178
#10 pc 002f7ec1 /vendor/lib/libtensorflowlite_c.so
/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/src/thread.cpp:131
#11 pc 002c68ed /vendor/lib/libtensorflowlite_c.so
/proc/self/cwd/external/androidndk/ndk/sources/cxx-stl/llvm-libc++/include/thread:281

2.8rc crash:

C18B475 12-15 00:30:48.166 6320 16287 F libc : /usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/abort_message.cpp:72: abort_message: assertion “cannot create thread specific key for __cxa_get_globals()” failed
C18B477 12-15 00:30:48.166 6320 16287 F libc : Fatal signal 6 (SIGABRT), code -1 (SI_QUEUE) in tid 16287 (Blur), pid 6320 (provider@2.4-se)
C18B5FD 12-15 00:30:48.300 16292 16292 F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
C18B5FE 12-15 00:30:48.301 16292 16292 F DEBUG : Native Crash TIME: 471457
C18B5FF 12-15 00:30:48.301 16292 16292 F DEBUG : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
C18B603 12-15 00:30:48.301 16292 16292 F DEBUG : Build fingerprint: ‘UNISOC/ums9230_4h10_Natv/ums9230_4h10:11/RP1A.201005.001/50324:userdebug/test-keys’
C18B604 12-15 00:30:48.301 16292 16292 F DEBUG : Revision: ‘0’
C18B605 12-15 00:30:48.301 16292 16292 F DEBUG : ABI: ‘arm’
C18B60C 12-15 00:30:48.302 16292 16292 F DEBUG : Timestamp: 2021-12-15 00:30:48+0800
C18B60F 12-15 00:30:48.302 16292 16292 F DEBUG : pid: 6320, tid: 16287, name: Blur >>> /vendor/bin/hw/android.hardware.camera.provider@2.4-service <<<
C18B613 12-15 00:30:48.302 16292 16292 F DEBUG : uid: 1047
C18B615 12-15 00:30:48.302 16292 16292 F DEBUG : signal 6 (SIGABRT), code -1 (SI_QUEUE), fault addr --------
C18B616 12-15 00:30:48.302 16292 16292 F DEBUG : Abort message: ‘/usr/local/google/buildbot/src/android/ndk-release-r21/external/libcxx/…/…/external/libcxxabi/src/abort_message.cpp:72: abort_message: assertion “cannot create thread specific key for __cxa_get_globals()” failed’
C18B618 12-15 00:30:48.302 16292 16292 F DEBUG : r0 00000000 r1 00003f9f r2 00000006 r3 d4cfd010
C18B619 12-15 00:30:48.302 16292 16292 F DEBUG : r4 d4cfd024 r5 d4cfd008 r6 000018b0 r7 0000016b
C18B61A 12-15 00:30:48.302 16292 16292 F DEBUG : r8 d4cfd010 r9 d4cfd020 r10 d4cfd040 r11 d4cfd030
C18B61C 12-15 00:30:48.302 16292 16292 F DEBUG : ip 00003f9f sp d4cfcfe0 lr f48c37cd pc f48c37e0
C18B652 12-15 00:30:48.314 16292 16292 F DEBUG : backtrace:
C18B653 12-15 00:30:48.315 16292 16292 F DEBUG : #00 pc 000387e0 /apex/com.android.runtime/lib/bionic/libc.so (abort+172) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C18B654 12-15 00:30:48.315 16292 16292 F DEBUG : #01 pc 00038a87 /apex/com.android.runtime/lib/bionic/libc.so (__assert2+22) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C18B655 12-15 00:30:48.315 16292 16292 F DEBUG : #02 pc 002fa09b /vendor/lib/libtensorflowlite_c.so
C18B656 12-15 00:30:48.315 16292 16292 F DEBUG : #03 pc 002f8619 /vendor/lib/libtensorflowlite_c.so
C18B657 12-15 00:30:48.315 16292 16292 F DEBUG : #04 pc 00081cb5 /apex/com.android.runtime/lib/bionic/libc.so (pthread_once+76) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C18B658 12-15 00:30:48.316 16292 16292 F DEBUG : #05 pc 002f85db /vendor/lib/libtensorflowlite_c.so
C18B659 12-15 00:30:48.316 16292 16292 F DEBUG : #06 pc 002f8589 /vendor/lib/libtensorflowlite_c.so
C18B65A 12-15 00:30:48.316 16292 16292 F DEBUG : #07 pc 002f8317 /vendor/lib/libtensorflowlite_c.so
C18B65B 12-15 00:30:48.316 16292 16292 F DEBUG : #08 pc 002f7e11 /vendor/lib/libtensorflowlite_c.so
C18B65C 12-15 00:30:48.316 16292 16292 F DEBUG : #09 pc 002f7f01 /vendor/lib/libtensorflowlite_c.so
C18B65D 12-15 00:30:48.316 16292 16292 F DEBUG : #10 pc 002f7ec1 /vendor/lib/libtensorflowlite_c.so
C18B65E 12-15 00:30:48.316 16292 16292 F DEBUG : #11 pc 002c68ed /vendor/lib/libtensorflowlite_c.so
C18B65F 12-15 00:30:48.316 16292 16292 F DEBUG : #12 pc 00080973 /apex/com.android.runtime/lib/bionic/libc.so (__pthread_start(void*)+40) (BuildId: 724f04e3eb055a58d7517ffbc7210561)
C18B660 12-15 00:30:48.316 16292 16292 F DEBUG : #13 pc 00039ce3 /apex/com.android.runtime/lib/bionic/libc.so (__start_thread+30) (BuildId: 724f04e3eb055a58d7517ffbc7210561)

The symbolized frames above all point to system thread-related source files (libc++ / libc++abi) and contain no other information, so there is no way for me to debug the crash further.

Could you please add relevant others to help analyze and solve this crash?

Does anyone else have the same problem?