How do I use memory_buffer_alloc_init() in a multi-threaded program?

I have a simple program that uses mbedtls to perform an RSA decryption operation on an encrypted message. I’m trying to use multiple threads to perform the same operation concurrently. I’m interested in using stack memory instead of the heap. mbedtls provides memory_buffer_alloc.h, a stack-based memory allocator. The documentation of memory_buffer_alloc_init() says:

Initialize use of stack-based memory allocator.
The stack-based allocator does memory management inside the
presented buffer and does not call malloc() and free().
It sets the global polarssl_malloc() and polarssl_free() pointers
to its own functions.
(Provided polarssl_malloc() and polarssl_free() are thread-safe if
POLARSSL_THREADING_C is defined)

Therefore, I have added the following configuration to my config.h file:

#define POLARSSL_THREADING_PTHREAD
#define POLARSSL_THREADING_C
#define POLARSSL_MEMORY_C
#define POLARSSL_MEMORY_BUFFER_ALLOC_C
#define POLARSSL_PLATFORM_MEMORY
#define POLARSSL_PLATFORM_C

My code works with a single thread. However, when I increase the number of threads, it fails with an error. Here is my source code:

#include "rsa/config.h"
#include "rsa/aes.h"
#include "rsa/bignum.h"
#include "rsa/rsa.h"
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h> 
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <pthread.h>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "rsa/key.h"

#include "rsa/memory_buffer_alloc.h"
#include "rsa/memory.h"
#include "rsa/platform.h"
#include "rsa/threading.h"


#define NUM_OF_THREAD 2  

//threading_mutex_t lock;


void decryption(){

    // initialize stack memory
    unsigned char alloc_buf[10000];
    memory_buffer_alloc_init( &alloc_buf, sizeof(alloc_buf) );

    unsigned char private_encrypt[KEY_BUFFER_SIZE];
    int total_dec=5;
    unsigned char * buffer = 0;
    long length;
    unsigned char msg_decrypted[KEY_LEN];


    // reading encrypted msg
    FILE * fp2 = fopen ("msg.enc", "rb");
    int size1=KEY_BUFFER_SIZE;
    if(fp2){
        while(size1>0){
            fread(private_encrypt,1,sizeof (private_encrypt),fp2);
            size1=size1-1;
        }
    }
    fclose(fp2);

    // reading rsa-private key
    FILE * fp = fopen ("rsa_priv.txt", "rb");
    if (fp){
        fseek (fp, 0, SEEK_END);
        length = ftell (fp);
        fseek (fp, 0, SEEK_SET);
        buffer = calloc (1,length+1);
        if (buffer){
            fread (buffer, 1, length, fp);
        }
    fclose (fp);
    }

    // initialize rsaContext
    rsa_context rsaContext;
    rsa_init(&rsaContext,RSA_PKCS_V15, 0);
    rsaContext.len=KEY_LEN;

    // spliting keys and load into rsa context
    const char s[3] = "= ";
    char *token;
    int k=0, size;
    char *rest=buffer;

    // get the first token
    token = strtok_r(rest,s,&rest);

    // walk through other tokens
    while( token != NULL ) {
        size = strlen(token);
        switch (k) {
            case 1:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.N, 16, token);
                break;

            case 3:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.E, 16, token);
                break;

            case 5:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.D, 16, token);
                break;

            case 7:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.P, 16, token);
                break;

            case 9:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.Q, 16, token);
                break;

            case 11:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.DP, 16, token);
                break;

            case 13:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.DQ, 16, token);
                break;

            case 15:
                token[size-1]='\0';
                mpi_read_string(&rsaContext.QP, 16, token);
                break;
        }
        k=k+1;
        token = strtok_r(rest, "= \n", &rest);
    }


    if( rsa_private(&rsaContext,private_encrypt, msg_decrypted) != 0 ) {
        printf( "Decryption failed! %d\n", rsa_private(&rsaContext,private_encrypt, msg_decrypted));
    }else{
        printf("Decrypted plaintext-----> %s\n",msg_decrypted );
    }

   // free memory 
   memory_buffer_alloc_free();

}


/*
 * Thread entry point: prints a banner, then runs decryption() five times.
 *
 * The signature is void *(*)(void *) because that is what pthread_create()
 * requires; starting a thread on a function returning void is undefined
 * behaviour, and pthread_join() reads the returned pointer.
 *
 * input: unused.
 * Returns NULL.
 */
void *thread_function(void *input)
{
    (void)input;

    printf("Test thread\n");

    /* No outer lock is taken: decryption() must be safe to call concurrently. */
    for (int remaining = 5; remaining > 0; remaining--) {
        decryption();
    }
    return NULL;
}


/*
 * Start NUM_OF_THREAD worker threads running thread_function and wait for
 * all of them to finish.
 *
 * Returns 0 when every thread was created, 1 if a pthread_create() call
 * failed (threads started before the failure are still joined).
 */
int main(void)
{
    pthread_t ths[NUM_OF_THREAD];
    int started = 0;
    int status = 0;

    for (int i = 0; i < NUM_OF_THREAD; i++) {
        int err = pthread_create(&ths[i], NULL, thread_function, NULL);
        if (err != 0) {
            /* pthread_create reports the error as its return value, not errno. */
            fprintf(stderr, "pthread_create failed: %s\n", strerror(err));
            status = 1;
            break;
        }
        started++;
    }

    /* Join only the threads that exist; ths[] beyond `started` is uninitialised. */
    for (int i = 0; i < started; i++) {
        pthread_join(ths[i], NULL);
    }
    return status;
}

If I use a mutex, the above code works. However, I do not want to use a lock, because it makes the program take a long time.
Can anyone please tell me what I’m doing wrong and how I can fix it?