Patrick Patrick - 6 days ago 5
C Question

Read multiple files using multiple threads in C

#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <semaphore.h>


int file_index = 0; // next free slot in main's wData/tid_array arrays; incremented once per regular file found


// One work item handed to a worker thread: the path of the file it should open.
struct webData
{
    char web_names[255];  // NUL-terminated path of the file (main stores "./data/" + file name here)
};


// Thread entry point: opens the file named by one webData element and prints
// its first line (followed by an extra newline) to stdout.
//
// wData_element: pointer to a struct webData whose web_names holds the path.
//                It must stay valid until this thread has finished.
// Returns NULL in every case; an open failure is reported with perror().
void *thread(void *wData_element)
{
    struct webData *temp = wData_element;
    char line[255] = "";  // holds the first line of the file

    FILE *fp = fopen(temp->web_names, "r");
    if (fp == NULL)
    {
        perror("Error: File open failure.");
        return NULL;  // nothing was opened, so fclose() must not be called
    }

    if (fgets(line, sizeof line, fp) == NULL)
    {
        // Empty file or read error: after an error the buffer contents are
        // indeterminate, so reset it and print an empty line.
        line[0] = '\0';
    }
    printf("%s\n", line);

    fclose(fp);
    return NULL;
}

// Scans ./data/, starts one worker thread per regular file found (at most
// MAX_FILES), then waits for every thread that was actually started.
//
// Returns 0 on success, 1 if the directory cannot be opened.
int main(int argc, char const* argv[])
{
    (void)argc;
    (void)argv;

    enum { MAX_FILES = 500 };

    // One path slot and one thread id per file.
    struct webData wData[MAX_FILES];
    pthread_t tid_array[MAX_FILES];

    DIR *dir_pointer = opendir("./data/");
    if (dir_pointer == NULL)
    {
        perror("Error: Directory open failure.");
        return 1;
    }

    struct dirent *entry;
    while ((entry = readdir(dir_pointer)) != NULL)
    {
        if (entry->d_type != DT_REG)  // skips ".", ".." and anything that is not a regular file
        {
            continue;
        }

        if (file_index >= MAX_FILES)
        {
            fprintf(stderr, "Warning: more than %d files, ignoring the rest.\n", MAX_FILES);
            break;
        }

        // Build the path directly in this file's slot. snprintf never writes
        // past the buffer and its return value reveals truncation.
        char *path = wData[file_index].web_names;
        size_t path_size = sizeof wData[file_index].web_names;
        int len = snprintf(path, path_size, "./data/%s", entry->d_name);
        if (len < 0 || (size_t)len >= path_size)
        {
            fprintf(stderr, "Warning: path too long, skipping %s\n", entry->d_name);
            continue;  // the slot is reused for the next file
        }

        // pthread_create returns an error number instead of setting errno.
        int rc = pthread_create(&tid_array[file_index], NULL, thread, &wData[file_index]);
        if (rc != 0)
        {
            fprintf(stderr, "Error: pthread_create failed: %s\n", strerror(rc));
            break;  // stop creating threads, but still join the ones already running
        }

        file_index++;  // this slot now belongs to a running thread
    }

    closedir(dir_pointer);

    // Join only the threads that were created; the remaining tid_array
    // entries are uninitialised and must not be passed to pthread_join.
    for (int i = 0; i < file_index; i++)
    {
        pthread_join(tid_array[i], NULL);
    }

    return 0;
}





For this program:

There are 500 files in the data folder.

For each file, I create a thread to do some action on the file.

After iterating over all 500 files, I join all the threads.

My question is:

How can I create 10 threads so that each thread does some action on exactly 50 files?

How can I make sure each thread handles only its own 50 files, given that the threads run concurrently?

For example:

thread 1 handles files 1-50

thread 2 handles files 51-100

.

.

.

Thanks a lot for any related source or example.

Answer

First, declare a parameter struct for the threads:

// Work description handed to one worker thread: a contiguous slice of the
// webData array.
struct thread_param_s
{
    struct webData *data;  // first element of this thread's slice
    int n;                 // number of elements in the slice
};

typedef struct thread_param_s thread_param_t;

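Create one such parameter struct for each thread, fill it in accordingly, and pass its address to pthread_create instead of the wData pointer. In the source file, place the typedef after the definition of struct webData and before the thread function that uses it.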

Now adjust your current code as follows:

#include <dirent.h>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <semaphore.h>


int file_index = 0;         // next free slot in main's wData array; after the directory scan it holds the number of files found


// One work item: the path of a single file a worker thread should open.
struct webData
{
    char web_names[255];  // NUL-terminated path of the file (main stores "./data/" + file name here)
};


// Thread entry point: processes a slice of webData elements. For each
// element it opens the named file and prints its first line (followed by an
// extra newline) to stdout.
//
// param: pointer to a thread_param_t giving the first element (data) and the
//        element count (n). Both the parameter struct and the slice must stay
//        valid until this thread has finished.
// Returns NULL in every case; an open failure is reported with perror() and
// the remaining files are still processed.
void *thread(void *param)
{
    thread_param_t* thread_param = (thread_param_t*)param;

    // iterate through all files assigned to this thread
    for (int i = 0; i < thread_param->n; i++) {
        struct webData *temp = thread_param->data + i;
        char line[255] = "";      // holds the first line of the file

        FILE *fp = fopen(temp->web_names, "r");
        if (fp == NULL)
        {
            perror("Error: File open failure.");
            continue;   // nothing to close; move on to the next file
        }

        if (fgets(line, sizeof line, fp) == NULL)
        {
            // Empty file or read error: after an error the buffer contents
            // are indeterminate, so reset it and print an empty line.
            line[0] = '\0';
        }
        printf("%s\n", line);

        // Close every file that was opened; otherwise each thread leaks one
        // FILE (and its descriptor) per file it processes.
        fclose(fp);
    }
    return NULL;
}

// Collects the paths of the regular files in ./data/ (at most MAX_FILES),
// splits them into NUM_THREADS contiguous slices, processes each slice in its
// own thread and waits for all threads to finish.
//
// Returns 0 on success, 1 if the directory cannot be opened.
int main(int argc, char const* argv[])
{
    (void)argc;
    (void)argv;

    enum { MAX_FILES = 500, NUM_THREADS = 10 };

    // One path slot per file.
    struct webData wData[MAX_FILES];

    DIR *dir_pointer = opendir("./data/");
    if (dir_pointer == NULL)
    {
        perror("Error: Directory open failure.");
        return 1;
    }

    // Just fill wData here; the threads are started after the scan.
    struct dirent *entry;
    while ((entry = readdir(dir_pointer)) != NULL)
    {
        if (entry->d_type != DT_REG) // skips ".", ".." and anything that is not a regular file
        {
            continue;
        }

        if (file_index >= MAX_FILES)
        {
            fprintf(stderr, "Warning: more than %d files, ignoring the rest.\n", MAX_FILES);
            break;
        }

        // Build the path directly in this file's slot. snprintf never writes
        // past the buffer and its return value reveals truncation.
        char *path = wData[file_index].web_names;
        size_t path_size = sizeof wData[file_index].web_names;
        int len = snprintf(path, path_size, "./data/%s", entry->d_name);
        if (len < 0 || (size_t)len >= path_size)
        {
            fprintf(stderr, "Warning: path too long, skipping %s\n", entry->d_name);
            continue;   // the slot is reused for the next file
        }

        file_index++;   // increase the file index for next file.
    }

    closedir(dir_pointer);

    pthread_t tid_array[NUM_THREADS];
    thread_param_t thread_param[NUM_THREADS];
    int created = 0;    // number of valid entries in tid_array

    // number of files for each thread
    int step = file_index / NUM_THREADS;

    // create all threads
    for (int i = 0; i < NUM_THREADS; i++)
    {
        thread_param[i].data = wData + step * i;
        // the last thread may get more data, because of integer rounding
        thread_param[i].n = (i == NUM_THREADS - 1) ? file_index - step * i : step;

        // pthread_create returns an error number instead of setting errno.
        int rc = pthread_create(&tid_array[created], NULL, thread, thread_param + i);
        if (rc != 0)
        {
            fprintf(stderr, "Error: pthread_create failed: %s\n", strerror(rc));
            // Process this slice on the calling thread so no file is skipped.
            thread(thread_param + i);
            continue;
        }
        created++;
    }

    // Join only the threads that were actually created.
    for (int i = 0; i < created; i++)
    {
        pthread_join(tid_array[i], NULL);
    }

    return 0;
}
Comments