Location>code7788 >text

Web Services Performance Optimization: Wrktcp and Perf Tools Explained

Popularity:905 ℃/2024-09-19 11:35:05
  • wrktcp installation
    Gitee (Code Cloud) repository: https://gitee.com/icesky1stm/wrktcp
    Download the source, then run `cd wrktcp-master && make`; this builds the `wrktcp` binary. That is all there is to it.

  • wrktcp usage
    A stress test needs a service to hit, so I wrote a small epoll-based, edge-triggered server. Its job is to decide whether an IP address is domestic (mainland China) or foreign. Request: 00000015CHECKIP1.0.4.0; response: 000000010. The code is rough and only meant as a demo; the point is to stress-test it and analyze performance bottlenecks.

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <syslog.h>
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <thread>


// Table of IPv4 ranges filled by init_ip_list(): key = last address of a range,
// value = first address of that range, both in host byte order. Keying by the
// range end lets lower_bound(ip) find the only range that could contain ip.
std::map<unsigned long, unsigned long> g_ip_list; // range end -> range start

/**
 * Load the "CN" IPv4 ranges from a '|'-delimited text file into ip_list.
 *
 * Lines starting with '#' are skipped. Every other line must split into
 * exactly 7 '|'-separated fields; field 1 is the country code, field 2 the
 * address family, field 3 the first address of the range and field 4 the
 * number of addresses in it (presumably an RIR "delegated" statistics file,
 * e.g. "apnic|CN|ipv4|1.0.1.0|256|20110414|allocated" -- confirm with the
 * data source). Only lines with country "CN" and family "ipv4" are stored.
 *
 * Malformed lines are reported through syslog and skipped; they do not make
 * the call fail.
 *
 * @param file_name Path of the file to read.
 * @param ip_list   Receives one entry per accepted line:
 *                  key = last address of the range, value = first address,
 *                  both in host byte order. Existing entries are kept.
 * @return false if file_name is null or the file cannot be opened, true otherwise.
 */
bool init_ip_list(const char* file_name, std::map<unsigned long, unsigned long> &ip_list)
{
    if (file_name == nullptr)
    {
        return false;
    }

    FILE *fp = fopen(file_name, "r");
    if (fp == nullptr)
    {
        return false;
    }

    int line_no = 0;
    int total_count = 0;
    char buf[64] = {0};

    while (fgets(buf, sizeof(buf), fp))
    {
        line_no++;

        // fgets() stops after sizeof(buf)-1 characters. If the line was longer,
        // discard the rest of it so the tail is not parsed as a separate record
        // and the reported line numbers stay correct.
        if (strchr(buf, '\n') == nullptr)
        {
            int ch;
            while ((ch = fgetc(fp)) != EOF && ch != '\n')
            {
            }
        }

        if (buf[0] == '#')
            continue;

        // Split the line in place into at most 8 tokens; 8 means "too many".
        char *save = nullptr;
        char *cursor = buf;
        char *pc[10];
        int j = 0;

        while ((pc[j] = strtok_r(cursor, "|", &save)) != nullptr)
        {
            j++;
            cursor = nullptr;
            if (j > 7)
                break;
        }

        if (j != 7)
        {
            syslog(LOG_ERR, "%s:%d, unknown format the line is %d", __FILE__, __LINE__, line_no);
            continue;
        }

        if (strcmp(pc[2], "ipv4") != 0 || strcmp(pc[1], "CN") != 0)
            continue;

        // inet_pton() only accepts a full dotted quad and, unlike inet_addr(),
        // has no ambiguity between an error and 255.255.255.255.
        struct in_addr first_addr;
        if (inet_pton(AF_INET, pc[3], &first_addr) != 1)
        {
            syslog(LOG_ERR, "%s:%d, ip is unknown, the line is %d, the ip is %s", __FILE__, __LINE__, line_no, pc[3]);
            continue;
        }
        const unsigned long ip_begin = ntohl(first_addr.s_addr);

        // The count must be a positive integer and the range must stay inside
        // the 32-bit IPv4 space; otherwise ip_end would precede ip_begin or wrap.
        char *count_end = nullptr;
        const long count = strtol(pc[4], &count_end, 10);
        if (count_end == pc[4] || *count_end != '\0' || count <= 0 ||
            static_cast<unsigned long long>(count) > 0x100000000ULL - ip_begin)
        {
            syslog(LOG_ERR, "%s:%d, bad address count, the line is %d, the count is %s", __FILE__, __LINE__, line_no, pc[4]);
            continue;
        }

        const unsigned long ip_end = ip_begin + static_cast<unsigned long>(count) - 1;
        ip_list.insert(std::make_pair(ip_end, ip_begin));

        total_count++;
    }

    syslog(LOG_INFO, "%s:%d, init_ip_list, total count is %d", __FILE__, __LINE__, total_count);

    fclose(fp);
    return true;
}

/**
 * Extract the dotted-quad text that follows the "CHECKIP" marker of a request.
 *
 * Example: "00000015CHECKIP1.0.4.0" yields "1.0.4.0".
 *
 * Only digits and dots are copied (at most 15 characters, the longest IPv4
 * text), so a trailing CR/LF or any other byte after the address is dropped.
 * The result is NOT validated as an address; callers should pass it to
 * inet_pton().
 *
 * @param buf NUL-terminated request text.
 * @param ip  Output buffer of at least 16 bytes. It is always NUL-terminated
 *            on return and is the empty string when buf is null or does not
 *            contain the marker (a message is then printed to stderr).
 */
void extract_ip(char *buf, char *ip) {
    if (ip == NULL) {
        return;
    }
    // Give the caller a defined (empty) result on every failure path.
    ip[0] = '\0';

    const char *start = (buf != NULL) ? strstr(buf, "CHECKIP") : NULL;
    if (start == NULL) {
        fprintf(stderr, "Invalid protocol string\n");
        return;
    }

    // Skip over the 7-character "CHECKIP" marker.
    start += 7;

    // Copy the address characters; stop at the first byte that cannot be part
    // of a dotted quad or after 15 characters, whichever comes first.
    size_t n = 0;
    while (n < 15 && ((start[n] >= '0' && start[n] <= '9') || start[n] == '.')) {
        ip[n] = start[n];
        ++n;
    }
    ip[n] = '\0';
}

// server
/**
 * Demo server: answers "is this IPv4 address in the CN table?" over TCP.
 *
 * Start-up: loads ip_list.txt into g_ip_list, then listens on port 9999 on
 * all local addresses and registers the listening socket with epoll.
 *
 * Per request: reads from the client socket (non-blocking, edge-triggered),
 * takes the address that follows "CHECKIP", and replies
 *   "000000010" if the address lies inside a range of g_ip_list,
 *   "000000011" if it does not,
 *   "-Err\n"    if the text is not a valid IPv4 address.
 *
 * @return 1 if the IP list cannot be loaded; other fatal set-up errors end
 *         the process through exit(1). The event loop itself never returns.
 */
int main(int argc, const char* argv[])
{
    (void)argc;
    (void)argv;

    const char* file_name = "ip_list.txt";
    if (!init_ip_list(file_name, g_ip_list)) {
        std::cerr << "Failed to initialize IP list." << std::endl;
        return 1;
    }

    // Create the listening socket.
    int lfd = socket(AF_INET, SOCK_STREAM, 0);
    if(lfd == -1)
    {
        perror("socket error");
        exit(1);
    }

    // Listen on every local address, port 9999.
    struct sockaddr_in serv_addr;
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(9999);
    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    // To bind to loopback only:
    // inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr.s_addr);

    // Allow the port to be re-bound right after a restart.
    int opt = 1;
    setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));

    int ret = bind(lfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr));
    if(ret == -1)
    {
        perror("bind error");
        exit(1);
    }

    ret = listen(lfd, 64);
    if(ret == -1)
    {
        perror("listen error");
        exit(1);
    }

    // Create the epoll instance; the kernel watches the registered
    // descriptors and reports the ones that became readable.
    int epfd = epoll_create(100);
    if(epfd == -1)
    {
        perror("epoll_create");
        exit(1);
    }

    // At this point only the listening socket is registered (level-triggered).
    struct epoll_event ev;
    memset(&ev, 0, sizeof(ev));
    ev.events = EPOLLIN;
    ev.data.fd = lfd;
    ret = epoll_ctl(epfd, EPOLL_CTL_ADD, lfd, &ev);
    if(ret == -1)
    {
        perror("epoll_ctl");
        exit(1);
    }

    struct epoll_event evs[1024];
    int size = sizeof(evs) / sizeof(struct epoll_event);

    // Event loop.
    while(1)
    {
        int num = epoll_wait(epfd, evs, size, -1);
        if(num == -1)
        {
            // A signal interrupting the wait is not an error; anything else is
            // fatal, otherwise the loop would spin forever on the same failure.
            if(errno == EINTR)
            {
                continue;
            }
            perror("epoll_wait");
            exit(1);
        }
        printf("==== num: %d\n", num);

        for(int i=0; i<num; ++i)
        {
            int curfd = evs[i].data.fd;

            if(curfd == lfd)
            {
                // New connection on the listening socket.
                int cfd = accept(curfd, NULL, NULL);
                if(cfd == -1)
                {
                    perror("accept");
                    continue;
                }

                // Edge-triggered reads require a non-blocking descriptor.
                int flag = fcntl(cfd, F_GETFL);
                if(flag == -1 || fcntl(cfd, F_SETFL, flag | O_NONBLOCK) == -1)
                {
                    perror("fcntl");
                    close(cfd);
                    continue;
                }

                // Register the client socket in edge-triggered mode; it is
                // reported from the next epoll_wait() on.
                ev.events = EPOLLIN | EPOLLET;
                ev.data.fd = cfd;
                ret = epoll_ctl(epfd, EPOLL_CTL_ADD, cfd, &ev);
                if(ret == -1)
                {
                    // Losing one client must not take the server down.
                    perror("epoll_ctl-accept");
                    close(cfd);
                }
                continue;
            }

            // Client socket: edge-triggered, so drain it until recv() reports
            // that no more data is available.
            char buf[128];
            while(1)
            {
                int len = recv(curfd, buf, sizeof(buf)-1, 0);
                if(len == 0)
                {
                    // Peer closed the connection.
                    printf("Client disconnected....\n");
                    epoll_ctl(epfd, EPOLL_CTL_DEL, curfd, NULL);
                    close(curfd);
                    break;
                }
                else if(len > 0)
                {
                    // Terminate exactly after the received bytes so that data
                    // left over from a previous, longer read is never parsed.
                    buf[len] = '\0';

                    // Echo the raw request to the terminal.
                    write(STDOUT_FILENO, buf, len);

                    // Zero-initialised so it is a valid (empty) string even if
                    // extract_ip() finds nothing to copy.
                    char ip[16] = {0};
                    extract_ip(buf, ip);
                    printf("Received IP: %s\n", ip);

                    // inet_pton() returns 1 on success, 0 when the text is not
                    // a valid address and -1 on error; only 1 fills `address`.
                    struct in_addr address;
                    int result = inet_pton(AF_INET, ip, &address);
                    if (result != 1) {
                        std::cout << "Invalid IP address: " << result << " " << ip << std::endl;
                        // MSG_NOSIGNAL: a peer that already went away must not
                        // kill the server with SIGPIPE.
                        send(curfd, "-Err\n", 5, MSG_NOSIGNAL);
                        continue;
                    }

                    // g_ip_list is keyed by range end, so lower_bound() yields
                    // the first range ending at or after ip_num; the address
                    // is inside it when that range also starts at or before it.
                    unsigned long ip_num = ntohl(address.s_addr);
                    auto it = g_ip_list.lower_bound(ip_num);
                    if (it != g_ip_list.end() && it->second <= ip_num) {
                        send(curfd, "000000010", 9, MSG_NOSIGNAL); // domestic
                    } else {
                        send(curfd, "000000011", 9, MSG_NOSIGNAL); // foreign
                    }
                }
                else
                {
                    // len == -1
                    if(errno == EINTR)
                    {
                        continue;
                    }
                    if(errno == EAGAIN || errno == EWOULDBLOCK)
                    {
                        // Everything currently available has been read.
                        printf("The data is read....\n");
                        // NOTE(review): closing here ends the connection after
                        // each burst of data, so clients must reconnect for the
                        // next request. Kept as in the original; drop this
                        // close() if persistent connections are wanted.
                        close(curfd);
                        break;
                    }

                    // Any other error (e.g. connection reset) affects only this
                    // client: drop it and keep serving the others.
                    perror("recv");
                    epoll_ctl(epfd, EPOLL_CTL_DEL, curfd, NULL);
                    close(curfd);
                    break;
                }
            }
        }
    }

    return 0;
}

Compile with `g++ epoll_test.cpp -o epoll_test` and run it directly with `./epoll_test`; the server listens on port 9999 on all interfaces (0.0.0.0).

  • wrk configuration file sample_tiny.ini
[common]
# ip & port
host = 127.0.0.1
port = 9999

[request]
req_body = CHECKIP1.0.4.0

[response]
rsp_code_location = head

One pitfall: req_body is the payload to send, but wrktcp prepends a fixed 8-digit length field to it (here 00000015). The default success response code is 000000; setting rsp_code_location = head makes wrktcp look for that success code starting from the head of the returned message (000000010).
Note: The README for wrktcp has some instructions, but they are not very complete, so you need to try it yourself and read the source code.

  • todo
    wrktcp always prepends the fixed 8-digit length, but not every service uses such a protocol. I have not yet worked out how to customize the framing; pointers from experienced users are welcome. wrk itself appears to support custom protocols.
  • The wrk pressure test command
    ./wrktcp -t15 -c15 -d100s --latency sample_tiny.ini
-t, --threads: the total number of threads to use; the general recommendation is 2 × (number of CPU cores) − 1
-c, --connections: total number of connections, independent of threads. The number of connections per thread is connections/threads.
-d, --duration: the duration of the stress test, you can write 2s, 2m, 2h
--latency: Print latency distribution
--timeout: Specify the timeout, default is 5000 milliseconds; the longer the timeout, the more memory is consumed by the statistics.
--trace: print out the distribution.
--html: output the result data of the pressure test to an html file.
--test: execute only once for each connection, usually used to test if the configuration is correct.
-v --version: Print the version information.

Tested twice, TPS can stay around 1600

  Running 2m loadtest @ 127.0.0.1:9999 using sample_tiny.ini
  15 threads and 15 connections
  Time:100s TPS:1644.64/0.00 Latency:7.69ms BPS:14.45KB Error:0
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency     4.66ms   14.17ms 318.09ms   98.89%
    Req/Sec   113.66    233.09     1.69k    94.95%
  Latency Distribution
     50%  823.00us
     75%    8.17ms
     90%    9.15ms
     99%   23.08ms
  164554 requests in 1.67m, 1.41MB read
Requests/sec:   1643.21    (Success:1643.21/Failure:0.00)
Transfer/sec:     14.44KB
  • perf
    Profile the service while it is under load: perf record -p 10263 -a -g -F 99 -- sleep 10
    Parameter description:
    -p : PID of the process to profile (10263 here)
    -a : system-wide collection from all CPUs
    -g : record call graphs (stack traces) with each sample, so that the report can show the relationships between function calls. Frame pointers are used by default; --call-graph dwarf switches to unwinding based on DWARF debugging information.
    -F : sampling frequency in Hz (99 samples per second here)
    -- sleep 10 : the command perf runs while recording. sleep 10 simply exits after 10 seconds, so perf record collects performance data of the specified process for 10 seconds and then stops.

perf record writes its data to a file in the current directory (perf.data by default). Run perf report on it and you will see that printf and write account for a large share of the CPU time. Remove the printf and write calls from the server above and run the stress test again.

After re-running the stress test, TPS holds at 30,000+ (3W+).

Running 2m loadtest @ 127.0.0.1:9999 using sample_tiny.ini
  15 threads and 15 connections
  Time:100s TPS:32748.45/0.00 Latency:438.00us BPS:287.83KB Error:0
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   519.35us    1.24ms  63.18ms   97.47%
    Req/Sec     2.19k   536.83     4.83k    76.97%
  Latency Distribution
     50%  349.00us
     75%  426.00us
     90%  507.00us
     99%    5.12ms
  3275261 requests in 1.67m, 28.11MB read
Requests/sec:  32716.39    (Success:32716.39/Failure:0.00)
Transfer/sec:    287.55KB