-
Notifications
You must be signed in to change notification settings - Fork 29.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cluster: use linkedlist for round_robin_handle #40615
Conversation
I thought we had a benchmark for round robin somewhere but I'm not seeing it. @nodejs/benchmarking |
You can just use the offical code of cluster: const cluster = require('cluster');
const http = require('http');
const numCPUs = 2;//require('os').cpus().length;
const process = require('process');
if (cluster.isMaster) {
console.log(`Primary ${process.pid} is running`);
// Fork workers.
for (let i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', (worker, code, signal) => {
console.log(`worker ${worker.process.pid} died`);
});
} else {
// Workers can share any TCP connection
// In this case it is an HTTP server
http.createServer((req, res) => {
res.writeHead(200);
res.end('hello world\n');
}).listen(8000);
console.log(`Worker ${process.pid} started`);
} After run the log is print as follows:
And then you can use some benchmark tools, such as jmeter, to request to the service you have just started. Here is an example configuration of jmeter: <?xml version="1.0" encoding="UTF-8"?>
<jmeterTestPlan version="1.2" properties="5.0" jmeter="5.3">
<hashTree>
<TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="Test Plan" enabled="true">
<stringProp name="TestPlan.comments"></stringProp>
<boolProp name="TestPlan.functional_mode">false</boolProp>
<boolProp name="TestPlan.tearDown_on_shutdown">true</boolProp>
<boolProp name="TestPlan.serialize_threadgroups">false</boolProp>
<elementProp name="TestPlan.user_defined_variables" elementType="Arguments" guiclass="ArgumentsPanel" testclass="Arguments" testname="User Defined Variables" enabled="true">
<collectionProp name="Arguments.arguments"/>
</elementProp>
<stringProp name="TestPlan.user_define_classpath"></stringProp>
</TestPlan>
<hashTree>
<ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="Thread Group" enabled="true">
<stringProp name="ThreadGroup.on_sample_error">continue</stringProp>
<elementProp name="ThreadGroup.main_controller" elementType="LoopController" guiclass="LoopControlPanel" testclass="LoopController" testname="Loop Controller" enabled="true">
<boolProp name="LoopController.continue_forever">false</boolProp>
<intProp name="LoopController.loops">-1</intProp>
</elementProp>
<stringProp name="ThreadGroup.num_threads">8</stringProp>
<stringProp name="ThreadGroup.ramp_time">1</stringProp>
<boolProp name="ThreadGroup.scheduler">false</boolProp>
<stringProp name="ThreadGroup.duration"></stringProp>
<stringProp name="ThreadGroup.delay"></stringProp>
<boolProp name="ThreadGroup.same_user_on_next_iteration">true</boolProp>
</ThreadGroup>
<hashTree>
<HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="HTTP Request" enabled="true">
<elementProp name="HTTPsampler.Arguments" elementType="Arguments" guiclass="HTTPArgumentsPanel" testclass="Arguments" testname="User Defined Variables" enabled="true">
<collectionProp name="Arguments.arguments"/>
</elementProp>
<stringProp name="HTTPSampler.domain">please_replace_this_to_the_ip_of_your_service</stringProp>
<stringProp name="HTTPSampler.port">8000</stringProp>
<stringProp name="HTTPSampler.protocol"></stringProp>
<stringProp name="HTTPSampler.contentEncoding"></stringProp>
<stringProp name="HTTPSampler.path">/</stringProp>
<stringProp name="HTTPSampler.method">GET</stringProp>
<boolProp name="HTTPSampler.follow_redirects">true</boolProp>
<boolProp name="HTTPSampler.auto_redirects">false</boolProp>
<boolProp name="HTTPSampler.use_keepalive">false</boolProp><!-- disable keepalive to create connection as fast as it can-->
<boolProp name="HTTPSampler.DO_MULTIPART_POST">false</boolProp>
<stringProp name="HTTPSampler.embedded_url_re"></stringProp>
<stringProp name="HTTPSampler.connect_timeout"></stringProp>
<stringProp name="HTTPSampler.response_timeout"></stringProp>
</HTTPSamplerProxy>
<hashTree>
<ResultCollector guiclass="ViewResultsFullVisualizer" testclass="ResultCollector" testname="View Results Tree" enabled="false">
<boolProp name="ResultCollector.error_logging">false</boolProp>
<objProp>
<name>saveConfig</name>
<value class="SampleSaveConfiguration">
<time>true</time>
<latency>true</latency>
<timestamp>true</timestamp>
<success>true</success>
<label>true</label>
<code>true</code>
<message>true</message>
<threadName>true</threadName>
<dataType>true</dataType>
<encoding>false</encoding>
<assertions>true</assertions>
<subresults>true</subresults>
<responseData>false</responseData>
<samplerData>false</samplerData>
<xml>false</xml>
<fieldNames>true</fieldNames>
<responseHeaders>false</responseHeaders>
<requestHeaders>false</requestHeaders>
<responseDataOnError>false</responseDataOnError>
<saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
<assertionsResultsToSave>0</assertionsResultsToSave>
<bytes>true</bytes>
<sentBytes>true</sentBytes>
<url>true</url>
<threadCounts>true</threadCounts>
<idleTime>true</idleTime>
<connectTime>true</connectTime>
</value>
</objProp>
<stringProp name="filename"></stringProp>
</ResultCollector>
<hashTree/>
<ResultCollector guiclass="SummaryReport" testclass="ResultCollector" testname="Summary Report" enabled="true">
<boolProp name="ResultCollector.error_logging">false</boolProp>
<objProp>
<name>saveConfig</name>
<value class="SampleSaveConfiguration">
<time>true</time>
<latency>true</latency>
<timestamp>true</timestamp>
<success>true</success>
<label>true</label>
<code>true</code>
<message>true</message>
<threadName>true</threadName>
<dataType>true</dataType>
<encoding>false</encoding>
<assertions>true</assertions>
<subresults>true</subresults>
<responseData>false</responseData>
<samplerData>false</samplerData>
<xml>false</xml>
<fieldNames>true</fieldNames>
<responseHeaders>false</responseHeaders>
<requestHeaders>false</requestHeaders>
<responseDataOnError>false</responseDataOnError>
<saveAssertionResultsFailureMessage>true</saveAssertionResultsFailureMessage>
<assertionsResultsToSave>0</assertionsResultsToSave>
<bytes>true</bytes>
<sentBytes>true</sentBytes>
<url>true</url>
<threadCounts>true</threadCounts>
<idleTime>true</idleTime>
<connectTime>true</connectTime>
</value>
</objProp>
<stringProp name="filename"></stringProp>
</ResultCollector>
<hashTree/>
</hashTree>
</hashTree>
</hashTree>
</hashTree>
</jmeterTestPlan> We give a name of this confugration of cluster.jmx. At last , run the command of jmeter: bin/jmeter.sh -n -t /dir_of_jmx/cluster.jmx -l /tmp/cluster.jtl -e -o /tmp/cluster.out you can see the CPU usages via the command of The primary process (with pid |
@nodejs/cluster |
Bench CI for cluster: https://ci.nodejs.org/view/Node.js%20benchmark/job/benchmark-node-micro-benchmarks/1058/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with a positive or neutral benchmark CI
Benchmark CI is neutral. |
cc @Trott could you take a look on how to land this? |
@mcollina I pulled the changes down locally, rebased against master (to get the fix on master branch for the test that was failing on debug builds in Jenkins), squashed the two commits into one (to fix the GitHub Action commit linter complaint), force pushed to twchn's branch for this PR, and did a rebuild on Jenkins. Hopefully everything is green and then the |
Commit Queue failed- Loading data for nodejs/node/pull/40615 ✔ Done loading data for nodejs/node/pull/40615 ----------------------------------- PR info ------------------------------------ Title cluster: use linkedlist for round_robin_handle (#40615) ⚠ Could not retrieve the email or name of the PR author's from user's GitHub profile! Branch twchn:feat_use_linkedlist_for_round_robin -> nodejs:master Labels cluster, timers, needs-ci Commits 1 - cluster: use linkedlist for round_robin_handle Committers 1 - Rich Trott PR-URL: https://github.com/nodejs/node/pull/40615 Reviewed-By: Matteo Collina Reviewed-By: James M Snell ------------------------------ Generated metadata ------------------------------ PR-URL: https://github.com/nodejs/node/pull/40615 Reviewed-By: Matteo Collina Reviewed-By: James M Snell -------------------------------------------------------------------------------- ⚠ Commits were pushed since the last review: ⚠ - cluster: use linkedlist for round_robin_handle ℹ This PR was created on Tue, 26 Oct 2021 16:12:34 GMT ✔ Approvals: 2 ✔ - Matteo Collina (@mcollina) (TSC): https://github.com/nodejs/node/pull/40615#pullrequestreview-811397947 ✔ - James M Snell (@jasnell) (TSC): https://github.com/nodejs/node/pull/40615#pullrequestreview-811856870 ✔ Last GitHub Actions successful ℹ Last Benchmark CI on 2021-11-19T16:05:03Z: https://ci.nodejs.org/view/Node.js%20benchmark/job/benchmark-node-micro-benchmarks/1058/ ℹ Last Full PR CI on 2021-11-29T02:31:19Z: https://ci.nodejs.org/job/node-test-pull-request/41180/ - Querying data for job/node-test-pull-request/41180/ ✔ Last Jenkins CI successful -------------------------------------------------------------------------------- ✔ Aborted `git node land` session in /home/runner/work/node/node/.ncuhttps://github.com/nodejs/node/actions/runs/1514597734 |
PR-URL: #40615 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
Landed in 4b65dec |
Thanks for the contribution! 🎉 |
I have writen a kubernetes yaml file to test the performance of cluster. The node version I used is 14.18.1. apiVersion: apps/v1
kind: Deployment
metadata:
name: hello-deployment
labels:
app: hello
spec:
replicas: 1
selector:
matchLabels:
app: hello
template:
metadata:
name: hello-app
labels:
app: hello
spec:
initContainers:
- image: busybox
command:
- sh
- -c
- |
sysctl -w net.core.somaxconn=10240
sysctl -w net.ipv4.ip_local_port_range="1024 65535"
sysctl -w net.ipv4.tcp_tw_reuse=1
sysctl -w fs.file-max=6048576
name: setsysctl
securityContext:
privileged: true
containers:
- name: hello-app
image: registry.cn-hangzhou.aliyuncs.com/whyun/base:hello-latest
imagePullPolicy: Always
resources:
requests:
cpu: 2000m
memory: 2Gi
limits:
cpu: 4000m
memory: 4Gi
env:
- name: APP_ID
value: "17959"
- name: APP_SECRET
value: "6994ea9b6a8d1e673d9cc53aab8e45dd8eaec8d2"
---
kind: Service
apiVersion: v1
metadata:
name: hello-service
spec:
selector:
app: hello
ports:
- port: 8000 # Default port for image
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: bench-deployment
labels:
app: bench
spec:
replicas: 16
selector:
matchLabels:
app: bench
template:
metadata:
name: bench-node
labels:
app: bench
spec:
initContainers:
- image: busybox
command:
- sh
- -c
- |
sysctl -w net.core.somaxconn=10240
sysctl -w net.ipv4.ip_local_port_range="1024 65535"
sysctl -w net.ipv4.tcp_tw_reuse=1
sysctl -w fs.file-max=1048576
name: setsysctl
securityContext:
privileged: true
containers:
- name: bench-node
image: registry.cn-hangzhou.aliyuncs.com/whyun/base:node-bench-0.2.0
env:
- name: APP_ID
value: "the app id from alinode"
- name: APP_SECRET
value: "the app secret from alinode"
- name: REQ_URL
value: http://hello-service:8000/
- name: REQ_INTERVAL_MS
value: "5"
- name: REQ_TIMEOUT_MS
value: "20" I use alinode as a tool to generate CPU flame graph, which integrated in the docker image. You have to change the environment variable of We can just change the environment variable of I first set the value of
Then set it to 4:
It changed little from 5 ms. Then set it to 3:
We can figure out that it uses more CPU time than 4ms. Then set it to 2:
Firstly I took a screenshot of the CPU uages. It's almost the same as the usage under 3ms. Next step, when I generated the CPU flame graph, the cpu usage of the parent process raised to 100%.
And I also found that the memery usage raised fast.
Combine the CPU flame graph , we can deduce the reason, 100% CPU is used by V8 engine, the task of sending socket handle to the children processes slows down, some socket handles would be saved in the queue (which is an array in js), the queue length becomes larger, it will use more time to do shift operation of the array, the CPU usage will be still high, the queue length will be larger next time. Whe the queue length is too large, the memory will exceed the limit of old generation, and the process will be OOM. The merge request can reduce the probability of OOM, but we can also found the commuication between parent and children is not cheap. So I will be glad to see the feature of |
PR-URL: #40615 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
PR-URL: #40615 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
PR-URL: #40615 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
PR-URL: #40615 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
Currently, an array is used as a queue to manage handles, when there are many handles, the
ArrayPrototypeShift
may become a bottleneck, so using the builtinlinkedlist
to reduce the time complexity ofhandoff
method to a constant level and may be helpful for #37343.