资讯专栏INFORMATION COLUMN

「旁门右道」CURL持久连接技巧

dongfangyiyu / 3493人阅读

摘要:HTTP2.0支持多路复用;CURL支持对HTTP1.1和HTTP2.0已建立连接的复用,如果旧连接已失效则主动关闭旧连接,如果连接有效则尝试使用已有连接传输数据。

背景

对于同一服务可能存在多次调用的情况。如果每次调用都重新建立一次TCP连接,不仅会带来大量重复工作,还会增加连接超时或连接错误的概率。为了减少TCP连接次数、最大限度地提高连接利用率,需要能够重复利用每个TCP连接。

原理

HTTP1.1与HTTP2.0支持对于一次TCP连接建立的通道重复使用。

HTTP2.0支持多路复用

CURL支持对HTTP1.1和HTTP2.0已建立连接的复用,如果旧连接已失效则主动关闭旧连接,如果连接有效则尝试使用已有连接传输数据。关键代码如下:

</>复制代码

  1. // php/ext/url/interface.c
  2. /* {{{ proto bool curl_exec(resource ch)
  3. Perform a cURL session */
  4. PHP_FUNCTION(curl_exec)
  5. {
  6. CURLcode error;
  7. zval *zid;
  8. php_curl *ch;
  9. if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r", &zid) == FAILURE) {
  10. return;
  11. }
  12. ZEND_FETCH_RESOURCE(ch, php_curl *, &zid, -1, le_curl_name, le_curl);
  13. _php_curl_verify_handlers(ch, 1 TSRMLS_CC);
  14. _php_curl_cleanup_handle(ch);
  15. // 调用CURL方法
  16. error = curl_easy_perform(ch->cp);
  17. SAVE_CURL_ERROR(ch, error);
  18. /* CURLE_PARTIAL_FILE is returned by HEAD requests */
  19. if (error != CURLE_OK && error != CURLE_PARTIAL_FILE) {
  20. if (ch->handlers->write->buf.len > 0) {
  21. smart_str_free(&ch->handlers->write->buf);
  22. }
  23. RETURN_FALSE;
  24. }
  25. if (ch->handlers->std_err) {
  26. php_stream *stream;
  27. stream = (php_stream*)zend_fetch_resource(&ch->handlers->std_err TSRMLS_CC, -1, NULL, NULL, 2, php_file_le_stream(), php_file_le_pstream());
  28. if (stream) {
  29. php_stream_flush(stream);
  30. }
  31. }
  32. if (ch->handlers->write->method == PHP_CURL_RETURN && ch->handlers->write->buf.len > 0) {
  33. smart_str_0(&ch->handlers->write->buf);
  34. RETURN_STRINGL(ch->handlers->write->buf.c, ch->handlers->write->buf.len, 1);
  35. }
  36. /* flush the file handle, so any remaining data is synched to disk */
  37. if (ch->handlers->write->method == PHP_CURL_FILE && ch->handlers->write->fp) {
  38. fflush(ch->handlers->write->fp);
  39. }
  40. if (ch->handlers->write_header->method == PHP_CURL_FILE && ch->handlers->write_header->fp) {
  41. fflush(ch->handlers->write_header->fp);
  42. }
  43. if (ch->handlers->write->method == PHP_CURL_RETURN) {
  44. RETURN_EMPTY_STRING();
  45. } else {
  46. RETURN_TRUE;
  47. }
  48. }
  49. /* }}} */
  50. // curl/lib/url.c line 4328
  51. // 主动关闭已失效的连接
  52. prune_dead_connections(data);
  53. /*************************************************************
  54. * Check the current list of connections to see if we can
  55. * re-use an already existing one or if we have to create a
  56. * new one.
  57. *************************************************************/
  58. /* reuse_fresh is TRUE if we are told to use a new connection by force, but
  59. we only acknowledge this option if this is not a re-used connection
  60. already (which happens due to follow-location or during a HTTP
  61. authentication phase). */
  62. if(data->set.reuse_fresh && !data->state.this_is_a_follow)
  63. reuse = FALSE;
  64. else
  65. // 从已存在的链接中查找出可以复用的连接(如果是不支持多路复用且正在使用中的连接会被忽略)
  66. reuse = ConnectionExists(data, conn, &conn_temp, &force_reuse, &waitpipe);
  67. /* If we found a reusable connection, we may still want to
  68. open a new connection if we are pipelining. */
  69. if(reuse && !force_reuse && IsPipeliningPossible(data, conn_temp)) {
  70. size_t pipelen = conn_temp->send_pipe.size + conn_temp->recv_pipe.size;
  71. if(pipelen > 0) {
  72. infof(data, "Found connection %ld, with requests in the pipe (%zu)
  73. ",
  74. conn_temp->connection_id, pipelen);
  75. if(conn_temp->bundle->num_connections < max_host_connections &&
  76. data->state.conn_cache->num_connections < max_total_connections) {
  77. /* We want a new connection anyway */
  78. reuse = FALSE;
  79. infof(data, "We can reuse, but we want a new connection anyway
  80. ");
  81. }
  82. }
  83. }
  84. if(reuse) {
  85. /*
  86. * We already have a connection for this, we got the former connection
  87. * in the conn_temp variable and thus we need to cleanup the one we
  88. * just allocated before we can move along and use the previously
  89. * existing one.
  90. */
  91. conn_temp->inuse = TRUE; /* mark this as being in use so that no other
  92. handle in a multi stack may nick it */
  93. reuse_conn(conn, conn_temp);
  94. free(conn); /* we don"t need this anymore */
  95. conn = conn_temp;
  96. *in_connect = conn;
  97. infof(data, "Re-using existing connection! (#%ld) with %s %s
  98. ",
  99. conn->connection_id,
  100. conn->bits.proxy?"proxy":"host",
  101. conn->socks_proxy.host.name ? conn->socks_proxy.host.dispname :
  102. conn->http_proxy.host.name ? conn->http_proxy.host.dispname :
  103. conn->host.dispname);
  104. }
  105. else {
  106. /* We have decided that we want a new connection. However, we may not
  107. be able to do that if we have reached the limit of how many
  108. connections we are allowed to open. */
  109. struct connectbundle *bundle = NULL;
  110. if(conn->handler->flags & PROTOPT_ALPN_NPN) {
  111. /* The protocol wants it, so set the bits if enabled in the easy handle
  112. (default) */
  113. if(data->set.ssl_enable_alpn)
  114. conn->bits.tls_enable_alpn = TRUE;
  115. if(data->set.ssl_enable_npn)
  116. conn->bits.tls_enable_npn = TRUE;
  117. }
  118. if(waitpipe)
  119. /* There is a connection that *might* become usable for pipelining
  120. "soon", and we wait for that */
  121. connections_available = FALSE;
  122. else
  123. bundle = Curl_conncache_find_bundle(conn, data->state.conn_cache);
  124. if(max_host_connections > 0 && bundle &&
  125. (bundle->num_connections >= max_host_connections)) {
  126. struct connectdata *conn_candidate;
  127. /* The bundle is full. Let"s see if we can kill a connection. */
  128. conn_candidate = find_oldest_idle_connection_in_bundle(data, bundle);
  129. if(conn_candidate) {
  130. /* Set the connection"s owner correctly, then kill it */
  131. conn_candidate->data = data;
  132. (void)Curl_disconnect(conn_candidate, /* dead_connection */ FALSE);
  133. }
  134. else {
  135. infof(data, "No more connections allowed to host: %d
  136. ",
  137. max_host_connections);
  138. connections_available = FALSE;
  139. }
  140. }
  141. if(connections_available &&
  142. (max_total_connections > 0) &&
  143. (data->state.conn_cache->num_connections >= max_total_connections)) {
  144. struct connectdata *conn_candidate;
  145. /* The cache is full. Let"s see if we can kill a connection. */
  146. conn_candidate = Curl_conncache_oldest_idle(data);
  147. if(conn_candidate) {
  148. /* Set the connection"s owner correctly, then kill it */
  149. conn_candidate->data = data;
  150. (void)Curl_disconnect(conn_candidate, /* dead_connection */ FALSE);
  151. }
  152. else {
  153. infof(data, "No connections available in cache
  154. ");
  155. connections_available = FALSE;
  156. }
  157. }
  158. if(!connections_available) {
  159. infof(data, "No connections available.
  160. ");
  161. conn_free(conn);
  162. *in_connect = NULL;
  163. result = CURLE_NO_CONNECTION_AVAILABLE;
  164. goto out;
  165. }
  166. else {
  167. /*
  168. * This is a brand new connection, so let"s store it in the connection
  169. * cache of ours!
  170. */
  171. Curl_conncache_add_conn(data->state.conn_cache, conn);
  172. }
  173. #if defined(USE_NTLM)
  174. /* If NTLM is requested in a part of this connection, make sure we don"t
  175. assume the state is fine as this is a fresh connection and NTLM is
  176. connection based. */
  177. if((data->state.authhost.picked & (CURLAUTH_NTLM | CURLAUTH_NTLM_WB)) &&
  178. data->state.authhost.done) {
  179. infof(data, "NTLM picked AND auth done set, clear picked!
  180. ");
  181. data->state.authhost.picked = CURLAUTH_NONE;
  182. data->state.authhost.done = FALSE;
  183. }
  184. if((data->state.authproxy.picked & (CURLAUTH_NTLM | CURLAUTH_NTLM_WB)) &&
  185. data->state.authproxy.done) {
  186. infof(data, "NTLM-proxy picked AND auth done set, clear picked!
  187. ");
  188. data->state.authproxy.picked = CURLAUTH_NONE;
  189. data->state.authproxy.done = FALSE;
  190. }
  191. #endif
  192. }
  193. // curl/lib/multi.c
  194. /*
  195. * This function scans the connection cache for half-open/dead connections,
  196. * closes and removes them.
  197. * The cleanup is done at most once per second.
  198. */
  199. static void prune_dead_connections(struct Curl_easy *data)
  200. {
  201. struct curltime now = Curl_now();
  202. time_t elapsed = Curl_timediff(now, data->state.conn_cache->last_cleanup);
  203. if(elapsed >= 1000L) {
  204. Curl_conncache_foreach(data, data->state.conn_cache, data,
  205. call_disconnect_if_dead);
  206. data->state.conn_cache->last_cleanup = now;
  207. }
  208. }
PHP实现

</>复制代码

  /**
   * Singleton wrapper around a single cURL handle.
   *
   * Every request is issued on the same handle, which is reset (not closed)
   * after each request. Details of the most recent request are available via
   * getLastErrorCode(), getLastErrorMsg() and getLastCurlInfo().
   */
  class Curl
  {
      /** @var mixed cURL handle shared by all requests; null once closed */
      protected $ch = null;
      /** @var int cURL error number of the most recent request (0 = none) */
      protected $errorCode = 0;
      /** @var string cURL error message of the most recent request */
      protected $errorMsg = "";
      /** @var array curl_getinfo() data of the most recent request, plus extras */
      protected $curlInfo = array();
      /** @var resource|null temporary stream capturing CURLOPT_VERBOSE output */
      protected $verbose = null;
      /** @var self|null the one shared instance */
      private static $instance = null;

      /**
       * @return int cURL error number of the most recent request, 0 if it succeeded
       */
      public function getLastErrorCode()
      {
          return $this->errorCode;
      }

      /**
       * @return string cURL error message of the most recent request, "" if it succeeded
       */
      public function getLastErrorMsg()
      {
          return $this->errorMsg;
      }

      /**
       * @return array curl_getinfo() result of the most recent request, extended
       *               with "verbose" and, on failure, "error_code"/"error_message"
       */
      public function getLastCurlInfo()
      {
          return $this->curlInfo;
      }

      /**
       * Private: instances are obtained through getInstance().
       */
      private function __construct()
      {
          $this->ch = curl_init();
      }

      /**
       * Singleton: cloning is not allowed.
       */
      private function __clone()
      {
          throw new CurlException("The Curl library can't be cloned");
      }

      /**
       * Return the shared instance, creating it on first use.
       *
       * @return self
       */
      public static function getInstance()
      {
          if (!self::$instance instanceof self) {
              self::$instance = new self();
          }
          return self::$instance;
      }

      /**
       * Send a GET request (HTTP/1.1).
       *
       * @param string $url
       * @param int    $timeout timeout in seconds
       * @param array  $params  query parameters; they may also be given directly in $url
       * @param array  $headers accepts both ["Accept" => "application/json"]
       *                        and ["Accept: application/json"]
       * @return mixed response body, or false on failure
       */
      public function get($url, $timeout = 3, $params = [], $headers = [])
      {
          return $this->sendGet($url, $timeout, $params, $headers, CURL_HTTP_VERSION_1_1);
      }

      /**
       * Send a POST request (HTTP/1.1).
       *
       * @param string $url
       * @param mixed  $params             request body
       * @param array  $headers            accepts both ["Accept" => "application/json"]
       *                                   and ["Accept: application/json"]
       * @param bool   $withHttpBuildQuery encode $params with http_build_query() first
       * @param int    $timeout            timeout in seconds
       * @return mixed response body, or false on failure
       */
      public function post($url, $params = [], $headers = [], $withHttpBuildQuery = true, $timeout = 3)
      {
          return $this->sendPost($url, $params, $headers, $withHttpBuildQuery, $timeout, CURL_HTTP_VERSION_1_1);
      }

      /**
       * Send a GET request, asking cURL for HTTP/2.
       *
       * @param string $url     request URL
       * @param int    $timeout timeout in seconds
       * @param array  $params  query parameters; they may also be given directly in $url
       * @param array  $headers accepts both ["Accept" => "application/json"]
       *                        and ["Accept: application/json"]
       * @return mixed response body, or false on failure
       */
      public function get2($url, $timeout = 3, $params = [], $headers = [])
      {
          return $this->sendGet($url, $timeout, $params, $headers, CURL_HTTP_VERSION_2_0);
      }

      /**
       * Send a POST request, asking cURL for HTTP/2.
       *
       * @param string $url                request URL
       * @param mixed  $params             request body
       * @param array  $headers            accepts both ["Accept" => "application/json"]
       *                                   and ["Accept: application/json"]
       * @param bool   $withHttpBuildQuery encode $params with http_build_query() first
       * @param int    $timeout            timeout in seconds
       * @return mixed response body, or false on failure
       */
      public function post2($url, $params = [], $headers = [], $withHttpBuildQuery = true, $timeout = 3)
      {
          return $this->sendPost($url, $params, $headers, $withHttpBuildQuery, $timeout, CURL_HTTP_VERSION_2_0);
      }

      /**
       * Close the handle before the instance is destroyed.
       */
      public function __destruct()
      {
          $this->close();
      }

      /**
       * Close the cURL handle.
       *
       * Per the original author's note this step may be omitted under php-fpm,
       * where the handle is released when the request ends.
       * A later request on this instance opens a fresh handle.
       */
      public function close()
      {
          // A truthiness check covers both the resource returned by older PHP
          // versions and the CurlHandle object returned since PHP 8.0, for
          // which is_resource() is false.
          if ($this->ch) {
              curl_close($this->ch);
          }
          $this->ch = null;
      }

      /**
       * Append query parameters to a URL.
       *
       * @param string $url    request URL
       * @param array  $params parameters to append
       * @return string $url unchanged when $params is empty, otherwise $url
       *                followed by "?" or "&" and the encoded parameters
       */
      protected function buildQuery($url, $params)
      {
          if (!$params) {
              return $url;
          }
          $separator = (strpos($url, "?") === false) ? "?" : "&";
          return $url . $separator . http_build_query($params);
      }

      /**
       * Apply the options shared by every request.
       *
       * @param string $url         request URL
       * @param int    $timeout     timeout in seconds
       * @param array  $headers     request headers
       * @param int    $httpVersion CURL_HTTP_VERSION_* constant, HTTP/1.1 by default
       */
      protected function setGeneralOption($url, $timeout, $headers = array(), $httpVersion = CURL_HTTP_VERSION_1_1)
      {
          $ch = $this->handle();
          curl_setopt($ch, CURLOPT_URL, $url);
          // Verify the peer certificate and host name on HTTPS requests.
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
          curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
          curl_setopt($ch, CURLOPT_HTTP_VERSION, $httpVersion);
          // Capture cURL's verbose log in a temporary stream for later inspection.
          // The original note calls this mutually exclusive with CURLOPT_HEADER.
          // NOTE(review): the conflict may actually be with CURLINFO_HEADER_OUT - confirm.
          curl_setopt($ch, CURLOPT_VERBOSE, 1);
          $this->verbose = fopen("php://temp", "w+");
          curl_setopt($ch, CURLOPT_STDERR, $this->verbose);
          if ($headers && is_array($headers)) {
              $realHeader = [];
              foreach ($headers as $key => $val) {
                  // "Name" => "value" pairs are joined; plain strings pass through.
                  if (is_string($key)) {
                      $realHeader[] = $key. ": ". $val;
                  } else {
                      $realHeader[] = $val;
                  }
              }
              curl_setopt($ch, CURLOPT_HTTPHEADER, $realHeader);
          }
      }

      /**
       * Run the prepared request and record its diagnostics.
       *
       * @return mixed response body, or false on failure
       */
      protected function execute()
      {
          // Clear the previous outcome so an earlier failure is not reported
          // for a request that succeeds.
          $this->errorCode = 0;
          $this->errorMsg = "";

          $ch = $this->handle();
          $result = curl_exec($ch);

          // Record detailed debug information.
          $this->curlInfo = curl_getinfo($ch);
          $this->curlInfo["verbose"] = "";
          if (is_resource($this->verbose)) {
              rewind($this->verbose);
              $this->curlInfo["verbose"] = stream_get_contents($this->verbose);
          }
          $this->verbose = null;

          if ($result === false) {
              $this->errorCode = curl_errno($ch);
              $this->errorMsg = curl_error($ch);
              $this->curlInfo["error_code"] = $this->errorCode;
              $this->curlInfo["error_message"] = $this->errorMsg;
          }

          // Reset the options but keep the handle for the next request.
          curl_reset($ch);
          return $result;
      }

      /**
       * Return the cURL handle, opening a new one if it has been closed.
       *
       * @return mixed
       */
      protected function handle()
      {
          if (!$this->ch) {
              $this->ch = curl_init();
          }
          return $this->ch;
      }

      /**
       * Shared implementation of get() and get2().
       */
      private function sendGet($url, $timeout, $params, $headers, $httpVersion)
      {
          $url = $this->buildQuery($url, $params);
          $this->setGeneralOption($url, $timeout, $headers, $httpVersion);
          return $this->execute();
      }

      /**
       * Shared implementation of post() and post2().
       */
      private function sendPost($url, $params, $headers, $withHttpBuildQuery, $timeout, $httpVersion)
      {
          if ($withHttpBuildQuery) {
              if (!is_array($params)) {
                  $params = [$params];
              }
              $params = http_build_query($params);
          }
          $ch = $this->handle();
          curl_setopt($ch, CURLOPT_POST, 1);
          curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
          $this->setGeneralOption($url, $timeout, $headers, $httpVersion);
          return $this->execute();
      }
  }
  /**
   * Exception type thrown by the Curl wrapper.
   */
  class CurlException extends Exception
  {
  }
拓展

由于PHP-FPM的回收机制,一次请求结束后CURL的资源将会被回收,这意味着这次请求建立的TCP连接将会被关闭,在这种情况下就无法达到跨请求复用的目的。因此可以利用独立进程的方式来维护已建立的TCP连接,专门负责CURL的请求。

对于HTTP2.0而言,由于支持多路复用,因此对于一个域名的请求建立一次tcp连接后可以支持同时多个请求的处理(HTTP1.1一个tcp连接同时只支持一个请求,如果第二个请求同时到达则CURL将建立新的tcp连接以便完成请求),利用这一特性使用独立进程配合协程可以达到对于单一场景的curl高并发的支撑。

同理除PHP外可扩展到其他语言。

源地址 By佐柱

转载请注明出处,也欢迎偶尔逛逛我的小站,谢谢 :)

文章版权归作者所有,未经允许请勿转载,若此文章存在违规行为,您可以联系管理员删除。

转载请注明本文地址:https://www.ucloud.cn/yun/26307.html

相关文章

  • 有必要参加SEO培训吗?自学可以吗?

    摘要:所以,我强烈建议新人要舍得投资自己的大脑,至少要参加一个系统的培训班,系统地学习,避免自学浪费宝贵的时间,没有建站技术能学好吗答这个问题要看情况,曾庆平在前面也讲了,不会建站技术的很大程度上是属于第一层次的。 SEO人员在职场上总会碰上一些难解的问题,很多人也不懂得自己学习SEO该往...

    不知名网友 评论0 收藏0
  • Lumen 初体验(二)

    摘要:的现状目前是版本,是基于开发。入口文件启动文件和配置文件框架的入口文件是。在路由中指定控制器类必须写全命名空间,不然会提示找不到类。目前支持四种数据库系统以及。使用时发生错误,因为在文件中,的默认驱动是。 最近使用 Lumen 做了 2 个业余项目,特此记录和分享一下。 Lumen 的介绍 在使用一项新的技术时,了解其应用场景是首要的事情。 Lumen 的口号:为速度而生的 La...

    Cheriselalala 评论0 收藏0

发表评论

0条评论

最新活动
阅读需要支付1元查看
<