概述
有网站接入nginx以后报浏览器有白页。
我curl的结果返回是 curl: (52) Empty reply from server
, 从日志报表也看出什么异常的请求。
查了下对应的access日志,返回的是正常的状态码。
看错误日志,提示upstream sent invalid chunked response while reading upstream
.
通过nc和openssl 分别对80和443端口发送请求过去,返回的确实是chunked编码的内容。
到底是返回的内容有问题,还是nginx解析chunked有问题。
问题定位
我们先跟踪一下nginx代码:
反向代理的handler,ngx_http_proxy_handler
函数,调用ngx_http_read_client_request_body(r, ngx_http_upstream_init);
开始回源。
ngx_http_upstream_init -> ngx_http_upstream_init_request -> ngx_http_upstream_connect
在ngx_http_upstream_connect 中设置回掉handler:
u->write_event_handler = ngx_http_upstream_send_request_handler;
u->read_event_handler = ngx_http_upstream_process_header;
当upstream有响应时,该ngx_http_upstream_process_header 被调用。
ngx_http_upstream_process_header -> ngx_http_upstream_process_upstream -> ngx_http_upstream_process_upstream -> ngx_event_pipe(p, 0) -> ngx_event_pipe_read_upstream -> p->input_filter.
在ngx_http_upstream_process_upstream函数中,把上下游回掉函数再次做了修改
u->read_event_handler = ngx_http_upstream_process_upstream;
r->write_event_handler = ngx_http_upstream_process_downstream;
最后p->input_filter 的赋值是ngx_http_upstream_send_response函数中调用了u->input_filter_init,而input_filter_init 是指向ngx_http_proxy_input_filter_init函数,其值是在proxy 模块的ngx_http_proxy_handler函数赋值的。
在ngx_http_proxy_input_filter_init函数中可以看到,对于chunked的响应,pipe->input_filter 指向ngx_http_proxy_chunked_filter 函数。
static ngx_int_t
ngx_http_proxy_input_filter_init(void *data)
{
...
else if (u->headers_in.chunked) {
/* chunked */
u->pipe->input_filter = ngx_http_proxy_chunked_filter;
u->pipe->length = 3; /* "0" LF LF */
u->input_filter = ngx_http_proxy_non_buffered_chunked_filter;
u->length = 1;
} else if
...
}
在ngx_http_proxy_chunked_filter函数中调用了ngx_http_parse_chunked解析chunked协议的内容。
static ngx_int_t
ngx_http_proxy_chunked_filter(ngx_event_pipe_t *p, ngx_buf_t *buf)
{
ngx_int_t rc;
ngx_buf_t *b, **prev;
ngx_chain_t *cl;
ngx_http_request_t *r;
ngx_http_proxy_ctx_t *ctx;
if (buf->pos == buf->last) {
return NGX_OK;
}
r = p->input_ctx;
ctx = ngx_http_get_module_ctx(r, ngx_http_proxy_module);
if (ctx == NULL) {
return NGX_ERROR;
}
b = NULL;
prev = &buf->shadow;
for ( ;; ) {
rc = ngx_http_parse_chunked(r, buf, &ctx->chunked);
if (rc == NGX_OK) {
/* a chunk has been parsed successfully */
cl = ngx_chain_get_free_buf(p->pool, &p->free);
if (cl == NULL) {
return NGX_ERROR;
}
b = cl->buf;
ngx_memzero(b, sizeof(ngx_buf_t));
b->pos = buf->pos;
b->start = buf->start;
b->end = buf->end;
b->tag = p->tag;
b->temporary = 1;
b->recycled = 1;
*prev = b;
prev = &b->shadow;
if (p->in) {
*p->last_in = cl;
} else {
p->in = cl;
}
p->last_in = &cl->next;
/* STUB */ b->num = buf->num;
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, p->log, 0,
"input buf #%d %p", b->num, b->pos);
if (buf->last - buf->pos >= ctx->chunked.size) {
buf->pos += (size_t) ctx->chunked.size;
b->last = buf->pos;
ctx->chunked.size = 0;
continue;
}
ctx->chunked.size -= buf->last - buf->pos;
buf->pos = buf->last;
b->last = buf->last;
continue;
}
if (rc == NGX_DONE) {
/* a whole response has been parsed successfully */
p->upstream_done = 1;
r->upstream->keepalive = !r->upstream->headers_in.connection_close;
break;
}
if (rc == NGX_AGAIN) {
/* set p->length, minimal amount of data we want to see */
p->length = ctx->chunked.length;
break;
}
/* invalid response */
// 代码执行到这里,打印错误日志并返回
ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
"upstream sent invalid chunked response");
return NGX_ERROR;
}
ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
"http proxy chunked state %ui, length %O",
ctx->chunked.state, p->length);
if (b) {
b->shadow = buf;
b->last_shadow = 1;
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, p->log, 0,
"input buf %p %z", b->pos, b->last - b->pos);
return NGX_OK;
}
/* there is no data record in the buf, add it to free chain */
if (ngx_event_pipe_add_free_buf(p, buf) != NGX_OK) {
return NGX_ERROR;
}
return NGX_OK;
}
我们已经跟踪到报错的地方,这里就是确认是否是返回内容不是chunked编码还是nginx解析有问题。
因为回源是https协议回源,通过curl 返回的内容已经被curl解码过了,看不出具体chunked编码的内容。
我们需要用openssl openssl s_client -servername www.SERVERNAME.com -tlsextdebug -connect www.YOURSERVER.com:443
这样,建立tls连接后,把curl要发送的内容发过去。返回的内容确实是chunked编码的,怀疑nginx有bug,对比了线上nginx版本和nginx git上相关的提交,chunked部分的解码确实有改动, 手动把nginx中解析相关的代码摘出来(代码如下)运行结果并没问题,注释掉nginx最新改动的代码也不影响结果。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NGX_OK 0
#define NGX_ERROR -1
#define NGX_AGAIN -2
#define NGX_BUSY -3
#define NGX_DONE -4
#define NGX_DECLINED -5
#define NGX_ABORT -6
#ifndef NGX_MAX_OFF_T_VALUE
#define NGX_MAX_OFF_T_VALUE 9223372036854775807LL
#endif
typedef unsigned char u_char;
#define LF (u_char) '\n'
#define CR (u_char) '\r'
#define CRLF "\r\n"
typedef struct ngx_buf_s ngx_buf_t;
struct ngx_buf_s {
u_char *pos;
u_char *last;
};
typedef struct ngx_http_chunked_s ngx_http_chunked_t;
struct ngx_http_chunked_s {
unsigned int state;
off_t size;
off_t length;
};
int
ngx_http_parse_chunked(ngx_buf_t *b,
ngx_http_chunked_t *ctx)
{
u_char *pos, ch, c;
int rc;
enum {
sw_chunk_start = 0,
sw_chunk_size,
sw_chunk_extension,
sw_chunk_extension_almost_done,
sw_chunk_data,
sw_after_data,
sw_after_data_almost_done,
sw_last_chunk_extension,
sw_last_chunk_extension_almost_done,
sw_trailer,
sw_trailer_almost_done,
sw_trailer_header,
sw_trailer_header_almost_done
} state;
state = ctx->state;
if (state == sw_chunk_data && ctx->size == 0) {
state = sw_after_data;
}
rc = NGX_AGAIN;
for (pos = b->pos; pos < b->last; pos++) {
ch = *pos;
switch (state) {
case sw_chunk_start:
if (ch >= '0' && ch <= '9') {
state = sw_chunk_size;
ctx->size = ch - '0';
break;
}
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'f') {
state = sw_chunk_size;
ctx->size = c - 'a' + 10;
break;
}
goto invalid;
case sw_chunk_size:
if (ctx->size > NGX_MAX_OFF_T_VALUE / 16) {
goto invalid;
}
if (ch >= '0' && ch <= '9') {
ctx->size = ctx->size * 16 + (ch - '0');
break;
}
c = (u_char) (ch | 0x20);
if (c >= 'a' && c <= 'f') {
ctx->size = ctx->size * 16 + (c - 'a' + 10);
break;
}
if (ctx->size == 0) {
switch (ch) {
case CR:
state = sw_last_chunk_extension_almost_done;
break;
case LF:
state = sw_trailer;
break;
case ';':
case ' ':
case '\t':
state = sw_last_chunk_extension;
break;
default:
goto invalid;
}
break;
}
switch (ch) {
case CR:
state = sw_chunk_extension_almost_done;
break;
case LF:
state = sw_chunk_data;
break;
case ';':
case ' ':
case '\t':
state = sw_chunk_extension;
break;
default:
goto invalid;
}
break;
case sw_chunk_extension:
switch (ch) {
case CR:
state = sw_chunk_extension_almost_done;
break;
case LF:
state = sw_chunk_data;
}
break;
case sw_chunk_extension_almost_done:
if (ch == LF) {
state = sw_chunk_data;
break;
}
goto invalid;
case sw_chunk_data:
rc = NGX_OK;
goto data;
case sw_after_data:
switch (ch) {
case CR:
state = sw_after_data_almost_done;
break;
case LF:
state = sw_chunk_start;
break;
default:
goto invalid;
}
break;
case sw_after_data_almost_done:
if (ch == LF) {
state = sw_chunk_start;
break;
}
goto invalid;
case sw_last_chunk_extension:
switch (ch) {
case CR:
state = sw_last_chunk_extension_almost_done;
break;
case LF:
state = sw_trailer;
}
break;
case sw_last_chunk_extension_almost_done:
if (ch == LF) {
state = sw_trailer;
break;
}
goto invalid;
case sw_trailer:
switch (ch) {
case CR:
state = sw_trailer_almost_done;
break;
case LF:
goto done;
default:
state = sw_trailer_header;
}
break;
case sw_trailer_almost_done:
if (ch == LF) {
goto done;
}
goto invalid;
case sw_trailer_header:
switch (ch) {
case CR:
state = sw_trailer_header_almost_done;
break;
case LF:
state = sw_trailer;
}
break;
case sw_trailer_header_almost_done:
if (ch == LF) {
state = sw_trailer;
break;
}
goto invalid;
}
}
data:
ctx->state = state;
b->pos = pos;
if (ctx->size > NGX_MAX_OFF_T_VALUE - 5) {
goto invalid;
}
switch (state) {
case sw_chunk_start:
ctx->length = 3 /* "0" LF LF */;
break;
case sw_chunk_size:
ctx->length = 1 /* LF */
+ (ctx->size ? ctx->size + 4 /* LF "0" LF LF */
: 1 /* LF */);
break;
case sw_chunk_extension:
case sw_chunk_extension_almost_done:
ctx->length = 1 /* LF */ + ctx->size + 4 /* LF "0" LF LF */;
break;
case sw_chunk_data:
ctx->length = ctx->size + 4 /* LF "0" LF LF */;
break;
case sw_after_data:
case sw_after_data_almost_done:
ctx->length = 4 /* LF "0" LF LF */;
break;
case sw_last_chunk_extension:
case sw_last_chunk_extension_almost_done:
ctx->length = 2 /* LF LF */;
break;
case sw_trailer:
case sw_trailer_almost_done:
ctx->length = 1 /* LF */;
break;
case sw_trailer_header:
case sw_trailer_header_almost_done:
ctx->length = 2 /* LF LF */;
break;
}
return rc;
done:
ctx->state = 0;
b->pos = pos + 1;
return NGX_DONE;
invalid:
return NGX_ERROR;
}
int main(void)
{
ngx_http_chunked_t chunked;
memset(&chunked, 0, sizeof(chunked));
ngx_buf_t b, *buf;
u_char *p = (u_char *)"8a\r\n{\"timestamp\":1584953056141,\"status\":400,\"error\":\"Bad Request\",\"message\":\"No message available\",\"path\":\"/recipt/checkFp/getFPInfoByNSRSBH\"}\r\n0\r\n\r\n";
// len = 149;
int rc;
buf = &b;
b.pos = p;
b.last = p + 149;
for ( ;; ) {
rc = ngx_http_parse_chunked(buf, &chunked);
if (rc == NGX_OK) {
if (buf->last - buf->pos >= chunked.size) {
buf->pos += (size_t) chunked.size;
chunked.size = 0;
} else {
chunked.size -= buf->last - buf->pos;
buf->pos = buf->last;
}
fprintf(stderr, "ok\n");
continue;
}
if (rc == NGX_DONE) {
/* a whole response has been parsed successfully */
fprintf(stderr, "done\n");
break;
}
if (rc == NGX_AGAIN) {
fprintf(stderr, "again\n");
break;
}
/* invalid response */
fprintf(stderr, "error\n");
return NGX_ERROR;
}
return 0;
}
排查了将近2个小时没有结果。只能在本地搭一套和线上一样的nginx环境进行调试了。根据nginx代码调用路径在ngx_http_proxy_chunked_filter函数中打断点,打印从源站接受到的响应,body的编码并不是chunked格式的。
通过openssl 返回的编码是chunked的,为什么gdb从nginx看到就不是chunked格式。把nginx回源时的请求抓下来,在openssl中回放,发现只要带了Connection: close
头,resp body的编码就不是chunked的,而响应的头依然带了Transfer-Encoding: chunked
。
最后在nginx中把该头清理proxy_set_header Connection "";
可以正常返回响应。
在curl中带了该头-H'Connection: close'
发现curl也解析不了,并曝出了错误curl: (56) Illegal or missing hexadecimal sequence in chunked-encoding
。
最后只能给源站反馈这个问题。源站在没接入nginx之前,所有的浏览器请求都不会带着Connection: close 这个头么,否则源站的问题早已经暴露才对。
评论前必须登录!
注册