From 011b51cb7e912af2674ee6685a51651182ceab4f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 17 Jun 2007 23:39:28 +0000 Subject: [PATCH] Marginal hacking to improve the speed of COPY OUT. I had found in a bit of profiling that CopyAttributeOutText was taking an unreasonable fraction of the backend run time (like 66%!) on the following trivial test case: $ time psql -c "copy (select repeat('xyzzy',50) from generate_series(1,10000000)) to stdout" regression >/dev/null The time is all being spent on scanning the string for characters to be escaped, which most of the time there aren't any of. Some tweaking to take as many tests as possible out of the inner loop reduced the runtime of this example by more than 10%. In a real-world case it wouldn't be as useful a speedup, but it still seems worth adding a few lines here. --- src/backend/commands/copy.c | 128 +++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 54 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 885411cf2c..493d2944f1 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.283 2007/04/27 22:05:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.284 2007/06/17 23:39:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3075,68 +3075,88 @@ CopyAttributeOutText(CopyState cstate, char *string) * We have to grovel through the string searching for control characters * and instances of the delimiter character. In most cases, though, these * are infrequent. To avoid overhead from calling CopySendData once per - * character, we dump out all characters between replaceable characters in + * character, we dump out all characters between escaped characters in * a single call. The loop invariant is that the data from "start" to * "ptr" can be sent literally, but hasn't yet been. + * + * We can skip pg_encoding_mblen() overhead when encoding is safe, because + * in valid backend encodings, extra bytes of a multibyte character never + * look like ASCII. This loop is sufficiently performance-critical that + * it's worth making two copies of it to get the IS_HIGHBIT_SET() test + * out of the normal safe-encoding path. */ - start = ptr; - while ((c = *ptr) != '\0') + if (cstate->encoding_embeds_ascii) { - switch (c) + start = ptr; + while ((c = *ptr) != '\0') { - case '\b': - DUMPSOFAR(); - CopySendString(cstate, "\\b"); - start = ++ptr; - break; - case '\f': - DUMPSOFAR(); - CopySendString(cstate, "\\f"); - start = ++ptr; - break; - case '\n': - DUMPSOFAR(); - CopySendString(cstate, "\\n"); - start = ++ptr; - break; - case '\r': - DUMPSOFAR(); - CopySendString(cstate, "\\r"); - start = ++ptr; - break; - case '\t': - DUMPSOFAR(); - CopySendString(cstate, "\\t"); - start = ++ptr; - break; - case '\v': + if (c == '\\' || c == delimc) + { DUMPSOFAR(); - CopySendString(cstate, "\\v"); - start = ++ptr; - break; - case '\\': + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if ((unsigned char) c < (unsigned char) 0x20) + { + switch (c) + { + /* \r and \n must be escaped, the others are traditional */ + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + break; + default: + /* All ASCII control chars are length 1 */ + ptr++; + break; + } + } + else if (IS_HIGHBIT_SET(c)) + ptr += pg_encoding_mblen(cstate->client_encoding, ptr); + else + ptr++; + } + } + else + { + start = ptr; + while ((c = *ptr) != '\0') + { + if (c == '\\' || c == delimc) + { DUMPSOFAR(); - CopySendString(cstate, "\\\\"); - start = ++ptr; - break; - default: - if (c == delimc) + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + } + else if ((unsigned char) c < (unsigned char) 0x20) + { + switch (c) { - DUMPSOFAR(); - CopySendChar(cstate, '\\'); - start = ptr; /* we include char in next run */ + /* \r and \n must be escaped, the others are traditional */ + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': + DUMPSOFAR(); + CopySendChar(cstate, '\\'); + start = ptr++; /* we include char in next run */ + break; + default: + /* All ASCII control chars are length 1 */ + ptr++; + break; } - - /* - * We can skip pg_encoding_mblen() overhead when encoding is - * safe, because in valid backend encodings, extra bytes of a - * multibyte character never look like ASCII. - */ - if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) - ptr += pg_encoding_mblen(cstate->client_encoding, ptr); - else - ptr++; - break; + } + else + ptr++; } } -- 2.11.0