diff --git a/programs/util.c b/programs/util.c
index 96459b9d..81c10d0c 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -1103,7 +1103,7 @@ FileNamesTable* UTIL_createFNT_fromROTable(const char** filenames, size_t nbFile
 
 
 /*-****************************************
-*  count the number of physical cores
+*  count the number of cores
 ******************************************/
 
 #if defined(_WIN32) || defined(WIN32)
@@ -1112,10 +1112,36 @@ FileNamesTable* UTIL_createFNT_fromROTable(const char** filenames, size_t nbFile
 
 typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
 
-int UTIL_countPhysicalCores(void)
+/* CountSetBits() :
+ * number of bits set in a processor affinity mask,
+ * i.e. the number of logical processors it designates */
+static DWORD CountSetBits(ULONG_PTR bitMask)
 {
-    static int numPhysicalCores = 0;
-    if (numPhysicalCores != 0) return numPhysicalCores;
+    DWORD LSHIFT = sizeof(ULONG_PTR)*8 - 1;
+    DWORD bitSetCount = 0;
+    ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT;
+    DWORD i;
+
+    for (i = 0; i <= LSHIFT; ++i)
+    {
+        bitSetCount += ((bitMask & bitTest)?1:0);
+        bitTest/=2;
+    }
+
+    return bitSetCount;
+}
+
+/* UTIL_countCores() :
+ * @logical : 0 => physical cores, non-zero => logical cores.
+ * The count is cached along with the mode it was computed for. */
+int UTIL_countCores(int logical)
+{
+    static int numCores = 0;
+    static int cachedMode = -1;   /* mode numCores was last computed for */
+    logical = (logical != 0);
+    if (numCores != 0 && cachedMode == logical) return numCores;
+    cachedMode = logical;
+    numCores = 0;   /* the loop below accumulates */
 
     {   LPFN_GLPI glpi;
         BOOL done = FALSE;
@@ -1161,7 +1187,10 @@ int UTIL_countPhysicalCores(void)
         while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {
 
             if (ptr->Relationship == RelationProcessorCore) {
-                numPhysicalCores++;
+                if (logical)
+                    numCores += (int)CountSetBits(ptr->ProcessorMask);
+                else
+                    numCores++;
             }
 
             ptr++;
@@ -1170,17 +1199,17 @@ int UTIL_countPhysicalCores(void)
 
         free(buffer);
 
-        return numPhysicalCores;
+        return numCores;
     }
 
 failed:
     /* try to fall back on GetSystemInfo */
     {   SYSTEM_INFO sysinfo;
         GetSystemInfo(&sysinfo);
-        numPhysicalCores = sysinfo.dwNumberOfProcessors;
-        if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */
+        numCores = sysinfo.dwNumberOfProcessors;
+        if (numCores == 0) numCores = 1; /* just in case */
     }
-    return numPhysicalCores;
+    return numCores;
 }
 
 #elif defined(__APPLE__)
@@ -1189,24 +1218,27 @@ failed:
 
 /* Use apple-provided syscall
  * see: man 3 sysctl */
-int UTIL_countPhysicalCores(void)
+int UTIL_countCores(int logical)
 {
-    static S32 numPhysicalCores = 0; /* apple specifies int32_t */
-    if (numPhysicalCores != 0) return numPhysicalCores;
+    static S32 numCores = 0; /* apple specifies int32_t */
+    static int cachedMode = -1;   /* mode numCores was last computed for */
+    logical = (logical != 0);
+    if (numCores != 0 && cachedMode == logical) return numCores;
+    cachedMode = logical;
 
     {   size_t size = sizeof(S32);
-        int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
+        int const ret = sysctlbyname(logical ? "hw.logicalcpu" : "hw.physicalcpu", &numCores, &size, NULL, 0);
         if (ret != 0) {
             if (errno == ENOENT) {
                 /* entry not present, fall back on 1 */
-                numPhysicalCores = 1;
+                numCores = 1;
             } else {
-                perror("zstd: can't get number of physical cpus");
+                perror("zstd: can't get number of cpus");
                 exit(1);
             }
         }
 
-        return numPhysicalCores;
+        return numCores;
     }
 }
 
@@ -1215,16 +1247,19 @@ int UTIL_countPhysicalCores(void)
 /* parse /proc/cpuinfo
  * siblings / cpu cores should give hyperthreading ratio
  * otherwise fall back on sysconf */
-int UTIL_countPhysicalCores(void)
+int UTIL_countCores(int logical)
 {
-    static int numPhysicalCores = 0;
+    static int numCores = 0;
+    static int cachedMode = -1;   /* mode numCores was last computed for */
 
-    if (numPhysicalCores != 0) return numPhysicalCores;
+    logical = (logical != 0);
+    if (numCores != 0 && cachedMode == logical) return numCores;
+    cachedMode = logical;
 
-    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
-    if (numPhysicalCores == -1) {
+    numCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
+    if (numCores == -1) {
         /* value not queryable, fall back on 1 */
-        return numPhysicalCores = 1;
+        return numCores = 1;
     }
 
     /* try to determine if there's hyperthreading */
@@ -1238,7 +1273,7 @@ int UTIL_countPhysicalCores(void)
 
         if (cpuinfo == NULL) {
             /* fall back on the sysconf value */
-            return numPhysicalCores;
+            return numCores;
         }
 
         /* assume the cpu cores/siblings values will be constant across all
@@ -1271,13 +1306,13 @@ int UTIL_countPhysicalCores(void)
             ratio = siblings / cpu_cores;
         }
 
-        if (ratio && numPhysicalCores > ratio) {
-            numPhysicalCores = numPhysicalCores / ratio;
+        if (ratio && numCores > ratio && !logical) {
+            numCores = numCores / ratio;
         }
 
 failed:
         fclose(cpuinfo);
-        return numPhysicalCores;
+        return numCores;
     }
 }
 
@@ -1288,52 +1323,70 @@ failed:
 
 /* Use physical core sysctl when available
  * see: man 4 smp, man 3 sysctl */
-int UTIL_countPhysicalCores(void)
+int UTIL_countCores(int logical)
 {
-    static int numPhysicalCores = 0; /* freebsd sysctl is native int sized */
-    if (numPhysicalCores != 0) return numPhysicalCores;
+    static int numCores = 0; /* freebsd sysctl is native int sized */
+    static int cachedMode = -1;   /* mode numCores was last computed for */
+    logical = (logical != 0);
+    if (numCores != 0 && cachedMode == logical) return numCores;
+    cachedMode = logical;
 
 #if __FreeBSD_version >= 1300008
-    {   size_t size = sizeof(numPhysicalCores);
-        int ret = sysctlbyname("kern.smp.cores", &numPhysicalCores, &size, NULL, 0);
-        if (ret == 0) return numPhysicalCores;
+    {   size_t size = sizeof(numCores);
+        int ret = sysctlbyname("kern.smp.cores", &numCores, &size, NULL, 0);
+        if (ret == 0) {
+            if (logical) {
+                int perCore = 1;
+                size = sizeof(perCore);
+                ret = sysctlbyname("kern.smp.threads_per_core", &perCore, &size, NULL, 0);
+                /* default to physical cores if logical cannot be read */
+                if (ret == 0 && perCore > 0)
+                    numCores *= perCore;
+            }
+
+            return numCores;
+        }
         if (errno != ENOENT) {
-            perror("zstd: can't get number of physical cpus");
+            perror("zstd: can't get number of cpus");
             exit(1);
         }
         /* sysctl not present, fall through to older sysconf method */
     }
 #endif
 
-    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
-    if (numPhysicalCores == -1) {
+    numCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
+    if (numCores == -1) {
         /* value not queryable, fall back on 1 */
-        numPhysicalCores = 1;
+        numCores = 1;
     }
-    return numPhysicalCores;
+    return numCores;
 }
 
 #elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__CYGWIN__)
 
 /* Use POSIX sysconf
  * see: man 3 sysconf */
-int UTIL_countPhysicalCores(void)
+int UTIL_countCores(int logical)
 {
-    static int numPhysicalCores = 0;
+    static int numCores = 0;
 
-    if (numPhysicalCores != 0) return numPhysicalCores;
+    /* logical is unused : sysconf provides a single count for both modes */
+    (void) logical;
 
-    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
-    if (numPhysicalCores == -1) {
+    if (numCores != 0) return numCores;
+
+    numCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
+    if (numCores == -1) {
         /* value not queryable, fall back on 1 */
-        return numPhysicalCores = 1;
+        return numCores = 1;
     }
-    return numPhysicalCores;
+    return numCores;
 }
 
 #else
 
-int UTIL_countPhysicalCores(void)
+int UTIL_countCores(int logical)
 {
+    (void) logical;   /* unused : no detection method on this platform */
     /* assume 1 */
     return 1;
@@ -1341,6 +1394,16 @@ int UTIL_countPhysicalCores(void)
 
 #endif
 
+int UTIL_countPhysicalCores(void)
+{
+    return UTIL_countCores(0);
+}
+
+int UTIL_countLogicalCores(void)
+{
+    return UTIL_countCores(1);
+}
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/programs/util.h b/programs/util.h
index f146f3a0..c1c96343 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -289,13 +289,19 @@ void UTIL_refFilename(FileNamesTable* fnt, const char* filename);
 FileNamesTable*
 UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
 
 
 /*-****************************************
  *  System
  ******************************************/
 
+/* UTIL_countCores() :
+ * @logical : 0 => count physical cores, non-zero => count logical cores.
+ * UTIL_countPhysicalCores() and UTIL_countLogicalCores() are shorthands. */
+int UTIL_countCores(int logical);
+
 int UTIL_countPhysicalCores(void);
 
+int UTIL_countLogicalCores(void);
 
 #if defined (__cplusplus)
 }
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index ae509286..066bcb83 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -125,6 +125,9 @@ the last one takes effect.
     This mode is the only one available when multithread support is disabled.
     Single-thread mode features lower memory usage.
     Final compressed result is slightly different from `-T1`.
+* `--auto-threads={physical,logical} (default: physical)`:
+    When using a default amount of threads via `-T0`, choose the default based on the number
+    of detected physical or logical cores.
 * `--adapt[=min=#,max=#]` :
     `zstd` will dynamically adapt compression level to perceived I/O conditions.
     Compression level adaptation can be observed live by using command `-v`.
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 5dba729a..e496852f 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -213,6 +213,7 @@ static void usage_advanced(const char* programName)
     DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
     DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n");
     DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n");
+    DISPLAYOUT( "--auto-threads={physical,logical} (default: physical) : use either physical cores or logical cores as default when specifying -T0 \n");
     DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n");
 # endif
     DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n");
@@ -761,6 +762,9 @@ int main(int const argCount, const char* argv[])
         separateFiles = 0,
         setRealTimePrio = 0,
         singleThread = 0,
+#ifdef ZSTD_MULTITHREAD
+        defaultLogicalCores = 0,
+#endif
         showDefaultCParams = 0,
         ultra=0,
         contentSize=1;
@@ -950,6 +954,15 @@ int main(int const argCount, const char* argv[])
                 if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }
+#ifdef ZSTD_MULTITHREAD
+                if (longCommandWArg(&argument, "--auto-threads")) {
+                    const char* threadDefault = NULL;
+                    NEXT_FIELD(threadDefault);
+                    if (strcmp(threadDefault, "logical") == 0)
+                        defaultLogicalCores = 1;
+                    continue;
+                }
+#endif
 #ifdef UTIL_HAS_MIRRORFILELIST
                 if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
 #endif
@@ -1156,8 +1169,13 @@ int main(int const argCount, const char* argv[])
 #ifdef ZSTD_MULTITHREAD
     if ((nbWorkers==0) && (!singleThread)) {
         /* automatically set # workers based on # of reported cpus */
-        nbWorkers = UTIL_countPhysicalCores();
-        DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
+        if (defaultLogicalCores) {
+            nbWorkers = UTIL_countLogicalCores();
+            DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
+        } else {
+            nbWorkers = UTIL_countPhysicalCores();
+            DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
+        }
     }
 #else
     (void)singleThread; (void)nbWorkers;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index e3a8ebd2..6d202c71 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1351,6 +1351,8 @@ if [ -n "$hasMT" ]
 then
     println "\n===> zstdmt round-trip tests "
     roundTripTest -g4M "1 -T0"
+    roundTripTest -g4M "1 -T0 --auto-threads=physical"
+    roundTripTest -g4M "1 -T0 --auto-threads=logical"
     roundTripTest -g8M "3 -T2"
     roundTripTest -g8M "19 --long"
     roundTripTest -g8000K "2 --threads=2"