Test and tidy

Made the Python more Python-like. Added notes and general tidy. Tested exclusions and building with various options. Tested all scripts.
2022-01-19 11:32:53 +01:00 · 2022-01-19 11:32:53 +01:00 · 7d90f0b520
parent 8f1e51f99f
commit 7d90f0b520
7 changed files with 133 additions and 118 deletions
--- a/build/single_file_libs/build_library_test.sh
+++ b/build/single_file_libs/build_library_test.sh
@ -69,7 +69,7 @@ fi
 echo "Single file library creation script: PASSED"

 # Copy the header to here (for the tests)
-cp "$ZSTD_SRC_ROOT/zstd.h" zstd.h
+cp "$ZSTD_SRC_ROOT/zstd.h" examples/zstd.h

 # Compile the generated output
 cc -Wall -Wextra -Werror -Wshadow -pthread -I. -Os -g0 -o $OUT_FILE zstd.c examples/roundtrip.c
--- a/build/single_file_libs/combine.py
+++ b/build/single_file_libs/combine.py
@ -2,6 +2,18 @@

 # Tool to bundle multiple C/C++ source files, inlining any includes.
 # 
+# Note: there are two types of exclusion options: the '-x' flag, which besides
+# excluding a file also adds an #error directive in place of the #include, and
+# the '-k' flag, which keeps the #include and doesn't inline the file. The
+# intended use cases are: '-x' for files that would normally be #if'd out, so
+# features that 100% won't be used in the amalgamated file, for which every
+# occurrence adds the error, and '-k' for headers that we wish to manually
+# include, such as a project's public API, for which occurrences after the first
+# are removed.
+# 
+# Todo: the error handling could be better; it currently throws and halts
+# (which is functional, just not very friendly).
+# 
 # Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)

 import argparse, re, sys
@ -31,18 +43,18 @@ found: Set[Path] = set()

 # Compiled regex Patern to handle the following type of file includes:
 # 
-#	#include "file"
-#	  #include "file"
-#	#  include "file"
-#	#include   "file"
-#	#include "file" // comment
-#	#include "file" // comment with quote "
+#   #include "file"
+#     #include "file"
+#   #  include "file"
+#   #include   "file"
+#   #include "file" // comment
+#   #include "file" // comment with quote "
 # 
 # And all combinations of, as well as ignoring the following:
 # 
-#	#include <file>
-#	//#include "file"
-#	/*#include "file"*/
+#   #include <file>
+#   //#include "file"
+#   /*#include "file"*/
 # 
 # We don't try to catch errors since the compiler will do this (and the code is
 # expected to be valid before processing) and we don't care what follows the
@ -54,27 +66,27 @@ include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"')
 # Simple tests to prove include_regex's cases.
 # 
 def test_match_include() -> bool:
-	if (include_regex.match('#include "file"')   and
-		include_regex.match('  #include "file"') and
-		include_regex.match('#  include "file"') and
-		include_regex.match('#include   "file"') and
-		include_regex.match('#include "file" // comment')):
-			if (not include_regex.match('#include <file>')   and
-				not include_regex.match('//#include "file"') and
-				not include_regex.match('/*#include "file"*/')):
-					found = include_regex.match('#include "file" // "')
-					if (found and found.group(1) == 'file'):
-						print('#include match valid')
-						return True
-	return False
+    if (include_regex.match('#include "file"')   and
+        include_regex.match('  #include "file"') and
+        include_regex.match('#  include "file"') and
+        include_regex.match('#include   "file"') and
+        include_regex.match('#include "file" // comment')):
+            if (not include_regex.match('#include <file>')   and
+                not include_regex.match('//#include "file"') and
+                not include_regex.match('/*#include "file"*/')):
+                    found = include_regex.match('#include "file" // "')
+                    if (found and found.group(1) == 'file'):
+                        print('#include match valid')
+                        return True
+    return False

 # Compiled regex Patern to handle "#pragma once" in various formats:
 # 
-#	#pragma once
-#	  #pragma once
-#	#  pragma once
-#	#pragma   once
-#	#pragma once // comment
+#   #pragma once
+#     #pragma once
+#   #  pragma once
+#   #pragma   once
+#   #pragma once // comment
 # 
 # Ignoring commented versions, same as include_regex.
 # 
@ -83,103 +95,105 @@ pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s*once\s*')
 # Simple tests to prove pragma_regex's cases.
 # 
 def text_match_pragma() -> bool:
-	if (pragma_regex.match('#pragma once')   and
-		pragma_regex.match('  #pragma once') and
-		pragma_regex.match('#  pragma once') and
-		pragma_regex.match('#pragma   once') and
-		pragma_regex.match('#pragma once // comment')):
-			if (not pragma_regex.match('//#pragma once') and
-				not pragma_regex.match('/*#pragma once*/')):
-					print('#pragma once match valid')
-					return True
-	return False
+    if (pragma_regex.match('#pragma once')   and
+        pragma_regex.match('  #pragma once') and
+        pragma_regex.match('#  pragma once') and
+        pragma_regex.match('#pragma   once') and
+        pragma_regex.match('#pragma once // comment')):
+            if (not pragma_regex.match('//#pragma once') and
+                not pragma_regex.match('/*#pragma once*/')):
+                    print('#pragma once match valid')
+                    return True
+    return False

 # Finds 'file'. First the currently processing file's 'parent' path is looked at
 # for a match, followed by the list of 'root' paths, returning a valid Path in
 # canonical form. If no match is found None is returned.
 # 
 def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
-	if (parent):
-		found = parent.joinpath(file).resolve();
-	else:
-		found = Path(file)
-	if (found.is_file()):
-		return found
-	for root in roots:
-		found = root.joinpath(file).resolve()
-		if (found.is_file()):
-			return found
-	return None
+    if (parent):
+        found = parent.joinpath(file).resolve();
+    else:
+        found = Path(file)
+    if (found.is_file()):
+        return found
+    for root in roots:
+        found = root.joinpath(file).resolve()
+        if (found.is_file()):
+            return found
+    return None

 # Helper to resolve lists of files. 'file_list' is passed in from the arguments
 # and each entry resolved to its canonical path (like any include entry, either
 # from the list of root paths or the owning file's 'parent', which in this case
 # is case is the input file). The results are stored in 'resolved'.
 # 
-def resolve_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
-	if (file_list):
-		for filename in file_list:
-			found = resolve_include(filename, parent)
-			if (found):
-				resolved.add(found)
-			else:
-				error_line(f'Warning: excluded file not found: {filename}')
+def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
+    if (file_list):
+        for filename in file_list:
+            found = resolve_include(filename, parent)
+            if (found):
+                resolved.add(found)
+            else:
+                error_line(f'Warning: excluded file not found: {filename}')

 # Writes 'line' to the open 'destn' (or stdout).
 # 
 def write_line(line: str) -> None:
-	print(line, file=destn)
+    print(line, file=destn)

 # Logs 'line' to stderr. This is also used for general notifications that we
 # don't want to go to stdout (so the source can be piped).
 # 
 def error_line(line: Any) -> None:
-	print(line, file=sys.stderr)
+    print(line, file=sys.stderr)

 # Inline the contents of 'file' (with any of its includes also inlined, etc.).
 # 
-def add_file(file: Path) -> None:
-	if (file.is_file()):
-		error_line(f'Processing: {file}')
-		with file.open('r') as opened:
-			for line in opened:
-				line = line.rstrip('\n')
-				match_include = include_regex.match(line);
-				if (match_include):
-					# We have a quoted include directive so grab the file
-					inc_name = match_include.group(1)
-					resolved = resolve_include(inc_name, file.parent)
-					if (resolved):
-						if (resolved in excludes):
-							# The file was excluded so error if the source attempts to use it
-							write_line(f'#error Using excluded file: {inc_name}')
-							error_line(f'Excluding: {inc_name}')
-						else:
-							if (resolved not in found):
-								# The file was not previously encountered
-								found.add(resolved)
-								if (resolved in keeps):
-									# But the include was flagged to keep as included
-									write_line(f'/**** *NOT* inlining {inc_name} ****/')
-									write_line(line)
-									error_line('Not Inlining: {inc_name}')
-								else:
-									 # The file was neither excluded nor seen before so inline it
-									write_line(f'/**** start inlining {inc_name} ****/')
-									add_file(resolved)
-									write_line(f'/**** ended inlining {inc_name} ****/')
-							else:
-								write_line(f'/**** skipping file: {inc_name} ****/')
-					else:
-						# The include file didn't resolve to a file
-						write_line(f'#error Unable to find: {inc_name}')
-						error_line(f'Error: Unable to find: {inc_name}')
-				else:
-					# Skip any 'pragma once' directives, otherwise write the source line
-					if (keep_pragma or not pragma_regex.match(line)):
-						write_line(line)
-	else:
-		error_line(f'Error: Invalid file: {file}')
+def add_file(file: Path, file_name: str = None) -> None:
+    if (file.is_file()):
+        if (not file_name):
+            file_name = file.name
+        error_line(f'Processing: {file_name}')
+        with file.open('r') as opened:
+            for line in opened:
+                line = line.rstrip('\n')
+                match_include = include_regex.match(line);
+                if (match_include):
+                    # We have a quoted include directive so grab the file
+                    inc_name = match_include.group(1)
+                    resolved = resolve_include(inc_name, file.parent)
+                    if (resolved):
+                        if (resolved in excludes):
+                            # The file was excluded so error if the compiler uses it
+                            write_line(f'#error Using excluded file: {inc_name}')
+                            error_line(f'Excluding: {inc_name}')
+                        else:
+                            if (resolved not in found):
+                                # The file was not previously encountered
+                                found.add(resolved)
+                                if (resolved in keeps):
+                                    # But the include was flagged to keep as included
+                                    write_line(f'/**** *NOT* inlining {inc_name} ****/')
+                                    write_line(line)
+                                    error_line(f'Not inlining: {inc_name}')
+                                else:
+                                    # The file was neither excluded nor seen before so inline it
+                                    write_line(f'/**** start inlining {inc_name} ****/')
+                                    add_file(resolved, inc_name)
+                                    write_line(f'/**** ended inlining {inc_name} ****/')
+                            else:
+                                write_line(f'/**** skipping file: {inc_name} ****/')
+                    else:
+                        # The include file didn't resolve to a file
+                        write_line(f'#error Unable to find: {inc_name}')
+                        error_line(f'Error: Unable to find: {inc_name}')
+                else:
+                    # Skip any 'pragma once' directives, otherwise write the source line
+                    if (keep_pragma or not pragma_regex.match(line)):
+                        write_line(line)
+    else:
+        error_line(f'Error: Invalid file: {file}')

 # Start here
 parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
@ -197,19 +211,19 @@ found.add(args.input)

 # Resolve all of the root paths upfront (we'll halt here on invalid roots)
 if (args.root):
-	for path in args.root:
-		roots.add(path.resolve(strict=True))
+    for path in args.root:
+        roots.add(path.resolve(strict=True))

 # The remaining params: so resolve the excluded files and #pragma once directive
-resolve_files(args.exclude, excludes, args.input.parent)
-resolve_files(args.keep,    keeps,    args.input.parent)
+resolve_excluded_files(args.exclude, excludes, args.input.parent)
+resolve_excluded_files(args.keep,    keeps,    args.input.parent)
 keep_pragma = args.pragma;

 # Then recursively process the input file
 try:
-	if (args.output):
-		destn = args.output
-	add_file(args.input)
+    if (args.output):
+        destn = args.output
+    add_file(args.input)
 finally:
-	if (not destn):
-		destn.close()
+    if (args.output):  # close only an explicitly supplied output (condition was inverted)
+        args.output.close()
--- a/build/single_file_libs/combine.sh
+++ b/build/single_file_libs/combine.sh
@ -200,6 +200,7 @@ if [ -n "$1" ]; then
      printf "" > "$DESTN"
    fi
    test_deps
+    log_line "Processing using the slower shell script; this might take a while"
    add_file "$1"
  else
    echo "Input file not found: \"$1\""
--- a/build/single_file_libs/create_single_file_decoder.sh
+++ b/build/single_file_libs/create_single_file_decoder.sh
@ -4,12 +4,12 @@
 ZSTD_SRC_ROOT="../../lib"

 # Amalgamate the sources
-echo "Amalgamating files... this can take a while"
+echo "Amalgamating files..."
 # Using the faster Python script if we have 3.8 or higher
 if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then
-  ./combine.py -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c
+  ./combine.py -r "$ZSTD_SRC_ROOT" -x legacy/zstd_legacy.h -o zstddeclib.c zstddeclib-in.c
 else
-  ./combine.sh -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c
+  ./combine.sh -r "$ZSTD_SRC_ROOT" -x legacy/zstd_legacy.h -o zstddeclib.c zstddeclib-in.c
 fi
 # Did combining work?
 if [ $? -ne 0 ]; then
--- a/build/single_file_libs/create_single_file_library.sh
+++ b/build/single_file_libs/create_single_file_library.sh
@ -4,12 +4,12 @@
 ZSTD_SRC_ROOT="../../lib"

 # Amalgamate the sources
-echo "Amalgamating files... this can take a while"
+echo "Amalgamating files..."
 # Using the faster Python script if we have 3.8 or higher
 if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then
-  ./combine.py -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c
+  ./combine.py -r "$ZSTD_SRC_ROOT" -x legacy/zstd_legacy.h -o zstd.c zstd-in.c
 else
-  ./combine.sh -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c
+  ./combine.sh -r "$ZSTD_SRC_ROOT" -x legacy/zstd_legacy.h -o zstd.c zstd-in.c
 fi
 # Did combining work?
 if [ $? -ne 0 ]; then
--- a/build/single_file_libs/zstd-in.c
+++ b/build/single_file_libs/zstd-in.c
@ -4,7 +4,7 @@
 *
 * Generate using:
 * \code
- *	combine.sh -r ../../lib -o zstd.c zstd-in.c
+ *	combine.sh -r ../../lib -x legacy/zstd_legacy.h -o zstd.c zstd-in.c
 * \endcode
 */
 /*
--- a/build/single_file_libs/zstddeclib-in.c
+++ b/build/single_file_libs/zstddeclib-in.c
@ -4,7 +4,7 @@
 *
 * Generate using:
 * \code
- *	combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c
+ *	combine.sh -r ../../lib -x legacy/zstd_legacy.h -o zstddeclib.c zstddeclib-in.c
 * \endcode
 */
 /*