Using faster Python script to amalgamate
parent
f4a552a3fa
commit
7e50d1e8c1
|
@ -0,0 +1,156 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Tool to bundle multiple C/C++ source files, inlining any includes.
|
||||
#
|
||||
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
|
||||
|
||||
import argparse, os, re, sys
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# File roots when searching (equivalent to -I paths for the compiler).
|
||||
roots = set()
|
||||
|
||||
# File Path objects previously inlined.
|
||||
found = set()
|
||||
|
||||
# Destination file object (or stdout if no output file was supplied).
|
||||
destn = None
|
||||
|
||||
# Regex to handle the following type of file includes:
|
||||
#
|
||||
# #include "file"
|
||||
# #include "file"
|
||||
# # include "file"
|
||||
# #include "file"
|
||||
# #include "file" // comment
|
||||
# #include "file" // comment with quote "
|
||||
#
|
||||
# And all combinations of, as well as ignoring the following:
|
||||
#
|
||||
# #include <file>
|
||||
# //#include "file"
|
||||
# /*#include "file"*/
|
||||
#
|
||||
# We don't try to catch errors since the compiler will do this (and the code is
|
||||
# expected to be valid before processing) and we don't care what follows the
|
||||
# file (whether it's a valid comment or not, since anything after the quoted
|
||||
# string is ignored)
|
||||
#
|
||||
include_regex = re.compile(r'^\s*#\s*include\s*"(.+?)"')
|
||||
|
||||
# Simple tests to prove include_regex's cases.
|
||||
#
|
||||
def test_match_include():
|
||||
if (include_regex.match('#include "file"') and
|
||||
include_regex.match(' #include "file"') and
|
||||
include_regex.match('# include "file"') and
|
||||
include_regex.match('#include "file"') and
|
||||
include_regex.match('#include "file" // comment')):
|
||||
if (not include_regex.match('#include <file>') and
|
||||
not include_regex.match('//#include "file"') and
|
||||
not include_regex.match('/*#include "file"*/')):
|
||||
found = include_regex.match('#include "file" // "')
|
||||
if (found and found.group(1) == 'file'):
|
||||
print('#include match valid')
|
||||
return True
|
||||
return False
|
||||
|
||||
# Regex to handle "#pragma once" in various formats:
|
||||
#
|
||||
# #pragma once
|
||||
# #pragma once
|
||||
# # pragma once
|
||||
# #pragma once
|
||||
# #pragma once // comment
|
||||
#
|
||||
# Ignoring commented versions, same as include_regex.
|
||||
#
|
||||
pragma_regex = re.compile(r'^\s*#\s*pragma\s*once\s*')
|
||||
|
||||
# Simple tests to prove pragma_regex's cases.
|
||||
#
|
||||
def text_match_pragma():
|
||||
if (pragma_regex.match('#pragma once') and
|
||||
pragma_regex.match(' #pragma once') and
|
||||
pragma_regex.match('# pragma once') and
|
||||
pragma_regex.match('#pragma once') and
|
||||
pragma_regex.match('#pragma once // comment')):
|
||||
if (not pragma_regex.match('//#pragma once') and
|
||||
not pragma_regex.match('/*#pragma once*/')):
|
||||
print('#pragma once match valid')
|
||||
return True
|
||||
return False
|
||||
|
||||
# Finds 'file'. First the currently processing file's 'parent' path is looked at
|
||||
# for a match, followed by the list of 'root', returning a valid Path in
|
||||
# canonical form. If no match is found None is returned.
|
||||
#
|
||||
def resolve_include(parent: Path, file: str):
|
||||
found = parent.joinpath(file).resolve();
|
||||
if (found.is_file()):
|
||||
return found
|
||||
for root in roots:
|
||||
found = root.joinpath(file).resolve()
|
||||
if (found.is_file()):
|
||||
return found
|
||||
return None
|
||||
|
||||
# Writes 'line' to the open file 'destn' (or stdout).
|
||||
#
|
||||
def write_line(line):
|
||||
print(line, file=destn)
|
||||
|
||||
# Logs 'line' to stderr.
|
||||
#
|
||||
def log_line(line):
|
||||
print(line, file=sys.stderr)
|
||||
|
||||
def add_file(file):
|
||||
if (isinstance(file, Path) and file.is_file()):
|
||||
log_line(f'Processing: {file}')
|
||||
with file.open('r') as opened:
|
||||
for line in opened:
|
||||
line = line.rstrip('\n')
|
||||
match_include = include_regex.match(line);
|
||||
if (match_include):
|
||||
inc_name = match_include.group(1)
|
||||
resolved = resolve_include(file.parent, inc_name)
|
||||
if (resolved not in found):
|
||||
# The file was not previously encountered
|
||||
found.add(resolved)
|
||||
write_line(f'/**** start inlining {inc_name} ****/')
|
||||
add_file(resolved)
|
||||
write_line(f'/**** ended inlining {inc_name} ****/')
|
||||
else:
|
||||
write_line(f'/**** skipping file: {inc_name} ****/')
|
||||
else:
|
||||
if (not pragma_regex.match(line)):
|
||||
write_line(line)
|
||||
else:
|
||||
log_line(f'Error: Unable to find: {file}')
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
|
||||
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
|
||||
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
|
||||
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
|
||||
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
|
||||
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
|
||||
parser.add_argument('input', type=Path, help='input file')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Resolve all of the root paths upfront (we'll halt here on invalid roots)
|
||||
if (args.root is not None):
|
||||
for path in args.root:
|
||||
roots.add(path.resolve(strict=True))
|
||||
|
||||
try:
|
||||
if (args.output is None):
|
||||
destn = sys.stdout
|
||||
else:
|
||||
destn = args.output
|
||||
add_file(args.input)
|
||||
finally:
|
||||
if (destn is not None):
|
||||
destn.close()
|
|
@ -5,7 +5,12 @@ ZSTD_SRC_ROOT="../../lib"
|
|||
|
||||
# Amalgamate the sources
|
||||
echo "Amalgamating files... this can take a while"
|
||||
./combine.sh -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c
|
||||
# Using the faster Python script if we have 3.8 or higher
|
||||
if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then
|
||||
./combine.py -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c
|
||||
else
|
||||
./combine.sh -r "$ZSTD_SRC_ROOT" -o zstddeclib.c zstddeclib-in.c
|
||||
fi
|
||||
# Did combining work?
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Combine script: FAILED"
|
||||
|
|
|
@ -5,7 +5,12 @@ ZSTD_SRC_ROOT="../../lib"
|
|||
|
||||
# Amalgamate the sources
|
||||
echo "Amalgamating files... this can take a while"
|
||||
./combine.sh -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c
|
||||
# Using the faster Python script if we have 3.8 or higher
|
||||
if python3 -c 'import sys; assert sys.version_info >= (3,8)' 2>/dev/null; then
|
||||
./combine.py -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c
|
||||
else
|
||||
./combine.sh -r "$ZSTD_SRC_ROOT" -o zstd.c zstd-in.c
|
||||
fi
|
||||
# Did combining work?
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Combine script: FAILED"
|
||||
|
|
Loading…
Reference in New Issue