first working (but ugly) version in C++
This commit is contained in:
		
							parent
							
								
									512744013e
								
							
						
					
					
						commit
						e288c12ade
					
				
							
								
								
									
										6
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								Makefile
									
									
									
									
									
								
							@ -3,15 +3,15 @@ OUTPUT:= preparse
 | 
			
		||||
 | 
			
		||||
.PHONY: test debug clean
 | 
			
		||||
 | 
			
		||||
test: debug
 | 
			
		||||
all test: debug
 | 
			
		||||
	./$(OUTPUT) test.tex
 | 
			
		||||
 | 
			
		||||
debug: CFLAGS:= -g
 | 
			
		||||
debug: CXXFLAGS:= -g
 | 
			
		||||
debug: $(OUTPUT)
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
	$(RM) $(OBJ) $(OUTPUT)
 | 
			
		||||
 | 
			
		||||
$(OUTPUT): $(OBJ)
 | 
			
		||||
	$(CC) $(OBJ) -o preparse
 | 
			
		||||
	$(CXX) $(OBJ) -o preparse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										248
									
								
								main.c
									
									
									
									
									
								
							
							
						
						
									
										248
									
								
								main.c
									
									
									
									
									
								
							@ -1,248 +0,0 @@
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
 | 
			
		||||
/* Character range delimited by two pointers into some buffer. */
typedef struct {
	char *start;	/* first character of the range */
	char *end;	/* position at which the range stops */
} substr_t;
 | 
			
		||||
 | 
			
		||||
/* Distance from str->start to str->end in characters, converted to int. */
inline static int substr_len(substr_t * str) {
	const char * first = str->start;
	const char * last = str->end;

	return (int)(last - first);
}
 | 
			
		||||
 | 
			
		||||
/* Make target describe the NUL-terminated string src: start is its first
 * character, end is its terminating NUL. Nothing is copied. */
void substr_conv(char * src, substr_t * target) {
	const size_t length = strlen(src);

	target->start = src;
	target->end = target->start + length;
}
 | 
			
		||||
 | 
			
		||||
// make new substr on heap using calloc
 | 
			
		||||
// does not copy take ownership of from
 | 
			
		||||
substr_t * substr_new(char * from) {
 | 
			
		||||
	substr_t * ptr = calloc(1, sizeof(substr_t));
 | 
			
		||||
	if (ptr == NULL) { return ptr; }
 | 
			
		||||
 | 
			
		||||
	ptr->start	= from;
 | 
			
		||||
	ptr->end	= ptr->start + strlen(ptr->start);
 | 
			
		||||
 | 
			
		||||
	return ptr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Both macros take a pointer to a substr_t and may execute `return 0;` in the
 * function that uses them, so they only fit into functions returning an int.
 *
 * testEnd: leave the calling function when the range is exhausted or the
 *          cursor sits on a NUL. The bounds test comes first so the byte at
 *          `end` is never read.
 * next:    move the cursor one character forward, then apply testEnd.
 *
 * The do/while(0) wrapper makes each macro behave like a single statement. */
#define testEnd(current) \
	do { \
		if ((current)->start == (current)->end || *(current)->start == '\0') return 0; \
	} while (0)
#define next(current) \
	do { \
		(current)->start++; \
		testEnd(current); \
	} while (0)
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
	substr_t name;
 | 
			
		||||
} macro_t;
 | 
			
		||||
 | 
			
		||||
macro_t * macro_table = NULL;
 | 
			
		||||
size_t macro_table_size = 0;
 | 
			
		||||
 | 
			
		||||
/* Append a copy of *m to the global macro table, growing the table by one
 * element. Ends the process with exit status 1 when no memory is available. */
void macro_add(macro_t *m) {
	/* grow into a temporary so the table pointer is not overwritten with
	 * NULL when realloc fails */
	macro_t * grown = (macro_t *)realloc(macro_table, sizeof(macro_t) * (macro_table_size + 1));
	if (grown == NULL) {
		fprintf(stderr, "out of MEM!!\n");
		exit(1);
	}
	macro_table = grown;

	// copy macro
	macro_table[macro_table_size] = *m;
	macro_table_size++;
}
 | 
			
		||||
 | 
			
		||||
void macro_table_drop() {
 | 
			
		||||
	free(macro_table);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void macro_table_init() {
 | 
			
		||||
	// add default macros
 | 
			
		||||
	// /newCommand
 | 
			
		||||
	// /def
 | 
			
		||||
	// /begin
 | 
			
		||||
	// /end
 | 
			
		||||
	macro_t temp;
 | 
			
		||||
	substr_conv("newCommand", &temp.name);
 | 
			
		||||
	macro_add(&temp);
 | 
			
		||||
 | 
			
		||||
	substr_conv(&temp.name, "renewCommand", &temp.name);
 | 
			
		||||
	macro_add(&temp);
 | 
			
		||||
 | 
			
		||||
	substr_conv(&temp.name, "def");
 | 
			
		||||
	macro_add(&temp);
 | 
			
		||||
 | 
			
		||||
	substr_conv(&temp.name, "begin");
 | 
			
		||||
	macro_add(&temp);
 | 
			
		||||
 | 
			
		||||
	substr_conv(&temp.name, "end");
 | 
			
		||||
	macro_add(&temp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Compare the names of two macros.
 * Returns 1 when both names have the same length and the same characters,
 * 0 otherwise. */
int macro_name_cmp(macro_t * one, macro_t * two) {
	const char * lhs = one->name.start;
	const char * rhs = two->name.start;

	// names of different length can never be equal
	if (substr_len(&one->name) != substr_len(&two->name)) return 0;

	while (lhs != one->name.end) {
		if (*lhs != *rhs) {
			return 0;
		}
		lhs++;
		rhs++;
	}

	return 1;
}
 | 
			
		||||
 | 
			
		||||
/* Search the macro table for an entry with the same name as m.
 * Returns 0 when the name is empty or unknown, otherwise the 1-based position
 * of the matching entry (table index + 1). A hit is therefore always non-zero
 * and a miss always zero, which is what callers testing the result as a
 * boolean rely on. */
int macro_lookup(macro_t *m) {
	size_t i;

	if (substr_len(&m->name) == 0) {
		return 0; // len == 0 is not allowed
	}

	for (i = 0; i < macro_table_size; i++) {
		if (macro_name_cmp(macro_table + i, m)) {
			return (int)i + 1; // found it
		}
	}

	return 0; // not in the table
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// tex macro calls always start with backslash
 | 
			
		||||
int parseMacro(macro_t *target, substr_t * current) {
 | 
			
		||||
	if (*current->start != '\\') {
 | 
			
		||||
		return 0;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	target->name.end = target->name.start = current->start;
 | 
			
		||||
 | 
			
		||||
	while((*current->start != '\0') && 
 | 
			
		||||
			substr_len(current) > 0 && 
 | 
			
		||||
			!isspace(*current->start) &&
 | 
			
		||||
			*current->start != '[' &&
 | 
			
		||||
			*current->start != '{' ) {
 | 
			
		||||
		target->name.end = ++current->start;
 | 
			
		||||
		// check for known
 | 
			
		||||
		if (macro_lookup(target)) {
 | 
			
		||||
			break;
 | 
			
		||||
			// this macros name is known, break for now
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	target->name.end = current->start;
 | 
			
		||||
 | 
			
		||||
	// read name		
 | 
			
		||||
	next(current);
 | 
			
		||||
 | 
			
		||||
	int depth = 1;
 | 
			
		||||
 | 
			
		||||
	// optional arguments
 | 
			
		||||
	if (*current->start == '[') {
 | 
			
		||||
		depth = 1;
 | 
			
		||||
		while(*current->start != '\0' && substr_len(current) > 0 && depth) {
 | 
			
		||||
			current->start++;
 | 
			
		||||
			if (*current->start == '[') depth++;
 | 
			
		||||
			if (*current->start == ']') depth--;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		next(current);	
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	// required arguments
 | 
			
		||||
	if (*current->start == '{') {
 | 
			
		||||
		depth = 1;
 | 
			
		||||
		while(*current->start != '\0' && substr_len(current) > 0 && depth) {
 | 
			
		||||
			current->start++;
 | 
			
		||||
			if (*current->start == '{') depth++;
 | 
			
		||||
			if (*current->start == '}') depth--;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// all done
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Write "macro: ", the characters of the macro's name and a newline to
 * stdout. */
void printMacro(macro_t * macro) {
	const char * p;

	fputs("macro: ", stdout);
	for (p = macro->name.start; p != macro->name.end; p++) {
		putc(*p, stdout);
	}
	putc('\n', stdout);
}
 | 
			
		||||
 | 
			
		||||
/* Walk over the text in str, skip everything up to the next backslash, parse
 * the macro call found there and print its name.
 * str->start is advanced while parsing.
 * Returns 1 when the whole text was consumed, 0 when parseMacro() rejected a
 * macro call. */
int parseText(substr_t * str) {
	while(substr_len(str) > 0) {
		// a text can contain macro calls too
		while(substr_len(str) > 0 && *str->start != '\\') {
			++str->start;
		}

		// only plain text was left over: that is a normal end, not an error
		if (substr_len(str) <= 0) {
			break;
		}

		// here starts a macro
		macro_t macro;
		if (!parseMacro(&macro, str)) {
			return 0; // strange
		}

		printMacro(&macro);
	}
	return 1;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <sys/mman.h> 	// for mmap()
 | 
			
		||||
#include <sys/stat.h>	// for fstat()
 | 
			
		||||
#include <fcntl.h>		// for open()
 | 
			
		||||
#include <unistd.h>		// for close()
 | 
			
		||||
#include <errno.h>	// for perror()
 | 
			
		||||
 | 
			
		||||
int main(int argc, char ** args) {
 | 
			
		||||
	// find all the files the given tex files depend on
 | 
			
		||||
 | 
			
		||||
	int fd = 0;
 | 
			
		||||
	struct stat filestat;
 | 
			
		||||
 | 
			
		||||
	for(;argc > 1; --argc) {
 | 
			
		||||
		char * filename = args[argc-1];
 | 
			
		||||
		printf("looking at %s\n", filename);
 | 
			
		||||
 | 
			
		||||
		// try to open file
 | 
			
		||||
		fd = open(filename, O_RDONLY);
 | 
			
		||||
		if (fd == -1) {
 | 
			
		||||
			perror("could not open input file");
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
		fstat(fd, &filestat);
 | 
			
		||||
		printf("file size: %d\n", filestat.st_size);	
 | 
			
		||||
		
 | 
			
		||||
		// try to mmap file
 | 
			
		||||
		char * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0);
 | 
			
		||||
		if (memory_area == NULL) {
 | 
			
		||||
			perror("could not mmap the input");
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		printf("mmap success, parse file\n");
 | 
			
		||||
		
 | 
			
		||||
		substr_t file = {
 | 
			
		||||
			.start = memory_area,
 | 
			
		||||
			.end = memory_area + filestat.st_size,
 | 
			
		||||
		};
 | 
			
		||||
 | 
			
		||||
		macro_table_init();
 | 
			
		||||
 | 
			
		||||
		if (!parseText(&file)) {
 | 
			
		||||
			printf("error parsing!");
 | 
			
		||||
		} else {
 | 
			
		||||
			printf("parsed %s\n", filename);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// cleanup
 | 
			
		||||
		munmap(memory_area, filestat.st_size);
 | 
			
		||||
		close(fd);
 | 
			
		||||
		macro_table_drop();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	printf("done\n");
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										427
									
								
								main.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										427
									
								
								main.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,427 @@
 | 
			
		||||
/*
 | 
			
		||||
 * main.cpp
 | 
			
		||||
 *
 | 
			
		||||
 *  Created on: 07.10.2017
 | 
			
		||||
 *      Author: julian
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <sys/mman.h> 	// for mmap()
 | 
			
		||||
#include <sys/stat.h>	// for fstat()
 | 
			
		||||
#include <fcntl.h>		// for open()
 | 
			
		||||
#include <unistd.h>		// for close()
 | 
			
		||||
#include <errno.h>	// for perror()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <map>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
// Non-owning view of the characters in [start, end).
// The view only stores the two pointers, so the memory they point into has to
// outlive the view. A default-constructed view holds two null pointers and is
// empty.
struct Substring {
	const char * start, * end;

	Substring(const char * start, const char * end) :
		start(start), end(end) {}

	// View of a NUL-terminated string; the terminator is not part of the view.
	// Intentionally implicit so string literals can be passed as Substring.
	Substring(const char * str):
		start(str), end(str + std::strlen(str))
	{}

	Substring():
		start(nullptr), end(nullptr)
	{}

	// Copy of the viewed characters.
	std::string toString() const {
		// an empty view may hold null pointers, which must not reach std::string
		return start == end ? std::string() : std::string(start, end);
	}

	// Implicit conversion, usable on const views as well (e.g. as a map key).
	operator std::string() const {
		return toString();
	}

	// Number of characters in the view.
	std::string::size_type size() const {
		return static_cast<std::string::size_type>(end - start);
	}

	friend std::ostream &operator<<(std::ostream &out, const Substring &str);

	// Lexicographic three-way comparison of [a, aend) with [b, bend).
	// Returns -1, 0 or 1; a range that is a proper prefix of the other one
	// orders first.
	template <class IterA, class IterB>
	static int compareRanges(IterA a, IterA aend, IterB b, IterB bend) {
		for (; a != aend && b != bend; ++a, ++b) {
			if (*a < *b) {
				return -1;
			}
			if (*b < *a) {
				return 1;
			}
		}

		if (a != aend) {
			return 1;	// only the left range has characters left
		}
		if (b != bend) {
			return -1;	// only the right range has characters left
		}
		return 0;
	}

	int compare(const Substring &other) const {
		return compareRanges(start, end, other.start, other.end);
	}

	int compare(const std::string &other) const {
		return compareRanges(start, end, other.begin(), other.end());
	}

	// The relational operators accept anything compare() accepts.
	template <typename T>
	bool operator<(const T &other) const {
		return compare(other) < 0;
	}

	template <typename T>
	bool operator>(const T &other) const {
		return compare(other) > 0;
	}

	template <typename T>
	bool operator==(const T &other) const {
		return compare(other) == 0;
	}

	template <typename T>
	bool operator!=(const T &other) const {
		return !(*this == other);
	}
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
std::ostream &operator<<(std::ostream &out, const Substring &str) {
 | 
			
		||||
	auto temp = str.start;
 | 
			
		||||
	while(temp != str.end) {
 | 
			
		||||
		out << *temp;
 | 
			
		||||
		temp++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class InputExtractor
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
	class Exception : public std::runtime_error {
 | 
			
		||||
	public:
 | 
			
		||||
		Exception(const std::string &str) : std::runtime_error(str) {}
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	typedef std::vector<std::string> List;
 | 
			
		||||
	List operator()(const Substring &input);
 | 
			
		||||
 | 
			
		||||
	std::string macroExpand(Substring input);
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
	Substring file;
 | 
			
		||||
	std::map<std::string, Substring> macros;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Read a bracketed group from the front of input.
//
// brackets holds the opening character at [0] and the closing one at [1],
// e.g. "{}" or "[]". When input starts with the opening character, the text
// between it and the matching closing character (nesting is honoured) is
// returned and input.start is moved behind the closing character. If the
// group is never closed, everything up to the end of input is returned.
// When input is empty or does not start with the opening character, a
// diagnostic is written to std::cout, input is left untouched and an empty
// Substring is returned.
Substring readBrackets(Substring &input, const char * brackets) {
	const char open = brackets[0];
	const char close = brackets[1];

	if (input.size() == 0 || *input.start != open) {
		std::cout << "expected " << open << ", got: '";
		if (input.size() > 0) {
			// only look at the character when there is one
			std::cout << *input.start;
		}
		std::cout << "'";
		return Substring();
	}

	++input.start;	// skip the opening bracket
	Substring result(input.start, input.start);

	int depth = 1;
	while (input.size() > 0) {
		const char c = *input.start;
		if (c == open) {
			++depth;
		} else if (c == close && --depth == 0) {
			break;	// input.start is on the matching closing bracket
		}
		++input.start;
	}
	result.end = input.start;

	// advance beyond last bracket
	if (input.size() > 0) {
		++input.start;
	}

	return result;
}
 | 
			
		||||
 | 
			
		||||
// Return a copy of input in which every `\name` is replaced by the body
// stored for `name` in the macro table.
//
// A macro name is the shortest run of characters after the backslash that is
// a key of `macros`. One whitespace character directly behind the name is
// consumed together with the name, as TeX does; any other character is kept.
// Replacement text is inserted as is, it is not expanded again.
// Throws Exception when no prefix of the remaining text names a known macro.
std::string InputExtractor::macroExpand(Substring input) {
	std::string result;
	//cout << "expanding: " << input << endl;

	while (input.size() > 0) {
		if (*input.start != '\\') {
			result += *input.start;
			++input.start;
			continue;
		}

		++input.start;	// skip the backslash
		Substring name(input.start, input.start);
		auto lookup = macros.end();

		// grow the candidate name until it matches a known macro
		while (input.size() > 0) {
			name.end = ++input.start;

			lookup = macros.find(name.toString());
			if (lookup != macros.end()) {
				break;
			}
		}

		if (lookup == macros.end()) {
			throw Exception("unknown macro in macro expansion: " + name.toString());
		}

		result += lookup->second.toString();

		// input.start already points behind the name; only a separating
		// whitespace character is dropped, everything else stays in the output
		if (input.size() > 0 && isspace(static_cast<unsigned char>(*input.start))) {
			++input.start;
		}
	}

	return result;
}
 | 
			
		||||
 | 
			
		||||
#include <functional>
 | 
			
		||||
typedef std::map<std::string, std::function<void(InputExtractor::List&, std::string)>> CommandList;
 | 
			
		||||
 | 
			
		||||
// Return the extension of the last path component of str, including the
// leading dot (".tex" for "dir/main.tex"). Returns an empty string when the
// last component contains no dot. Both '/' and '\\' count as separators.
std::string Extension(std::string str) {
	const std::string::size_type pos = str.find_last_of("./\\");

	// a separator found before any dot means the last component has no extension
	if (pos == std::string::npos || str[pos] != '.') {
		return std::string();
	}

	return str.substr(pos);
}
 | 
			
		||||
 | 
			
		||||
// Return the directory part of path, always ending in a separator:
// "dir/sub/" for "dir/sub/file.tex" and "/" for "/file".
// A path without any separator lives in the current directory, for which
// "./" is returned. Both '/' and '\\' count as separators.
std::string Basedir(std::string path) {
	const std::string::size_type sep = path.find_last_of("/\\");

	if (sep == std::string::npos) {
		return "./";
	}

	return path.substr(0, sep + 1);
}
 | 
			
		||||
 | 
			
		||||
// Return the last component of path, i.e. everything behind the last '/' or
// '\\' (the separator itself is not included). A path without separator is
// returned unchanged; a path ending in a separator yields an empty string.
std::string Name(std::string path) {
	const std::string::size_type sep = path.find_last_of("/\\");

	if (sep == std::string::npos) {
		return path;
	}

	return path.substr(sep + 1);
}
 | 
			
		||||
 | 
			
		||||
// Return the last component of path without its extension: "file" for
// "dir/file.tex", "x.tar" for "x.tar.gz". A name without a dot is returned
// unchanged. Both '/' and '\\' count as separators.
std::string Basename(std::string path) {
	// cut off the directory part, including the separator
	const std::string::size_type sep = path.find_last_of("/\\");
	const std::string name = (sep == std::string::npos) ? path : path.substr(sep + 1);

	// cut off the extension, if there is one
	const std::string::size_type dot = name.find_last_of('.');
	if (dot == std::string::npos) {
		return name;
	}

	return name.substr(0, dot);
}
 | 
			
		||||
 | 
			
		||||
#include <fstream>
 | 
			
		||||
 | 
			
		||||
// Report whether path can be opened for reading. The stream used for the
// check is closed again when the function returns.
bool Exists(std::string path) {
	std::ifstream probe(path);

	return !probe.fail();
}
 | 
			
		||||
 | 
			
		||||
// Extract the files referenced by the TeX file at path.
//
// The file is mapped read-only and scanned with a fresh InputExtractor. Each
// returned entry is prefixed with the directory of path. A file that cannot
// be opened, inspected or mapped is reported on stderr and yields an empty
// list; so does an empty file. Exceptions thrown by the extractor are passed
// on after the mapping and the descriptor have been released.
InputExtractor::List Include(std::string path) {
	InputExtractor::List list;

	const int fd = open(path.c_str(), O_RDONLY);
	if (fd == -1) {
		std::cerr << "cannot open " << path << std::endl;
		return list;
	}

	struct stat fileinfo;
	if (fstat(fd, &fileinfo) == -1) {
		perror("stat");
		close(fd);
		return list;
	}

	// mmap rejects a length of zero, and an empty file references nothing
	if (fileinfo.st_size == 0) {
		close(fd);
		return list;
	}

	// failure is signalled by MAP_FAILED, not by a null pointer
	void * memptr = mmap(NULL, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (memptr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return list;
	}

	const char * text = static_cast<const char *>(memptr);
	try {
		list = InputExtractor()(Substring(text, text + fileinfo.st_size));
	} catch (...) {
		// release the resources before handing the error to the caller
		munmap(memptr, fileinfo.st_size);
		close(fd);
		throw;
	}

	// cleanup; the list owns copies of the names, the mapping is no longer needed
	munmap(memptr, fileinfo.st_size);
	close(fd);

	// add basedir to list, with exactly one separator in between
	std::string basedir = Basedir(path);
	if (!basedir.empty() && basedir.back() != '/' && basedir.back() != '\\') {
		basedir += '/';
	}
	for (auto &entry : list) {
		entry = basedir + entry;
	}

	return list;
}
 | 
			
		||||
 | 
			
		||||
// Scan the TeX source in input and return the files it refers to.
//
// Recognised commands:
//   \input{f}, \include{f}   f is recorded (".tex" is appended when f has
//                            another or no extension) and the file itself is
//                            scanned through Include()
//   \lstinputlisting{f}      f is recorded
//   \def\name{body}          body is stored in `macros` under name; a later
//                            definition replaces an earlier one
// Command arguments are macro-expanded before they are handed to the handler.
// Text after '%' up to the end of the line is ignored. Progress is logged to
// std::cout.
// Throws Exception when the input ends directly after a backslash, and passes
// on the exceptions of macroExpand().
//
// NOTE(review): a command is recognised as soon as a prefix of the name
// matches, so e.g. \inputencoding is treated like \input - confirm whether
// that is intended.
InputExtractor::List InputExtractor::operator()(const Substring &input){
	file = input;
	List result;
	CommandList IncludeCommands;

	IncludeCommands["input"] = [](List &l, std::string a) {
		if (Extension(a) != ".tex") a += ".tex";
		l.push_back(a);
		// try to extract all inputs of that file
		const List sub = Include(a);
		l.insert(l.end(), sub.begin(), sub.end());
	};

	IncludeCommands["include"] = IncludeCommands["input"];
	IncludeCommands["lstinputlisting"] = [](List &l, std::string a){ l.push_back(a); };

	// characters that end a command name
	const auto endsName = [](char c) {
		return isspace(static_cast<unsigned char>(c)) || c == '{' || c == '\\';
	};

	while (file.size() > 0) {
		if (*file.start == '%') {
			// line comment
			while (file.size() > 0 && *file.start != '\n') {
				++file.start;
			}
			continue;
		}

		// skip ordinary text
		if (*file.start != '\\') {
			++file.start;
			continue;
		}

		// read macro name
		++file.start;
		if (file.size() == 0) {
			throw Exception("unexpected EOF");
		}

		// the character right behind the backslash always belongs to the name
		const char * const nameStart = file.start;
		++file.start;

		// invariant: the candidate name is [nameStart, file.start)
		for (;;) {
			const Substring name(nameStart, file.start);

			const auto searchHit = IncludeCommands.find(name.toString());
			if (searchHit != IncludeCommands.end()) {
				std::cout << searchHit->first;

				// the [...] group is optional, only read it when it is there
				Substring optional;
				if (file.size() > 0 && *file.start == '[') {
					optional = readBrackets(file, "[]");
				}
				std::cout << "[" << optional << "]";

				const Substring args = readBrackets(file, "{}");
				std::cout << ":" << args << std::endl;

				searchHit->second(result, macroExpand(args));
				break;
			}

			if (name == std::string("def") && file.size() > 0 && *file.start == '\\') {
				++file.start;	// skip the backslash of the defined name

				Substring defined(file.start, file.start);
				while (file.size() > 0 && *file.start != '{' &&
						!isspace(static_cast<unsigned char>(*file.start))) {
					++file.start;
				}
				defined.end = file.start;

				std::cout << "new macro definition: " << defined << std::endl;

				const std::string key = defined.toString();
				const Substring body = readBrackets(file, "{}");
				macros[key] = body;	// a redefinition replaces the old body
				break;
			}

			// stop at the end of the name, otherwise try a longer candidate
			if (file.size() == 0 || endsName(*file.start)) {
				break;
			}
			++file.start;
		}
	}

	return result;
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char ** args) {
 | 
			
		||||
	// find all the files the given tex files depend on
 | 
			
		||||
 | 
			
		||||
	int fd = 0;
 | 
			
		||||
	struct stat filestat;
 | 
			
		||||
 | 
			
		||||
	for(;argc > 1; --argc) {
 | 
			
		||||
		char * filename = args[argc-1];
 | 
			
		||||
		cout << "looking at " << filename << std::endl;
 | 
			
		||||
 | 
			
		||||
		// try to open file
 | 
			
		||||
		fd = open(filename, O_RDONLY);
 | 
			
		||||
		if (fd == -1) {
 | 
			
		||||
			perror("could not open input file");
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		fstat(fd, &filestat);
 | 
			
		||||
		//cout << "file size: " << filestat.st_size << endl;
 | 
			
		||||
 | 
			
		||||
		// try to mmap file
 | 
			
		||||
		void * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0);
 | 
			
		||||
		if (memory_area == nullptr) {
 | 
			
		||||
			perror("could not mmap the input");
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		//cout << "mmap success, parse file" << endl;
 | 
			
		||||
 | 
			
		||||
		Substring file((const char*)memory_area, (const char*)memory_area + filestat.st_size);
 | 
			
		||||
		try {
 | 
			
		||||
			auto list = InputExtractor()(file);
 | 
			
		||||
			// write in makefile style
 | 
			
		||||
			std::ofstream output(Basename(filename) + ".d");
 | 
			
		||||
			if (!output) {
 | 
			
		||||
				std::cout << "could not create output file" << std::endl;
 | 
			
		||||
			} else {
 | 
			
		||||
				output << "filename: ";
 | 
			
		||||
				for (auto it = list.begin(); it != list.end(); it++) {
 | 
			
		||||
					output << *it << "\\\\\n";
 | 
			
		||||
				}
 | 
			
		||||
				output << endl;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			output.close();
 | 
			
		||||
		} catch(InputExtractor::Exception &e) {
 | 
			
		||||
			cout << e.what() << endl;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// cleanup
 | 
			
		||||
		munmap(memory_area, filestat.st_size);
 | 
			
		||||
		close(fd);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	printf("done\n");
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user