first working (but ugly) version in C++
This commit is contained in:
parent
512744013e
commit
e288c12ade
6
Makefile
6
Makefile
@ -3,15 +3,15 @@ OUTPUT:= preparse
|
||||
|
||||
.PHONY: test debug clean
|
||||
|
||||
test: debug
|
||||
all test: debug
|
||||
./$(OUTPUT) test.tex
|
||||
|
||||
debug: CFLAGS:= -g
|
||||
debug: CXXFLAGS:= -g
|
||||
debug: $(OUTPUT)
|
||||
|
||||
clean:
|
||||
$(RM) $(OBJ) $(OUTPUT)
|
||||
|
||||
$(OUTPUT): $(OBJ)
|
||||
$(CC) $(OBJ) -o preparse
|
||||
$(CXX) $(OBJ) -o preparse
|
||||
|
||||
|
248
main.c
248
main.c
@ -1,248 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* A view onto characters owned elsewhere: `start` is the first character,
 * `end` points one past the last one. */
typedef struct {
    char *start;
    char *end;
} substr_t;

/* Returns the number of characters between str->start and str->end. */
inline static int substr_len(substr_t * str) {
    char *first = str->start;
    char *last = str->end;
    return (int)(last - first);
}
|
||||
|
||||
/* Points `target` at the NUL-terminated string `src`: start is the first
 * character, end is the terminating NUL. The characters are not copied. */
void substr_conv(char * src, substr_t * target) {
    const size_t length = strlen(src);

    target->start = src;
    target->end = src + length;
}
|
||||
|
||||
// make new substr on heap using calloc
|
||||
// does not copy take ownership of from
|
||||
substr_t * substr_new(char * from) {
|
||||
substr_t * ptr = calloc(1, sizeof(substr_t));
|
||||
if (ptr == NULL) { return ptr; }
|
||||
|
||||
ptr->start = from;
|
||||
ptr->end = ptr->start + strlen(ptr->start);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* testEnd(current): makes the ENCLOSING function `return 0` when `current`
 * is exhausted, i.e. start has reached end or points at a NUL byte.
 * The end comparison runs first so the byte at `end` is never read. */
#define testEnd(current) \
    do { \
        if ((current)->start == (current)->end || *(current)->start == '\0') return 0; \
    } while (0)

/* next(current): advances `current` by one character, then applies testEnd.
 * Wrapped in do/while(0) so it behaves as a single statement after `if`. */
#define next(current) \
    do { \
        (current)->start++; \
        testEnd(current); \
    } while (0)
|
||||
|
||||
typedef struct {
|
||||
substr_t name;
|
||||
} macro_t;
|
||||
|
||||
macro_t * macro_table = NULL;
|
||||
size_t macro_table_size = 0;
|
||||
|
||||
/* Appends a copy of *m to macro_table, growing the array by one slot.
 * Prints a message and terminates the process with status 1 when the
 * reallocation fails. */
void macro_add(macro_t *m) {
    const size_t grown = macro_table_size + 1;

    macro_table = (macro_t *)realloc(macro_table, grown * sizeof(macro_t));
    if (!macro_table) {
        printf("out of MEM!!\n");
        exit(1);
    }

    /* struct assignment copies the macro into the new last slot */
    macro_table[grown - 1] = *m;
    macro_table_size = grown;
}
|
||||
|
||||
void macro_table_drop() {
|
||||
free(macro_table);
|
||||
}
|
||||
|
||||
void macro_table_init() {
|
||||
// add default macros
|
||||
// /newCommand
|
||||
// /def
|
||||
// /begin
|
||||
// /end
|
||||
macro_t temp;
|
||||
substr_conv("newCommand", &temp.name);
|
||||
macro_add(&temp);
|
||||
|
||||
substr_conv(&temp.name, "renewCommand", &temp.name);
|
||||
macro_add(&temp);
|
||||
|
||||
substr_conv(&temp.name, "def");
|
||||
macro_add(&temp);
|
||||
|
||||
substr_conv(&temp.name, "begin");
|
||||
macro_add(&temp);
|
||||
|
||||
substr_conv(&temp.name, "end");
|
||||
macro_add(&temp);
|
||||
}
|
||||
|
||||
/* Returns 1 when both macros have names of equal length and identical
 * characters, 0 otherwise. */
int macro_name_cmp(macro_t * one, macro_t * two) {
    const int length = substr_len(&one->name);
    const char * lhs = one->name.start;
    const char * rhs = two->name.start;
    int i;

    /* names of different length can never be equal */
    if (length != substr_len(&two->name)) {
        return 0;
    }

    for (i = 0; i < length; i++) {
        if (lhs[i] != rhs[i]) {
            return 0;
        }
    }

    return 1;
}
|
||||
|
||||
/* Searches macro_table for an entry whose name equals m's name.
 *
 * Returns 0 when the name is empty or no entry matches; otherwise returns
 * the 1-based position of the first matching entry. The result is therefore
 * safe to use as a truth value: a raw index would report a hit in slot 0 as
 * "not found" and a miss as "found". */
int macro_lookup(macro_t *m) {
    size_t i;

    if (substr_len(&m->name) == 0) {
        return 0; /* empty names are never looked up */
    }

    for (i = 0; i < macro_table_size; i++) {
        if (macro_name_cmp(macro_table + i, m)) {
            return (int)(i + 1);
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
// tex macro calls always start with backslash
|
||||
int parseMacro(macro_t *target, substr_t * current) {
|
||||
if (*current->start != '\\') {
|
||||
return 0;
|
||||
}
|
||||
|
||||
target->name.end = target->name.start = current->start;
|
||||
|
||||
while((*current->start != '\0') &&
|
||||
substr_len(current) > 0 &&
|
||||
!isspace(*current->start) &&
|
||||
*current->start != '[' &&
|
||||
*current->start != '{' ) {
|
||||
target->name.end = ++current->start;
|
||||
// check for known
|
||||
if (macro_lookup(target)) {
|
||||
break;
|
||||
// this macros name is known, break for now
|
||||
}
|
||||
}
|
||||
|
||||
target->name.end = current->start;
|
||||
|
||||
// read name
|
||||
next(current);
|
||||
|
||||
int depth = 1;
|
||||
|
||||
// optional arguments
|
||||
if (*current->start == '[') {
|
||||
depth = 1;
|
||||
while(*current->start != '\0' && substr_len(current) > 0 && depth) {
|
||||
current->start++;
|
||||
if (*current->start == '[') depth++;
|
||||
if (*current->start == ']') depth--;
|
||||
}
|
||||
|
||||
next(current);
|
||||
}
|
||||
|
||||
// required arguments
|
||||
if (*current->start == '{') {
|
||||
depth = 1;
|
||||
while(*current->start != '\0' && substr_len(current) > 0 && depth) {
|
||||
current->start++;
|
||||
if (*current->start == '{') depth++;
|
||||
if (*current->start == '}') depth--;
|
||||
}
|
||||
}
|
||||
|
||||
// all done
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Writes "macro: ", the characters of the macro's name and a newline to stdout. */
void printMacro(macro_t * macro) {
    const size_t length = (size_t)(macro->name.end - macro->name.start);

    fputs("macro: ", stdout);
    fwrite(macro->name.start, 1, length, stdout);
    putchar('\n');
}
|
||||
|
||||
/* Scans `str` for macro calls and prints each one found.
 *
 * Returns 1 when the whole text was consumed, 0 as soon as parseMacro
 * rejects a position that starts with a backslash. */
int parseText(substr_t * str) {
    while (substr_len(str) > 0) {
        /* plain text: skip ahead to the next backslash */
        while (substr_len(str) > 0 && *str->start != '\\') {
            ++str->start;
        }

        /* trailing text without another macro is not an error */
        if (substr_len(str) <= 0) {
            break;
        }

        /* here starts a macro */
        macro_t macro;
        if (!parseMacro(&macro, str)) {
            return 0;
        }

        printMacro(&macro);
    }

    return 1;
}
|
||||
|
||||
|
||||
#include <sys/mman.h> // for mmap()
|
||||
#include <sys/stat.h> // for fstat()
|
||||
#include <fcntl.h> // for open()
|
||||
#include <unistd.h> // for close()
|
||||
#include <errno.h> // for perror()
|
||||
|
||||
int main(int argc, char ** args) {
|
||||
// find all the files the given tex files depend on
|
||||
|
||||
int fd = 0;
|
||||
struct stat filestat;
|
||||
|
||||
for(;argc > 1; --argc) {
|
||||
char * filename = args[argc-1];
|
||||
printf("looking at %s\n", filename);
|
||||
|
||||
// try to open file
|
||||
fd = open(filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("could not open input file");
|
||||
continue;
|
||||
}
|
||||
fstat(fd, &filestat);
|
||||
printf("file size: %d\n", filestat.st_size);
|
||||
|
||||
// try to mmap file
|
||||
char * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (memory_area == NULL) {
|
||||
perror("could not mmap the input");
|
||||
continue;
|
||||
}
|
||||
|
||||
printf("mmap success, parse file\n");
|
||||
|
||||
substr_t file = {
|
||||
.start = memory_area,
|
||||
.end = memory_area + filestat.st_size,
|
||||
};
|
||||
|
||||
macro_table_init();
|
||||
|
||||
if (!parseText(&file)) {
|
||||
printf("error parsing!");
|
||||
} else {
|
||||
printf("parsed %s\n", filename);
|
||||
}
|
||||
|
||||
// cleanup
|
||||
munmap(memory_area, filestat.st_size);
|
||||
close(fd);
|
||||
macro_table_drop();
|
||||
}
|
||||
|
||||
printf("done\n");
|
||||
}
|
427
main.cpp
Normal file
427
main.cpp
Normal file
@ -0,0 +1,427 @@
|
||||
/*
|
||||
* main.cpp
|
||||
*
|
||||
* Created on: 07.10.2017
|
||||
* Author: julian
|
||||
*/
|
||||
|
||||
#include <sys/mman.h> // for mmap()
#include <sys/stat.h> // for fstat()
#include <fcntl.h> // for open()
#include <unistd.h> // for close()
#include <errno.h> // for perror()

#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/// Non-owning view onto the character range [start, end).
///
/// The characters live elsewhere (a mapped file, a string literal); the view
/// must not outlive them. A default-constructed view is empty.
struct Substring {
    const char * start, * end;

    /// View onto the explicit range [start, end).
    Substring(const char * start, const char * end) :
        start(start), end(end) {}

    /// View onto a NUL-terminated string (the NUL is excluded).
    Substring(const char * str):
        start(str), end(str + std::strlen(str))
    {}

    /// Empty view.
    Substring():
        start(nullptr), end(nullptr)
    {}

    /// Copies the viewed characters into a std::string.
    std::string toString() const {
        // keep the empty case away from the (possibly null) pointers
        return start == end ? std::string() : std::string(start, end);
    }

    /// Implicit conversion, so a view can be used as a std::string key.
    operator std::string() const {
        return toString();
    }

    /// Number of characters in the view.
    std::string::size_type size() const {
        return static_cast<std::string::size_type>(end - start);
    }

    friend std::ostream &operator<<(std::ostream &out, const Substring &str);

    /// Lexicographic three-way comparison of [a, aend) with [b, bend).
    /// Returns -1, 0 or 1; a proper prefix orders before the longer range.
    template <class IterA, class IterB>
    static int compareRanges(IterA a, IterA aend, IterB b, IterB bend) {
        for (; a != aend && b != bend; ++a, ++b) {
            if (*a < *b) {
                return -1;
            }
            if (*a > *b) {
                return 1;
            }
        }

        if (a == aend && b == bend) {
            return 0;
        }
        // exactly one range has characters left; the longer one is greater
        return a != aend ? 1 : -1;
    }

    /// Three-way comparison with another view.
    int compare(const Substring &other) const {
        return compareRanges(start, end, other.start, other.end);
    }

    /// Three-way comparison with a std::string.
    int compare(const std::string &other) const {
        return compareRanges(start, end, other.begin(), other.end());
    }

    /// Three-way comparison with a NUL-terminated string. A dedicated
    /// overload is needed because a literal would otherwise convert equally
    /// well to Substring and to std::string, which is ambiguous.
    int compare(const char *other) const {
        return compareRanges(start, end, other, other + std::strlen(other));
    }

    template <typename T>
    bool operator<(const T &other) const {
        return compare(other) < 0;
    }

    template <typename T>
    bool operator>(const T &other) const {
        return compare(other) > 0;
    }

    template <typename T>
    bool operator==(const T &other) const {
        return compare(other) == 0;
    }

    template <typename T>
    bool operator!=(const T &other) const {
        return !(*this == other);
    }
};
|
||||
|
||||
|
||||
|
||||
std::ostream &operator<<(std::ostream &out, const Substring &str) {
|
||||
auto temp = str.start;
|
||||
while(temp != str.end) {
|
||||
out << *temp;
|
||||
temp++;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
class InputExtractor
|
||||
{
|
||||
public:
|
||||
class Exception : public std::runtime_error {
|
||||
public:
|
||||
Exception(const std::string &str) : std::runtime_error(str) {}
|
||||
};
|
||||
|
||||
typedef std::vector<std::string> List;
|
||||
List operator()(const Substring &input);
|
||||
|
||||
std::string macroExpand(Substring input);
|
||||
|
||||
protected:
|
||||
Substring file;
|
||||
std::map<std::string, Substring> macros;
|
||||
};
|
||||
|
||||
/// Reads one balanced bracket group from the front of `input`.
///
/// @param input    Text to read from; advanced past the closing bracket on
///                 success, left untouched when no group starts here.
/// @param brackets Two characters: the opening and the closing bracket.
/// @return The text between the outermost brackets (nested groups included).
///         An empty Substring when `input` does not start with the opening
///         bracket; in that case a diagnostic is written to cout. When the
///         group is not terminated, everything up to the end is returned.
Substring readBrackets(Substring &input, const char * brackets) {
    if (input.size() == 0 || *input.start != brackets[0]) {
        std::cout << "expected " << brackets[0] << ", got: '";
        if (input.size() > 0) {
            // only read the offending character when there is one
            std::cout << *input.start;
        }
        std::cout << "'";
        return Substring();
    }

    input.start++;  // step over the opening bracket

    Substring result(input.start, input.start);
    int depth = 1;

    // Inspect each character BEFORE stepping over it, so the first character
    // after the opening bracket is checked too and nothing is read at `end`.
    while (input.size() > 0) {
        const char c = *input.start;
        if (c == brackets[0]) {
            depth++;
        } else if (c == brackets[1] && --depth == 0) {
            break;
        }
        input.start++;
    }
    result.end = input.start;

    // advance beyond the closing bracket, if there was one
    if (input.size() > 0) {
        input.start++;
    }

    return result;
}
|
||||
|
||||
/// Expands the macros used inside `input`.
///
/// Ordinary characters are copied unchanged. After a backslash the name is
/// grown one character at a time until it equals a recorded macro (shortest
/// match); the macro's body is then appended in its place. Bodies are
/// inserted verbatim, they are not expanded again.
///
/// @throws Exception when the text after a backslash matches no recorded macro.
std::string InputExtractor::macroExpand(Substring input) {
    std::string result;

    while (input.size() > 0) {
        if (*input.start != '\\') {
            result += *input.start++;
            continue;
        }

        input.start++;  // step over the backslash

        Substring name(input.start, input.start);
        auto lookup = macros.end();
        while (lookup == macros.end() && input.size() > 0) {
            name.end = ++input.start;
            lookup = macros.find(name.toString());
        }

        if (lookup == macros.end()) {
            throw Exception("unknown macro in macro expansion: " + name.toString());
        }

        // input.start already sits on the first character after the name;
        // it must not be advanced again, or that character would be lost and
        // the cursor could step past the end of the input.
        result += lookup->second.toString();
    }

    return result;
}
|
||||
|
||||
#include <functional>
|
||||
// Maps a command name to the handler invoked with the result list and the
// command's expanded argument.
using CommandList = std::map<std::string, std::function<void(InputExtractor::List &, std::string)>>;
|
||||
|
||||
/// Returns the extension of `str` including the leading dot (".tex"), or an
/// empty string when the last path component has no dot.
/// Both '/' and '\\' are treated as path separators.
std::string Extension(std::string str) {
    const std::string::size_type pos = str.find_last_of("./\\");

    // a separator found after the last dot means the file name itself has none
    if (pos == std::string::npos || str[pos] != '.') {
        return std::string();
    }

    return str.substr(pos);
}
|
||||
|
||||
/// Returns the directory part of `path` including the trailing separator
/// ("dir/sub/file" -> "dir/sub/"). A path without any separator lives in the
/// current directory, so "./" is returned for it.
/// Both '/' and '\\' are treated as path separators.
std::string Basedir(std::string path) {
    const std::string::size_type pos = path.find_last_of("/\\");

    if (pos == std::string::npos) {
        return "./";
    }

    return path.substr(0, pos + 1);
}
|
||||
|
||||
/// Returns the last component of `path` without any leading separator
/// ("dir/file.tex" -> "file.tex"). A path without separator is returned
/// unchanged; a path ending in a separator yields an empty string.
/// Both '/' and '\\' are treated as path separators.
std::string Name(std::string path) {
    const std::string::size_type pos = path.find_last_of("/\\");

    if (pos == std::string::npos) {
        return path;
    }

    return path.substr(pos + 1);
}
|
||||
|
||||
std::string Basename(std::string path) {
|
||||
std::string temp = Name(path);
|
||||
|
||||
auto it = temp.end();
|
||||
while(it != temp.begin() && *it != '.') {
|
||||
it--;
|
||||
}
|
||||
|
||||
return std::string(temp.begin(), it);
|
||||
}
|
||||
|
||||
#include <fstream>
|
||||
|
||||
/// Returns true when `path` can be opened for reading, false otherwise.
bool Exists(std::string path) {
    std::ifstream probe(path);
    const bool opened = !probe.fail();

    if (opened) {
        probe.close();
    }

    return opened;
}
|
||||
|
||||
/// Extracts the files referenced by the TeX file at `path`.
///
/// The file is memory-mapped, parsed with a fresh InputExtractor, and every
/// resulting name is prefixed with the directory of `path`.
///
/// @return The prefixed names; an empty list when the file cannot be opened,
///         inspected or mapped (a message is written to stderr), or is empty.
/// @throws Whatever the parser throws; the mapping and the descriptor are
///         released before the exception leaves this function.
InputExtractor::List Include(std::string path) {
    InputExtractor::List list;

    const int fd = open(path.c_str(), O_RDONLY);
    if (fd == -1) {
        std::cerr << "cannot open " << path << std::endl;
        return list;
    }

    struct stat fileinfo;
    if (fstat(fd, &fileinfo) == -1) {
        perror("stat");
        close(fd);
        return list;
    }

    // mmap rejects a zero length, and an empty file references nothing
    if (fileinfo.st_size == 0) {
        close(fd);
        return list;
    }

    void * memptr = mmap(nullptr, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0);
    // mmap signals failure with MAP_FAILED, never with a null pointer
    if (memptr == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return list;
    }

    const char * text = static_cast<const char *>(memptr);
    try {
        list = InputExtractor()(Substring(text, text + fileinfo.st_size));
    } catch (...) {
        munmap(memptr, fileinfo.st_size);
        close(fd);
        throw;
    }

    // the list holds copies, so the mapping can go before the names are edited
    munmap(memptr, fileinfo.st_size);
    close(fd);

    // prefix every entry with the directory of `path`, using one separator
    std::string prefix = Basedir(path);
    if (!prefix.empty() && prefix.back() != '/' && prefix.back() != '\\') {
        prefix += '/';
    }
    for (std::string &entry : list) {
        entry = prefix + entry;
    }

    return list;
}
|
||||
|
||||
/// Parses TeX source and returns the files it pulls in.
///
/// Recognised commands:
///  - `\input{f}` / `\include{f}`: `f` (with ".tex" appended when missing) is
///    recorded and the file is scanned recursively through Include().
///  - `\lstinputlisting[opts]{f}`: `f` is recorded as is.
///  - `\def\name{body}`: the body is remembered so that `\name` can be
///    expanded inside later file arguments.
/// `%` starts a comment that runs to the end of the line. A command name is
/// the complete run of letters after the backslash, so `\includegraphics` is
/// not mistaken for `\include`. Progress is logged to cout.
///
/// @throws Exception on a backslash at the very end of the input, or (from
///         macroExpand) when an argument uses an unknown macro.
InputExtractor::List InputExtractor::operator()(const Substring &input){
    file = input;
    List result;
    CommandList IncludeCommands;

    IncludeCommands["input"] = [](List &l, std::string a) {
        if (Extension(a) != ".tex") a += ".tex";
        l.push_back(a);
        // try to extract all inputs of that file
        const List sub = Include(a);
        l.insert(l.end(), sub.begin(), sub.end());
    };

    IncludeCommands["include"] = IncludeCommands["input"];
    IncludeCommands["lstinputlisting"] = [](List &l, std::string a){ l.push_back(a); };

    while (file.size() > 0) {
        if (*file.start == '%') {
            // line comment
            while (file.size() > 0 && *file.start != '\n') {
                file.start++;
            }
            continue;
        }

        // skip ordinary text
        if (*file.start != '\\') {
            file.start++;
            continue;
        }

        file.start++;  // step over the backslash
        if (file.size() == 0) {
            throw Exception("unexpected EOF");
        }

        // read the control word: the maximal run of letters
        Substring name(file.start, file.start);
        while (file.size() > 0 && std::isalpha(static_cast<unsigned char>(*file.start))) {
            name.end = ++file.start;
        }

        if (name.size() == 0) {
            // control symbol such as \% or \\ : the escaped character is
            // consumed so it is not taken for a comment or a new command
            file.start++;
            continue;
        }

        const std::string word = name.toString();
        const auto searchHit = IncludeCommands.find(word);

        if (searchHit != IncludeCommands.end()) {
            std::cout << searchHit->first;

            // the option group is optional; only read it when it is there
            Substring options;
            if (file.size() > 0 && *file.start == '[') {
                options = readBrackets(file, "[]");
            }
            std::cout << "[" << options << "]";

            const Substring args = readBrackets(file, "{}");
            std::cout << ":" << args << std::endl;

            // without a file argument there is nothing to record
            if (args.size() > 0) {
                searchHit->second(result, macroExpand(args));
            }
        } else if (word == "def") {
            // only the form \def\name{body} is understood
            if (file.size() == 0 || *file.start != '\\') {
                continue;
            }
            file.start++;

            Substring defined(file.start, file.start);
            while (file.size() > 0 && *file.start != '{' &&
                   !std::isspace(static_cast<unsigned char>(*file.start))) {
                defined.end = ++file.start;
            }

            std::cout << "new macro definition: " << defined << std::endl;
            // like insert(): an earlier definition of the same name is kept
            macros.emplace(defined.toString(), readBrackets(file, "{}"));
        }
    }

    return result;
}
|
||||
|
||||
int main(int argc, char ** args) {
|
||||
// find all the files the given tex files depend on
|
||||
|
||||
int fd = 0;
|
||||
struct stat filestat;
|
||||
|
||||
for(;argc > 1; --argc) {
|
||||
char * filename = args[argc-1];
|
||||
cout << "looking at " << filename << std::endl;
|
||||
|
||||
// try to open file
|
||||
fd = open(filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("could not open input file");
|
||||
continue;
|
||||
}
|
||||
|
||||
fstat(fd, &filestat);
|
||||
//cout << "file size: " << filestat.st_size << endl;
|
||||
|
||||
// try to mmap file
|
||||
void * memory_area = mmap(NULL, filestat.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (memory_area == nullptr) {
|
||||
perror("could not mmap the input");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
//cout << "mmap success, parse file" << endl;
|
||||
|
||||
Substring file((const char*)memory_area, (const char*)memory_area + filestat.st_size);
|
||||
try {
|
||||
auto list = InputExtractor()(file);
|
||||
// write in makefile style
|
||||
std::ofstream output(Basename(filename) + ".d");
|
||||
if (!output) {
|
||||
std::cout << "could not create output file" << std::endl;
|
||||
} else {
|
||||
output << "filename: ";
|
||||
for (auto it = list.begin(); it != list.end(); it++) {
|
||||
output << *it << "\\\\\n";
|
||||
}
|
||||
output << endl;
|
||||
}
|
||||
|
||||
output.close();
|
||||
} catch(InputExtractor::Exception &e) {
|
||||
cout << e.what() << endl;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
munmap(memory_area, filestat.st_size);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
printf("done\n");
|
||||
}
|
Loading…
Reference in New Issue
Block a user