From b1e359d4e02099787f1fc36255a27b665b135710 Mon Sep 17 00:00:00 2001 From: Julian Daube Date: Sun, 8 Oct 2017 18:44:27 +0200 Subject: [PATCH] real test file, ported stuff to c++ streams --- Einleitung.tex | 2 + Makefile | 14 +- fs.cpp | 95 ++++++++++ fs.hpp | 67 +++++++ main.cpp | 471 +++++++++++++++++++--------------------------- memory_string.hpp | 86 +++++++++ test.tex | 16 +- 7 files changed, 460 insertions(+), 291 deletions(-) create mode 100644 Einleitung.tex create mode 100644 fs.cpp create mode 100644 fs.hpp create mode 100644 memory_string.hpp diff --git a/Einleitung.tex b/Einleitung.tex new file mode 100644 index 0000000..632010a --- /dev/null +++ b/Einleitung.tex @@ -0,0 +1,2 @@ +\section{Einleitung} +HI\footnote{Fußnote} diff --git a/Makefile b/Makefile index 33f298d..b13bd06 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,19 @@ -OBJ:=main.o -OUTPUT:= preparse +OBJ:= main.o fs.o +OUTPUT:= texdepends .PHONY: test debug clean -all test: debug +all: debug +test: debug ./$(OUTPUT) test.tex -debug: CXXFLAGS:= -g -std=c++11 -O0 +debug: CXXFLAGS:= -g -std=c++11 debug: $(OUTPUT) clean: $(RM) $(OBJ) $(OUTPUT) -$(OUTPUT): $(OBJ) - $(CXX) $(OBJ) -o preparse +main.o: fs.o +$(OUTPUT): main.o + $(CXX) $(OBJ) -o $(OUTPUT) diff --git a/fs.cpp b/fs.cpp new file mode 100644 index 0000000..b7c904b --- /dev/null +++ b/fs.cpp @@ -0,0 +1,95 @@ +/* + * fs.cpp + * + * Unix implementation + * + * Created on: 08.10.2017 + * Author: julian + */ + +#include "fs.hpp" + +#include + +namespace Path { + +std::tuple Split(const Path &p) { + auto it = p.end(); + while(it != p.begin() && *it != '/') { + it--; + } + + return std::make_tuple(Path(p.begin(), it), Path(it,p.end())); +} + +Path Name(const Path &p) { + return std::get<1>(Split(p)); +} + +Path Dir(const Path &p) { + return std::get<0>(Split(p)); +} + +Path Clean(const Path &p) { + Path temp (p); + + auto it = temp.begin(); + bool hasSlash = false; + while(it != temp.end()) { + if (*it == '/') { + if (hasSlash) { + // remove + temp.erase(it); + continue; + } + + hasSlash = true; + } else { + hasSlash = false; + } + it++; + } + + return temp; +} + +std::string Extension(const Path &path) { + auto it = path.end(); + while(it != path.begin() && *it != '.' && *it != '/') { + it--; + } + + return std::string(it, path.end()); +} + +std::string Basename(const Path &path) { + Path temp = Name(path); + + auto it = temp.end(); + while(it != temp.begin() && *it != '.') { + it--; + } + + if (it == temp.begin()) { + return path; + } + + return std::string(temp.begin(), it); +} + +}; // end namespace Path + + +Path::Path fs::cwd() { + ssize_t size = 1000, nsize; + + while(1) { + char buffer[size]; + if ((nsize = readlink("/proc/self/cwd", buffer, size)) < size) { + buffer[nsize] = 0; + return Path::Clean(Path::Path(buffer)); + } + size = nsize + 100; + } +} + diff --git a/fs.hpp b/fs.hpp new file mode 100644 index 0000000..d94eea9 --- /dev/null +++ b/fs.hpp @@ -0,0 +1,67 @@ +/* + * fs.hpp + * + * Created on: 08.10.2017 + * Author: julian + */ + +#ifndef FS_HPP_ +#define FS_HPP_ + +#include +#include + +namespace Path { + typedef std::string Path; + + // cleanup double path limiters + Path Clean(const Path &p); + + inline Path Join(const Path &a, const Path &b) { + if (a.empty()) return b; + if (b.empty()) return a; + + return a + "/" + b; + } + + // Join n path parts together + template + inline Path Join(const Path &p, T ... list) { + return Path(p) + "/" + Join(list...); + } + + + // Split a path into the name and the remainder + // e.g. "a/b/c.d becomes (a/b, c.d) + std::tuple Split(const Path &path); + + // strips the path of the filename + Path Dir(const Path &path); + + // returns just the filename + Path Name(const Path &path); + + // returns the filename's extension as string + // or empty if the path points to a directory + std::string Extension(const Path &path); + + // returns the filename stripped of the first extension + // or empty if the path points to a directory + std::string Basename(const Path &path); + + inline bool isRelative(const Path &path) { return path.size() && path.front() != '/'; } + inline bool isAbsolute(const Path &path) { return path.size() && path.front() == '/'; } + + inline bool isDir(const Path &path) { return path.size() && path.back() == '/'; } + inline bool isFile(const Path &path) { return path.size() && path.back() != '/'; } +} + +namespace fs { + + // return the current working directory + Path::Path cwd(); +} + + + +#endif /* FS_HPP_ */ diff --git a/main.cpp b/main.cpp index b54351a..2237927 100644 --- a/main.cpp +++ b/main.cpp @@ -9,122 +9,20 @@ #include // for fstat() #include // for open() #include // for close() -#include // for perror() - +#include // for perror() #include #include #include #include #include +#include + +#include "fs.hpp" +#include "memory_string.hpp" using namespace std; -struct Substring { - const char * start, * end; - - Substring(const char * start, const char * end) : - start(start), end(end) {} - - Substring(const char * str): - start(str), end(str + strlen(str)) - {} - - Substring(): - start(nullptr), end(nullptr) - {} - - - std::string toString() const { - std::string temp; - temp.reserve(size()); - - const char * it = start; - while(it != end) { - temp += *it; - it++; - } - - return temp; - } - operator std::string() { - return toString(); - } - - std::string::size_type size() const { - return (std::string::size_type)(end - start); - } - - friend std::ostream &operator<<(std::ostream &out, const Substring &str); - - template - int __compare(IterA a, IterA aend, IterB b, IterB bend) const { - while(a != aend && b != bend) { - if (*a < * b) { - return -1; - } - if (*a > *b) { - return 1; - } - - a++; - b++; - } - - if (a == aend && b == bend) { - return 0; - } - - if (a != aend) { - return 1; - } - - return -1; - } - - - int compare(const Substring &other) const { - return __compare(start, end, other.start, other.end); - } - - int compare(const std::string &other) const { - return __compare(start, end, other.begin(), other.end()); - } - - template - bool operator<(const T &other) const { - return compare(other) < 0; - } - - template - bool operator>(const T &other) const { - return compare(other) > 0; - } - - template - bool operator==(const T &other) const { - return compare(other) == 0; - } - - template - bool operator!=(const T &other) const { - return !(*this == other); - } -}; - - - -std::ostream &operator<<(std::ostream &out, const Substring &str) { - auto temp = str.start; - while(temp != str.end) { - out << *temp; - temp++; - } - - return out; -} - - class InputExtractor { public: @@ -133,73 +31,89 @@ public: Exception(const std::string &str) : std::runtime_error(str) {} }; - typedef std::vector List; - List operator()(const Substring &input); + typedef std::vector List; - std::string macroExpand(Substring input); + List operator()(const Path::Path &file, const MemoryString &str); + + std::string macroExpand(const std::string &input); + List Include(Path::Path); protected: - Substring file; - std::map macros; + std::map macros; + std::set includes; }; -Substring readBrackets(Substring &input, const char * brackets) { - if (input.size() <= 0 || *input.start != brackets[0]) { - cout << "expected " << brackets[0] << ", got: '" << *input.start << "'"; - return Substring(); +#include + + +template +std::string readTill(iterator &start, const iterator &end, std::function limiter) { + while(start != end && !limiter(start)) { + ++start; } - input.start++; - - int depth = 1; - Substring result(input.start, input.start); - - while(depth > 0 && input.size() > 0) { - if (*input.start == brackets[0]) { - depth++; - } - if (*input.start == brackets[1]) { - depth--; - - if(depth==0) break; - } - result.end = ++input.start; - } - // advance beyond last bracket - if (input.size()) - input.start++; - - return result; + return std::string(start, end); } -std::string InputExtractor::macroExpand(Substring input) { + +template +std::string readBrackets(iterator &begin, const iterator &end, const char * brackets) { + auto current = begin; + + if (begin == end || *current != brackets[0]) { + return std::string(); + } + + // skip first opening bracket + current++; + + int depth = 1; + auto bbegin = current, bend = current; + + while(depth > 0 && current != end) { + if (*current== brackets[0]) { + depth++; + } else if (*current == brackets[1]) { + depth--; + } else { + bend = ++current; + } + } + + // advance beyond last bracket + if (current != end) + current++; + + + return std::string(bbegin, bend); +} + +std::string InputExtractor::macroExpand(const std::string &input) { std::string result; - Substring name; - std::map::iterator lookup; + std::map::iterator lookup; //cout << "expanding: " << input << endl; - while(input.size() > 0) { - if (*input.start == '\\') { - input.start++; - name.start = name.end = input.start; + std::string::const_iterator current = input.begin(); + while(current != input.end()) { + if (*current == '\\') { + current++; + std::string::const_iterator start = current; - while(input.size() > 0) { - name.end = ++input.start; - - if ((lookup = macros.find(name.toString())) != macros.end()) { + while(current != input.end() && *current != '\\') { + if ((lookup = macros.find(std::string(start, current))) != macros.end()) { break; } } if (lookup == macros.end()) { - throw Exception("unknown macro in macro expansion: " + name.toString()); + throw Exception("unknown macro in macro expansion: " + std::string(start, current)); } - result += lookup->second.toString(); + result += lookup->second; } else { - result += *input.start; + result += *current; } - input.start++; + ++current; } return result; @@ -208,73 +122,8 @@ std::string InputExtractor::macroExpand(Substring input) { #include typedef std::map> CommandList; -std::string Extension(std::string str) { - auto it = str.end(); - while(it != str.begin() && *it != '.' && *it != '/' && *it != '\\') { - it--; - } - - return std::string(it, str.end()); -} - -std::string Basedir(std::string path) { - auto it = path.end(); - while(it != path.begin() && *it != '/' && *it != '\\') { - it--; - } - - return std::string(path.begin(), it); -} - -std::string Name(std::string path) { - auto it = path.end(); - while(it != path.begin() && *it != '/' && *it != '\\') { - it--; - } - - return std::string(it, path.end()); -} - -std::string Basename(std::string path) { - std::string temp = Name(path); - - auto it = temp.end(); - while(it != temp.begin() && *it != '.') { - it--; - } - - if (it == temp.begin()) { - return path; - } - - return std::string(temp.begin(), it); -} - -inline bool PathRelative(std::string path) { - return path.size() && path[0] != '/'; -} - #include -std::string cwd() { - ssize_t size = 1000, nsize; - - while(1) { - char buffer[size]; - if ((nsize = readlink("/proc/self/cwd", buffer, size)) < size) { - buffer[size] = 0; - return std::string(buffer); - } - size = nsize + 100; - } - -// -// buffer[rsize+1] = 0; -// -// std::string result; -// result.assign(buffer); -// return result; -} #include @@ -288,8 +137,22 @@ bool Exists(std::string path) { return true; } -InputExtractor::List Include(std::string path) { - InputExtractor::List list; +InputExtractor::List InputExtractor::Include(Path::Path path) { + path = Path::Clean(path); + List list; + + std::cout << "including file " << path << "..."; + + // look for include + if (includes.find(path) != includes.end()) { + cout << "SKIP" << endl; + return list; // already been there + } + + cout << endl; + + // add to include list + includes.insert(path); int fd = open(path.c_str(), O_RDONLY); if (fd == -1) { @@ -312,88 +175,102 @@ InputExtractor::List Include(std::string path) { return list; } - Substring str((const char *)memptr, (const char*)memptr + fileinfo.st_size); +// Substring str((const char *)memptr, (const char*)memptr + fileinfo.st_size); + MemoryString file((char*)memptr, fileinfo.st_size); - std::string basedir = Basedir(path); - list = InputExtractor()(str); + std::string basedir = Path::Dir(path); + list = (*this)(basedir, file); // follow include - // add basedir to list for all relative paths - for (auto it = list.begin(); it != list.end(); it++) { - if (PathRelative(*it)) - *it = basedir + '/' + *it; - } // cleanup munmap(memptr, fileinfo.st_size); close(fd); return list; + + cout << path << "done" << endl; } -InputExtractor::List InputExtractor::operator()(const Substring &input){ - file = input; +InputExtractor::List InputExtractor::operator()(const Path::Path &file, const MemoryString &str){ List result; CommandList IncludeCommands; - IncludeCommands["input"] = [](List &l, std::string a) { + IncludeCommands["input"] = [&file, this](List &l, std::string a) { if (a.empty()) return; - if (Extension(a) != ".tex") a += ".tex"; + if (Path::Extension(a) != ".tex") a += ".tex"; + // try to make path absolute + if (Path::isRelative(a)) { + // add current file's path + a = Path::Join(file, a); + } + if (Path::isRelative(a)) { + // add process working directory + a = Path::Join(fs::cwd(), a); + } + l.push_back(a); // try to extract all inputs of that file auto sub = Include(a); - std::copy(sub.begin(), sub.end(), std::inserter(l, l.end())); + if (!sub.empty()) + std::copy(sub.begin(), sub.end(), std::inserter(l, l.end())); }; IncludeCommands["include"] = IncludeCommands["input"]; IncludeCommands["lstinputlisting"] = [](List &l, std::string a){ l.push_back(a); }; - // skip normie text - while(file.size()) { + MemoryString::const_iterator current = str.begin(); - if (*file.start == '%') { + while(current != str.end()) { + if (*current == '%') { // line commment - while(file.size() > 0 && *file.start != '\n') - file.start++; + while(current != str.end() && *current != '\n') + current++; continue; } - if (*file.start != '\\') { - file.start++; + if (*current != '\\') { + // skip non macros + current++; continue; } // read macro name - file.start++; + current++; - // TODO: throw exception - if (!file.size()) + if (current == str.end()) throw Exception("unexpected EOF"); - Substring name; - name.start = name.end = file.start; - for(name.end = ++file.start; file.size() > 0 && !isspace(*file.start) && *file.start != '{' && *file.start != '\\'; name.end = (++file.start+1)) { - auto searchHit = IncludeCommands.find(name); - if ((searchHit = IncludeCommands.find(name)) != IncludeCommands.end()) { - file.start++; - cout << searchHit->first; - cout << "[" << readBrackets(file, "[]") << "]"; - auto args = readBrackets(file, "{}"); - cout << ":" << args << endl; - searchHit->second(result, macroExpand(args)); - break; - } else if (name == std::string("def")) { - file.start++; + auto start = current, end = current; - if (file.size() <= 0 || *file.start != '\\') { + auto limiter = [](char c) -> bool { return isspace(c) || c == '{' || c == '\\'; }; + + for(; current != str.end() && !limiter(*current); end = (++current+1)) + { + auto searchHit = IncludeCommands.find(std::string(start, end)); + if (searchHit != IncludeCommands.end()) { + // handle crosslink + current++; + if (current == str.end()) continue; + + cout << searchHit->first << "[" << readBrackets(current, str.end(), "[]") << "]"; + auto inner = readBrackets(current, str.end(), "{}"); + cout << ":" << inner << endl; + + // add to results + searchHit->second(result, macroExpand(inner)); + break; + } else if (std::string(start, end) == std::string("def")) { + // define new macro + current++; + if (current == str.end() || *current != '\\') { continue; } - file.start++; + current++; - Substring name(file.start, file.end); - while (file.size() > 0 && *file.start != '{' && !isspace(*file.start)) { - name.end = ++file.start; - } + std::function limiter = [](const MemoryString::const_iterator &it) -> bool { return std::string("{ \t\n").find(*it) != std::string::npos; }; + + std::string name = readTill(current, str.end(), limiter); cout << "new macro definition: " << name << endl; - macros.insert(std::pair(name.toString(), readBrackets(file, "{}"))); + macros.insert(std::pair(name, readBrackets(current, str.end(), "{}"))); break; } } @@ -402,17 +279,42 @@ InputExtractor::List InputExtractor::operator()(const Substring &input){ return result; } -#include - int main(int argc, char ** args) { // find all the files the given tex files depend on - cout << cwd() << std::endl; int fd = 0; struct stat filestat; + for(;argc > 1; --argc) { + + Path::Path filename = args[argc-1]; + InputExtractor parser; + + InputExtractor::List list = parser.Include(filename); + + // output results in makefile rule style + + Path::Path outfile_name = Path::Basename(filename) + ".d"; + cout << "writing dependecy rules to " << outfile_name << "..."; + + std::ofstream outfile(outfile_name); + + if (!outfile) { + cout << "could not create file!" << endl; + continue; + } + + outfile << filename << ":"; + + for (auto it = list.begin(); it != list.end(); it++) { + outfile << *it << "\t\\\n"; + } + + cout << "done" << endl; + + /* char * filename = args[argc-1]; - cout << "looking at " << filename << std::endl; + cout << "opening " << filename << "..."; // try to open file fd = open(filename, O_RDONLY); @@ -431,37 +333,41 @@ int main(int argc, char ** args) { continue; } + cout << "start parsing" << endl; - //cout << "mmap success, parse file" << endl; + MemoryString file((char*)memory_area, filestat.st_size); - Substring file((const char*)memory_area, (const char*)memory_area + filestat.st_size); try { - auto list = InputExtractor()(file); + InputExtractor::List list = InputExtractor()(file); + + Path::Path outfilename = Path::Basename(Path::Path(filename)) + ".d"; + + cout << "writing makedeps file to " << outfilename << "..." << endl; + // write in makefile style - std::ofstream output(Basename(filename) + ".d"); + std::ofstream output(outfilename); if (!output) { std::cout << "could not create output file" << std::endl; } else { output << filename << ": "; for (auto it = list.begin(); it != list.end(); it++) { - output << '\t'; - if (PathRelative(*it)) { - if (PathRelative(filename)) { - output << cwd(); + if (Path::isRelative(*it)) { + if (Path::isRelative(filename)) { + *it = Path::Join(fs::cwd(), filename, *it); } else { - output << Basename(filename); + *it = Path::Join(Path::Path(filename), *it); } - output << "/" << *it; - } else { - output << *it; } - output << "\t\\\n"; + + cout << "depends: " << *it << endl; + output << '\t' << *it << "\t\\\n"; } output << endl; } - output.close(); + cout << filename << done; + } catch(InputExtractor::Exception &e) { cout << e.what() << endl; } @@ -469,7 +375,8 @@ int main(int argc, char ** args) { // cleanup munmap(memory_area, filestat.st_size); close(fd); + + */ } - printf("done\n"); } diff --git a/memory_string.hpp b/memory_string.hpp new file mode 100644 index 0000000..be76162 --- /dev/null +++ b/memory_string.hpp @@ -0,0 +1,86 @@ +/* + * memoy_string.hpp + * + * Created on: 08.10.2017 + * Author: julian + */ + +#ifndef MEMORY_STRING_HPP_ +#define MEMORY_STRING_HPP_ + +#include + +class MemoryString { + template + struct _iterator : public std::iterator { + T* pos; + _iterator operator++(int){ _iterator temp(*this); pos++; return temp; } + _iterator operator--(int){ _iterator temp(*this); pos--; return temp; } + + _iterator &operator++() { pos++; return *this; } + _iterator &operator--() { pos--; return *this; } + + _iterator &operator+=(int i) { pos += i; return *this; } + _iterator &operator-=(int i) { pos -= i; return *this; } + + _iterator operator+(int i) { _iterator temp(*this); temp += i; return temp; } + _iterator operator-(int i) { _iterator temp(*this); temp -= i; return temp; } + + T operator*() { return *pos; } + T operator*() const { return *pos; } + + T * operator&() { return pos; } + T * operator->() { return pos; } + + template + _iterator(const _iterator &other) { *this = other; } + + template + _iterator &operator=(const _iterator &other) const { + if (this != &other) pos = other.pos; + return *this; + } + + template + bool operator==(const _iterator &other) const { + return pos == other.pos; + } + + template + bool operator!=(const _iterator &other) const { + return pos != other.pos; + } + + private: + _iterator(T *pos) : pos(pos) {} + friend MemoryString; + }; + + + +public: + typedef size_t size_type; + typedef char value_type; + + typedef _iterator const_iterator; + typedef _iterator iterator; + + MemoryString(value_type * base, size_type size): _ptr(base), _size(size) {} + ~MemoryString() {} + + iterator begin() { return iterator(_ptr); } + const_iterator begin() const { return const_iterator(_ptr); } + + iterator end() { return iterator(_ptr + _size); } + const_iterator end() const { return const_iterator(_ptr + _size); } + + size_type size() { return _size; } +protected: + + value_type * _ptr; + size_type _size; +}; + + + +#endif /* MEMORY_STRING_HPP_ */ diff --git a/test.tex b/test.tex index 27fa66e..e49bd16 100644 --- a/test.tex +++ b/test.tex @@ -1,5 +1,15 @@ -\def\hi{hi} +\documentclass{article} +\usepackage[ngerman]{babel} -\include{\hi/test.tex} -\lstinputlisting{jkhdfkjlhsdfkjsdhfk} \ No newline at end of file +\author{Julian Daube} +\title{Ein Test} + +\begin{document} +\tableofcontents +\newpage + +$x_a^2 = \alpha \cdot y = \frac{A}{B}$ + +\input{Einleitung.tex} +\end{document} \ No newline at end of file