| Search for: | |
| Returned: | 146 Documents (sorted by year) |
@Unpublished{ertl19ft,
author = "M. Anton Ertl and Bernd Paysan",
title = "Der neue {Gforth}-Header",
note = "Vortrag bei der Forth-Tagung 2019",
URL = "http://www.complang.tuwien.ac.at/papers/ertl19ft.pdf",
video = "https://forth-ev.de/wiki/events:ft2019:anton-header",
year = "2019",
}
@Unpublished{ertl19ft2,
author = "M. Anton Ertl",
title = "Forth-Quellcode im Flash",
note = "Vortrag bei der Forth-Tagung 2019",
URL = "https://forth-ev.de/wiki/events:ft2019:anton-flash",
year = "2019",
usenetmsgid = "<2019Apr19.185811@mips.complang.tuwien.ac.at>",
}
@InProceedings{paysan&ertl19,
author = "Bernd Paysan and M. Anton Ertl",
title = "The new {Gforth} Header",
booktitle = "35th EuroForth Conference",
year = "2019",
pages = "5--20",
URL = "http://www.euroforth.org/ef19/papers/paysan.pdf",
slides-url = "http://www.euroforth.org/ef19/papers/paysan-slides.pdf",
video = "https://wiki.forth-ev.de/doku.php/events:ef2019:header",
OPTnote = "refereed",
abstract = "The new Gforth header is designed to directly
implement the requirements of Forth-94 and Forth-2012.
Every header is an object with a fixed set of fields
(code, parameter, count, name, link) and methods
(\texttt{execute}, \texttt{compile}, \texttt{(to)},
\texttt{defer@}, \texttt{does},
\texttt{name>interpret}, \texttt{name>compile},
\texttt{name>string}, \texttt{name>link}). The
implementation of each method can be changed per-word
(prototype-based object-oriented programming). We
demonstrate how to use these features to implement
optimization of constants, \texttt{fvalue},
\texttt{defer}, \texttt{immediate}, \texttt{to} and
other dual-semantics words, and \texttt{synonym}.",
}
@Misc{ef2019:rs-int,
author = "M. Anton Ertl",
title = "Interactive multiline \code{>R R>} in {Gforth}",
howpublished = "https://wiki.forth-ev.de/doku.php/events:ef2019:rs-int",
year = "2019",
note = "Video of otherwise unpublished talk at EuroForth
2019",
}
@InProceedings{ertl19kps,
author = "M. Anton Ertl",
title = "Integer Division by Multiplying with the Double-Width
Reciprocal",
booktitle = "20. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS)",
year = "2019",
editor = "Martin Pl{\"u}micke and Fayez Abu Alia",
pages = "75--84",
URL = "http://www.complang.tuwien.ac.at/papers/ertl19kps.pdf",
slides-url = "http://www.complang.tuwien.ac.at/papers/ertl19kps-slides.pdf",
abstract = "Earlier work on integer division by multiplying with
the reciprocal has focused on multiplying with a
single-width reciprocal, combined with a correction and
followed by a shift. The present work explores using a
double-width reciprocal to allow getting rid of the
correction and shift.",
}
@InProceedings{thier+18,
author = "Patrick Thier and M. Anton Ertl and Andreas Krall",
title = "Fast and Flexible Instruction Selection with
Constraints",
booktitle = "Compiler Construction (CC'18)",
year = "2018",
pages = "93--103",
URL = "http://www.complang.tuwien.ac.at/papers/thier+18.pdf",
DOI = "10.1145/3178372.3179501",
abstract = "Tree-parsing instruction selection as used in, e.g.,
lcc, uses dynamic costs to gain flexibility and handle
situations (such as read-modify-write instructions)
that do not fit into the basic tree-parsing model. The
disadvantage of dynamic costs is that we can no longer
turn the tree grammar into a tree automaton (as is done
by burg) for fast instruction selection for JIT
compilers. In this paper we introduce constraints that
say whether a tree-grammar rule is applicable or not.
While theoretically less powerful than dynamic costs,
constraints cover the practical uses of dynamic costs;
more importantly, they allow turning the tree grammar
with constraints into a tree automaton (with
instruction-selection-time checks), resulting in faster
instruction selection than with pure
instruction-selection-time dynamic programming. We
integrate constraints in an instruction selector that
matches DAGs with tree rules. We evaluate this concept
in lcc and the CACAO JavaVM JIT compiler, and see
instruction selector speedups by a factor 1.33--1.89.",
doi-url = "http://dx.doi.org/10.1145/3178372.3179501",
}
(2 dupl. with Abstract and URL)
PDF
DOI resolver
@Unpublished{ertl18ft,
author = "M. Anton Ertl",
title = "Verallgemeinerung von locals",
note = "Vortrag bei der Forth-Tagung 2018",
URL = "http://www.complang.tuwien.ac.at/papers/ertl18ft.pdf",
video = "https://wiki.forth-ev.de/doku.php/events:tagung-2018:locals",
year = "2018",
}
@InProceedings{ertl18manlang,
author = "M. Anton Ertl",
title = "Software Vector Chaining",
booktitle = "15th International Conference on Managed Languages &
Runtimes (Manlang'18)",
year = "2018",
pages = "Article-18",
URL = "http://www.complang.tuwien.ac.at/papers/ertl18manlang.pdf",
DOI = "10.1145/3237009.3237021",
abstract = "Providing vectors of run-time determined length as
opaque value types is a good interface between the
machine-level SIMD instructions and portable
application-oriented programming languages.
Implementing vector operations requires a loop that
breaks the vector into SIMD-register-sized chunks. A
compiler can fuse the loops of several vector
operations together. However, during normal compilation
this is only easy if no other control structures are
involved. This paper explores an alternative: collect a
trace of vector operations at run-time (following the
program control flow during this collecting step), and
then perform the combined vector loop. This arrangement
has a certain run-time overhead, but its implementation
is simpler and can happen independently, in a library.
Preliminary performance results indicate that the
overhead makes this approach beneficial only for long
vectors ($>1$KB). For shorter vectors, unfused loops
should be used in a library setting. Fortunately, this
choice can be made at run time, individually for each
vector operation.",
doi-url = "http://dx.doi.org/10.1145/3237009.3237021",
}
(3 dupl. with Abstract and URL)
PDF
DOI resolver
@InProceedings{ertl&paysan18,
author = "M. Anton Ertl and Bernd Paysan",
title = "Closures --- the {Forth} way",
booktitle = "34th EuroForth Conference",
year = "2018",
pages = "17--30",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26paysan.pdf",
url2 = "http://www.euroforth.org/ef18/papers/ertl.pdf",
slides-url = "http://www.euroforth.org/ef18/papers/ertl-slides.pdf",
video = "https://wiki.forth-ev.de/doku.php/events:ef2018:closures",
OPTnote = "refereed",
abstract = "In Forth 200x, a quotation cannot access a local
defined outside it, and therefore cannot be
parameterized in the definition that produces its
execution token. We present Forth closures; they lift
this restriction with minimal implementation
complexity. They are based on passing parameters on the
stack when producing the execution token. The
programmer has to explicitly manage the memory of the
closure. We show a number of usage examples. We also
present the current implementation, which takes
109~source lines of code (including some extra
features). The programmer can mechanically convert
lexical scoping (accessing a local defined outside)
into code using our closures, by applying assignment
conversion and flat-closure conversion. The result can
do everything one expects from closures, including
passing Knuth's man-or-boy test and living beyond the
end of their enclosing definitions.",
}
@Article{ertl18vd,
author = "M. Anton Ertl",
title = "Forth-200X-Treffen auf der EuroForth 2017",
journal = "Vierte Dimension",
year = "2018",
volume = "34",
number = "3",
pages = "5--6",
}
@Unpublished{ertl17ft,
author = "M. Anton Ertl",
title = "{Statische Typ{\"u}berpr{\"u}fung}",
note = "Vortrag bei der Forth-Tagung 2017",
URL = "http://www.complang.tuwien.ac.at/papers/ertl17ft.pdf",
video = "https://wiki.forth-ev.de/doku.php/events:tagung-2017:statische-typpruefung",
year = "2017",
}
@InProceedings{ertl17,
author = "M. Anton Ertl",
title = "SIMD and Vectors",
booktitle = "33rd EuroForth Conference",
year = "2017",
pages = "25--36",
URL = "http://www.euroforth.org/ef17/papers/ertl.pdf",
video = "https://wiki.forth-ev.de/lib/exe/fetch.php/events:ef2017:simd-vectors.mp4",
OPTnote = "refereed",
abstract = "Many programs have parts with significant data
parallelism, and many CPUs provide SIMD instructions
for processing data-parallel parts faster. The weak
link in this chain is the programming language. We
propose a vector wordset so that Forth programmers can
make use of SIMD instructions to speed up the
data-parallel parts of their applications. The vector
wordset uses a separate vector stack containing opaque
vectors with run-time determined length. Preliminary
results using one benchmark show a factor~8 speedup of
a simple vector implementation over scalar Gforth code,
a smaller (factor 1.8) speedup over scalar VFX code;
another factor of 3 is possible on this benchmark with
a more sophisticated implementation. However, vectors
have an overhead; this overhead is amortized in this
benchmark at vector lengths between 3 and 250
(depending on which variants we compare).",
}
(with Abstract)
PDF
@InProceedings{ertl17kps,
author = "M. Anton Ertl",
title = "The Intended Meaning of \emph{Undefined Behaviour} in
{C} Programs",
booktitle = "19. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS'17)",
year = "2017",
editor = "Wolfram Amme and Thomas Heinze",
pages = "20--28",
URL = "http://www.complang.tuwien.ac.at/papers/ertl17kps.pdf",
url2 = "http://www.kps2017.uni-jena.de/proceedings/kps2017_submission_5.pdf",
abstract = "All significant C programs contain undefined
behaviour. There are conflicting positions about how to
deal with that: One position is that all these programs
are broken and may be compiled to arbitrary code.
Another position is that tested and working programs
should continue to work as intended by the programmer
with future versions of the same C compiler. In that
context the advocates of the first position often claim
that they do not know the intended meaning of a program
with undefined behaviour. This paper explores this
topic in greater depth. The goal is to preserve the
behaviour of existing, tested programs. It is achieved
by letting the compiler define a consistent mapping of
C operations to machine code; and the compiler then has
to stick to this behaviour during optimizations and in
future releases.",
}
(with Abstract)
PDF
@InProceedings{ertl-sections16,
author = "M. Anton Ertl",
title = "Sections",
booktitle = "32nd EuroForth Conference",
year = "2016",
pages = "55--57",
URL = "http://www.complang.tuwien.ac.at/papers/ertl-sections16.pdf",
video = "https://wiki.forth-ev.de/lib/exe/fetch.php/events:sections.mp4",
OPTnote = "not refereed",
abstract = "A section is a contiguous region of memory, to which
data or code can be appended (like the Forth
dictionary). Assembly languages and linkers have
supported multiple sections for a long time. This paper
describes the benefits of supporting multiple sections
in Forth, interfaces and implementation techniques.",
}
(with Abstract)
PDF
@InProceedings{ertl-recognizers16,
author = "M. Anton Ertl",
title = "Recognizers: Arguments and Design Decisions",
booktitle = "32nd EuroForth Conference",
year = "2016",
pages = "58--63",
URL = "http://www.complang.tuwien.ac.at/papers/ertl-recognizers16.pdf",
video = "https://wiki.forth-ev.de/lib/exe/fetch.php/events:recognizers.mp4",
OPTnote = "not refereed",
abstract = "The Forth text interpreter processes words and
numbers. Currently the set of words can be extended by
programmers, but not the recognized numbers.
User-defined recognizers allow to extend the
number-recognizer part, too. This paper shows the
benefits of recognizers and discusses counterarguments.
It also discusses several design decisions: Whether to
define temporary words, or a set of interpretation,
compilation, and postponing actions; and whether to
hook the recognizers inside \code{find} or in the text
interpreter.",
}
(with Abstract)
PDF
@InProceedings{ertl-secure16,
author = "M. Anton Ertl",
title = "Security",
booktitle = "32nd EuroForth Conference",
year = "2016",
pages = "82--83",
URL = "http://www.euroforth.org/ef16/papers/ertl-secure.pdf",
video = "https://wiki.forth-ev.de/lib/exe/fetch.php/events:security.mp4",
OPTnote = "presentation slides",
}
@Unpublished{ertl16ft-secure,
author = "M. Anton Ertl",
title = "Sicheres Forth",
note = "Vortrag bei der Forth-Tagung 2016",
URL = "http://www.complang.tuwien.ac.at/papers/ertl16ft-secure.pdf",
year = "2016",
abstract = "Buffer overflows sind ein beliebtes Einfallstor fuer
Angriffe auf Software. Forth vermeidet zwar einige
Probleme von C, aber es ist auch nicht dagegen gefeit.
Das Problem komplett zu eliminieren wuerde Forth zu
weit einschraenken, aber man kann die Programme ein
zwei Teile teilen: einen, der die unsicheren Features
verwenden darf und genau inspiziert werden muss, und
einen, der nur sichere Features verwenden darf und bei
dem man in der Inspektion nicht auf Buffer overflows
schauen muss.",
}
(with Abstract)
PDF
@Unpublished{ertl16ft-vectors,
author = "M. Anton Ertl",
title = "Gedanken zu SIMD und Vektorisierung",
note = "Vortrag bei der Forth-Tagung 2016",
URL = "http://www.complang.tuwien.ac.at/papers/ertl16ft-vectors.pdf",
year = "2016",
abstract = "Seit zwei Jahrzehnten stellen popul{\"a}re Prozessoren
SIMD-Erweiterungen wie SSE zur Verf{\"u}gung. Einen
portabler Assembler auf Basis von Forth sollte es auch
erm{\"o}glichen, diese Befehle zu nutzen.
Klassischerweise geht das {\"u}ber Intrinsics (sehr
architekturspezifisch) oder {\"u}ber automatische
Vektorisierung (sehr kompliziert). In diesem Vortrag
stelle ich einige {\"U}berlegungen zu dem Thema vor.
Ein zentrales Element dabei sind {\"U}berlegungen zur
Speicherverwaltung, um Komplikationen oder
Geschwindigkeitsnachteile durch Abh{\"a}ngigkeiten zu
vermeiden.",
}
(with Abstract)
PDF
@Article{ertl16vd,
author = "M. Anton Ertl",
title = "Forth-2012: Der neue Standard",
journal = "Vierte Dimension",
URL = "http://www.complang.tuwien.ac.at/papers/ertl16vd.pdf",
year = "2016",
volume = "32",
number = "3",
pages = "13--18",
}
@Article{ertl15vd,
author = "M. Anton Ertl",
title = "Forth 200xTreffen auf der EuroForth 2015",
journal = "Vierte Dimension",
year = "2015",
volume = "31",
number = "3+4",
pages = "21",
}
@InProceedings{ertl15,
author = "M. Anton Ertl and Bernd Paysan",
title = "From \texttt{exit} to \texttt{set-does>} --- A Story
of {Gforth} Re-Implementation",
booktitle = "31st EuroForth Conference",
year = "2015",
pages = "41--47",
URL = "http://www.euroforth.org/ef15/papers/ertl.pdf",
slides-url = "http://www.euroforth.org/ef15/papers/ertl-slides.pdf",
OPTnote = "not refereed",
abstract = "We changed \code{exit} from an immediate to a
non-immediate word; this requires changes in the
de-allocation of locals, which leads to changes in the
implementation of colon definitions, and to
generalizing \code{does>} into \code{set-does>} which
allows the defined word to call arbitrary execution
tokens. The new implementation of locals cleanup can
usually be optimized to similar performance as the old
implementation. The new implementation of \code{does>}
has performance similar to the old implementation,
while using \code{set-does>} results in speedups in
certain cases.",
}
@InProceedings{ertl-recognizers15,
author = "M. Anton Ertl",
title = "Recognizers --- Why and How",
booktitle = "31st EuroForth Conference",
year = "2015",
pages = "77--78",
slides-url = "http://www.euroforth.org/ef15/papers/ertl-recognizers-slides.pdf",
OPTnote = "presentation slides",
}
@InProceedings{ertl15kps,
author = "M. Anton Ertl",
title = "What every compiler writer should know about
programmers",
booktitle = "18. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS'15)",
year = "2015",
editor = "Jens Knoop and M. Anton Ertl",
pages = "112--133",
URL = "http://www.complang.tuwien.ac.at/kps2015/proceedings/KPS_2015_submission_29.pdf",
abstract = "In recent years C compiler writers have taken the
attitude that tested and working production (i.e.,
\emph{conforming} according to the C standard) C
programs are buggy if they contain undefined behavior,
and they feel free to compile these programs (except
benchmarks) in a way that they no longer work. The
justification for this attitude is that it allows C
compilers to optimize better. But while these
``optimizations'' provide a factor 1.017 speedup with
Clang-3.1 on SPECint 2006, for non-benchmarks it also
has a cost: if we go by the wishes of the compiler
maintainers, we have to ``fix'' our working, conforming
C programs; this requires a substantial effort, and can
result in bigger and slower code. The effort could
otherwise be invested in source-level optimizations by
programmers, which can have a much bigger effect (e.g.,
a factor $>2.7$ for Jon Bentley's traveling salesman
program). Therefore, optimizations that assume that
undefined behavior does not exist are a bad idea not
just for security, but also for the performance of
non-benchmark programs.",
}
(with Abstract)
PDF
@Proceedings{kps15,
title = "18. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS'15)",
booktitle = "18. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS'15)",
year = "2015",
key = "KPS '15",
editor = "Jens Knoop and M. Anton Ertl",
URL = "http://www.complang.tuwien.ac.at/kps2015/proceedings/",
pdf-url = "http://www.complang.tuwien.ac.at/kps2015/proceedings/kps15.pdf",
}
@Unpublished{ertl14ft,
author = "M. Anton Ertl",
title = "{Forth in "Grundlagen der Programmkonstruktion"}",
note = "Vortrag bei der Forth-Tagung 2014",
URL = "http://www.forth-ev.de/wiki/lib/exe/fetch.php/events:slides-anton.pdf",
year = "2014",
}
@InProceedings{ertl14,
author = "M. Anton Ertl",
title = "Region-Based Memory Allocation in {Forth}",
booktitle = "30th EuroForth Conference",
year = "2014",
pages = "45--49",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef14/papers/ertl.pdf",
OPTnote = "not refereed",
abstract = "Memory management has a pervasive effect on the way we
program. In region-based memory allocation, objects
with roughly the same life expectancy are allocated in
one region, and in the end the whole region is freed at
once. This avoids the need to keep track of the
individual objects for \texttt{free}. Regions are
simple to implement and compatible with real-time
requirements and multi-threading, and seem to be ideal
for Forth, except for one thing: The region id has to
be passed to the allocation word, increasing the stack
load. We propose using context wrappers to avoid that
problem. This even allows to use existing
\texttt{allocate}-based libraries with regions, but we
then have to decide what \texttt{free} and
\texttt{resize} inside these libraries do.",
}
(with Abstract)
PDF
@InProceedings{ertl14-c,
author = "M. Anton Ertl",
title = "How to get rid of {C}",
booktitle = "30th EuroForth Conference",
year = "2014",
pages = "63--65",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef14/papers/ertl-c.pdf",
video = "http://www.forth-ev.de/wiki/doku.php/events:euroforth-2014:ridofc",
OPTnote = "Presentation slides",
}
@Unpublished{ertl13ft,
author = "M. Anton Ertl",
title = "{Forth als Basis f{\"u}r einen portablen Assembler}",
note = "Vortrag bei der Forth-Tagung 2013",
URL = "https://www.complang.tuwien.ac.at/anton/slides/forth-tagung13.pdf",
year = "2013",
}
@InProceedings{ertl13-paf,
author = "M. Anton Ertl",
title = "{PAF}: A Portable Assembly Language",
booktitle = "29th EuroForth Conference",
year = "2013",
pages = "30--38",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-paf.pdf",
OPTnote = "not refereed",
abstract = "A portable assembly language provides access to
machine-level features like memory addresses, machine
words, code addresses, and modulo arithmetics, like
assembly language, but abstracts away differences
between architectures like the assembly language
syntax, instruction encoding, register set size, and
addressing modes. Forth already satisfies a number of
the characteristics of a portable assembly language,
and is therefore a good basis. This paper presents PAF,
a portable assembly language based on Forth, and
specifically discusses language features that other
portable assembly languages do not have, and their
benefits; it also discusses the differences from Forth.
The main innovations of PAF are: tags indicate the
control flow for indirect branches and calls; and PAF
has two kinds of calls and definitions: the ABI ones
follow the platform's calling convention and are useful
for interfacing to the outside world, while the PAF
ones allow tail-call elimination and are useful for
implementing general control structures.",
}
(with Abstract)
PDF
@InProceedings{ertl13-strings,
author = "M. Anton Ertl",
title = "Standardize Strings Now!",
booktitle = "29th EuroForth Conference",
year = "2013",
pages = "39--43",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-strings.pdf",
OPTnote = "not refereed",
abstract = "This paper looks at the issues in string words: what
operations may be required, various design options, and
why this has lead to the current state of
standardization of string operations that is
insufficient in the eyes of many.",
}
(with Abstract)
PDF
@InProceedings{ertl13-regions,
author = "M. Anton Ertl",
title = "Region-Based Memory Allocation",
booktitle = "29th EuroForth Conference",
year = "2013",
pages = "65--67",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef13/papers/ertl-regions.pdf",
OPTnote = "Presentation slides",
}
@InProceedings{ertl13-kps,
author = "M. Anton Ertl",
title = "{PAF}: A Portable Assembly Language Based on {F}orth",
booktitle = "17. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS'13)",
year = "2013",
URL = "http://www.complang.tuwien.ac.at/papers/ertl-kps13.ps.gz",
abstract = "A portable assembly language provides access to
machine-level features like memory addresses, machine
words, code addresses, and modulo arithmetics, like
assembly language, but abstracts away differences
between architectures like the assembly language
syntax, instruction encoding, register set size, and
addressing modes. Forth already satisfies a number of
the characteristics of a portable assembly language,
and is therefore a good basis. This paper presents PAF,
a portable assembly language based on Forth, and
specifically discusses language features that other
portable assembly languages do not have, and their
benefits; it also discusses the differences from Forth.
The main innovations of PAF are: tags indicate the
control flow for indirect branches and calls; and PAF
has two kinds of calls and definitions: the ABI ones
follow the platform's calling convention and are useful
for interfacing to the outside world, while the PAF
ones allow tail-call elimination and are useful for
implementing general control structures.",
}
(with Abstract)
gzipped Postscript
@Unpublished{ertl12ft,
author = "M. Anton Ertl",
title = "objects2.fs: Ein modernisiertes objektorientiertes
Paket",
note = "Vortrag bei der Forth-Tagung 2012",
URL = "http://forth-ev.de/filemgmt/visit.php?lid=415",
year = "2012",
}
@InProceedings{ertl12,
author = "M. Anton Ertl",
title = "Methods in objects2: Duck Typing and Performance",
booktitle = "28th EuroForth Conference",
year = "2012",
pages = "96--103",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef12/papers/ertl.pdf",
OPTnote = "not refereed",
abstract = "The major new feature of objects2 is defining methods
for any class (like in Smalltalk): this means that we
can have two classes that are unrelated by inheritance,
yet react to the same messages and can be used in the
same contexts; this is also known as duck typing. This
paper discusses the implementation of method dispatch
for these general selectors as well as the more
restricted class selectors of the original
\code{objects.fs}, and compares the memory and
execution time costs of these method selector
implementations: Unhashed general selectors are as fast
as class selectors (down to two instructions), but can
consume a lot of memory (megabytes of dispatch tables
for large class hierarchies); hashed general selectors
are significantly slower ($\ge 43$ cycles), but consume
less memory. Programmers don't need to choose a
selector implementation up front; instead, it is easy
to switch between them later, on a per-selector
basis.",
}
(with Abstract)
PDF
@InProceedings{wodni&ertl11,
author = "Gerald Wodni and M. Anton Ertl",
title = "{SWIG} \& {The Forth Net}: Hands-On",
booktitle = "27th EuroForth Conference",
year = "2011",
pages = "32--35",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef11/papers/wodni.pdf",
OPTnote = "not refereed",
abstract = "We have shown the basic functionality of SWIG and The
Forth Net in the past. Now we want to provide two basic
examples which explain how to use them and to show how
easy it is to create C-interfaces or Forth-libraries
and share them.",
}
(with Abstract)
PDF
@InProceedings{ertl11euroforth,
author = "M. Anton Ertl",
title = "Ways to Reduce the Stack Depth",
booktitle = "27th EuroForth Conference",
year = "2011",
pages = "36--41",
URL = "http://www.complang.tuwien.ac.at/papers/ertl11euroforth.ps.gz",
url2 = "http://www.complang.tuwien.ac.at/anton/euroforth/ef11/papers/ertl.pdf",
OPTnote = "not refereed",
abstract = "Having to deal with many different data can lead to
problems in Forth: The data stack is the preferred
place to store data; on the other hand, dealing with
too many data stack items is cumbersome and usually bad
style. This paper presents and discusses ways to
unburden the data stack; some of them are used widely,
others are almost unknown or new.",
}
(with Abstract)
gzipped Postscript
@Unpublished{wodni&ertl11ft,
author = "Gerald Wodni and M. Anton Ertl",
title = "SWIG-Erweiterung f{\"u}r Forth",
note = "Vortrag bei der Forth-Tagung 2011",
URL = "http://forth-ev.de/filemgmt/visit.php?lid=354",
year = "2011",
}
@Unpublished{ertl11ft-stack,
author = "M. Anton Ertl",
title = "Techniken f{\"u}r weniger Stack-Tiefe",
note = "Vortrag bei der Forth-Tagung 2011",
URL = "http://forth-ev.de/filemgmt/visit.php?lid=353",
year = "2011",
}
@Unpublished{ertl11ft-multi,
author = "M. Anton Ertl",
title = "Multi-Threading und Multi-Tasking in Gforth",
note = "Vortrag bei der Forth-Tagung 2011",
URL = "http://forth-ev.de/filemgmt/visit.php?lid=351",
year = "2011",
}
@Unpublished{ertl11ft-string,
author = "M. Anton Ertl",
title = "Ausgabe in Strings",
note = "Vortrag bei der Forth-Tagung 2011",
URL = "http://forth-ev.de/filemgmt/visit.php?lid=351",
year = "2011",
}
@InProceedings{ertl&kuehling10,
author = "M. Anton Ertl and David K{\"u}hling",
title = "\texttt{ABI-CODE}: Increasing the portability of
assembly language words",
booktitle = "26th EuroForth Conference",
year = "2010",
pages = "5--14",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef10/papers/ertl.pdf",
OPTnote = "refereed",
abstract = "Code words are not portable between Forth systems,
even on the same architecture; worse, in the case of
Gforth, they are not even portable between different
engines nor between different installations. We propose
a new mechanism for interfacing to assembly code:
\texttt{abi-code} words are written to comply with the
calling conventions (ABI) of the target platform, which
does not change between Forth systems. In the trade-off
between performance and portability, \texttt{abi-code}
provides a new option between code words and colon
definitions. Compared to \texttt{code} words, the
\texttt{abi-code} mechanism incurs an overhead of 16
instructions on AMD64. Compared to colon definitions,
we achieved a speedup by a factor of 1.27 on an
application by rewriting one short colon definition as
an abi-code word.",
}
(with Abstract)
PDF
@InProceedings{wodni10,
author = "Gerald Wodni and M. Anton Ertl",
title = "The Forth Net",
booktitle = "26th EuroForth Conference",
year = "2010",
pages = "68--69",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef10/papers/wodni.pdf",
OPTnote = "not refereed",
abstract = "CPAN and PECL are impressive ways of sharing custom
libraries. Projects are discussed, hosted and
downloaded. Their dependencies are clear (no need to
search across the web) and also downloaded at once.
There is no such web portal for Forth --- until now.",
}
(with Abstract)
PDF
@Unpublished{ertl09ft1,
author = "M. Anton Ertl",
title = "Neuigkeiten in Gforth 0.7.0",
note = "Talk given at Forth-Tagung 2009",
year = "2009",
URL = "http://www.complang.tuwien.ac.at/papers/ertl09ft1.pdf",
}
@Unpublished{ertl09ft2,
author = "Gerald Wodni and M. Anton Ertl",
title = "Neuigkeiten seit Gforth 0.7.0",
note = "Talk given at Forth-Tagung 2009",
year = "2009",
URL = "http://www.complang.tuwien.ac.at/papers/ertl09ft2.pdf",
}
@Article{ertl09vd3a,
author = "M. Anton Ertl",
title = "Gforth~0.7.0",
journal = "Vierte Dimension",
year = "2009",
volume = "25",
number = "3",
pages = "13--14",
OPTnote = "not refereed",
}
@Article{ertl09vd3b,
author = "M. Anton Ertl",
title = "Forth200x --- Berichte von den
Standardisierungstreffen",
journal = "Vierte Dimension",
year = "2009",
volume = "25",
number = "3",
pages = "22",
OPTnote = "not refereed",
}
@Unpublished{ertl09autrans,
author = "M. Anton Ertl",
title = "Domination-Based Scoping and Static Single Assignment
Languages",
note = "Talk given at Static Single-Assignment Form Seminar in
Autrans, France",
month = apr,
year = "2009",
slidesurl = "http://www.prog.uni-saarland.de/ssasem/talks/Anton.Ertl.pdf",
abstract = "SSA form makes data flow easier to understand. This is
not just an advantage for the compiler, but can also be
an advantage for the programmer. One style of using
local variables in Forth is to program in
single-assignment style. Based on these experiences, we
also designed an experimental programming language with
a more conventional syntax that enforces static single
assignments, and collected some experience with it. One
consequence of this approach is a new scoping regime
based on domination; in many cases domination-based
scoping is mostly equvalent to classical block-based
scoping, but there are also cases where there is a
difference.",
}
(2 dupl. with Abstract and URL)
@InProceedings{ertl09kps,
author = "M. Anton Ertl",
title = "Utilizing Multiple Hardware Threads with Pipeline
Parallelism",
year = "2009",
booktitle = "{15. Kolloquium Programmiersprachen und Grundlagen der
Programmierung (KPS '09)}",
editor = "Jens Knoop and Adrian Prantl",
pages = "69--75",
URL = "http://www.complang.tuwien.ac.at/kps09/pdfs/ertl.pdf",
slidesurl = "http://www.complang.tuwien.ac.at/kps09/pdfs/vortraege/kps09_ertl.pdf",
abstract = "In recent years general-purpose CPUs have aquired
multiple hardware threads by providing multiple cores
per CPU (multi-core) and multiple hardware threads per
core (simultaneous multi-threading). Making good use of
such resources has been a challenge for several decades
that has been successfully attacked for scientific
applications, but not to a significant extent for
general-purpose applications. The main current paradigm
for this programming problem is to have explicit
threads that share memory and are synchronized by a
variety of synchronzation constructs. Unfortunately, it
seems to be too hard to program profitably in this
paradigm for general-purpose applications. Pipeline
parallelism is a programming paradigm that has proved
so easy to understand that shell programmers use it
even on machines that have only one hardware thread. In
this work we present the case for better support for
pipeline parallelism in programming languages, and
present ideas on how to improve the scalability of the
implementation of pipeline parallelism.",
}
(with Abstract)
PDF
@InProceedings{ertl09euroforth,
author = "M. Anton Ertl",
title = "A Look at {G}forth Performance",
booktitle = "25th EuroForth Conference",
year = "2009",
pages = "23--31",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef09/papers/ertl.pdf",
slidesurl = "http://www.complang.tuwien.ac.at/anton/euroforth/ef09/papers/ertl-slides.pdf",
OPTnote = "not refereed",
abstract = "Gforth used to be an traditional threaded-code system.
In the last decade we integrated a number of
performance features into Gforth. Several of them were
evaluated individually, but an evaluation with a more
global perspective has been missing until now. This
paper fills this void: We have measured the performance
of Gforth releases from 0.5.0 to 0.7.0, on a wide
variety of machines, and employing a wide variety of
GCC versions for compiling Gforth. We present that data
and give explanations for the performance
differences.",
}
(with Abstract)
PDF
@Article{shi+08,
author = "Yunhe Shi and Kevin Casey and M. Anton Ertl and David
Gregg",
title = "Virtual machine showdown: Stack versus registers",
journal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
year = "2008",
volume = "4",
number = "4",
pages = "21:1--21:36",
month = jan,
URL = "http://doi.acm.org/10.1145/1328195.1328197",
abstract = "Virtual machines (VMs) enable the distribution of
programs in an architecture-neutral format, which can
easily be interpreted or compiled. A long-running
question in the design of VMs is whether a stack
architecture or register architecture can be
implemented more efficiently with an interpreter. We
extend existing work on comparing virtual stack and
virtual register architectures in three ways. First,
our translation from stack to register code and
optimization are much more sophisticated. The result is
that we eliminate an average of more than 46\% of
executed VM instructions, with the bytecode size of the
register machine being only 26\% larger than that of
the corresponding stack one. Second, we present a fully
functional virtual-register implementation of the Java
virtual machine (JVM), which supports Intel, AMD64,
PowerPC and Alpha processors. This register VM supports
inline-threaded, direct-threaded, token-threaded, and
switch dispatch. Third, we present experimental results
on a range of additional optimizations such as register
allocation and elimination of redundant heap loads. On
the AMD64 architecture the register machine using
switch dispatch achieves an average speedup of 1.48
over the corresponding stack machine. Even using the
more efficient inline-threaded dispatch, the register
VM achieves a speedup of 1.15 over the equivalent
stack-based VM.",
}
(5 dupl. with Abstract and URL)
DOI resolver (ACM)
@InProceedings{ertl08euroforth,
author = "M. Anton Ertl",
title = "Cleaning up After Yourself",
booktitle = "24th EuroForth Conference",
year = "2008",
pages = "35--38",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth/ef08/papers/ertl.pdf",
psurl = "http://www.complang.tuwien.ac.at/papers/ertl08euroforth.ps.gz",
OPTnote = "not refereed",
abstract = "Performing cleanup actions such as restoring a
variable or closing a file used to be impossible to
guarantee in Forth before Forth-94 gave us
\code{catch}. Even with \code{catch}, the cleanup code
can be skipped due to user interrupts if you are
unlucky. We introduce a construct that guarantees that
the cleanup code is always completed. We also discuss a
cheaper implementation approach for cleanup code than
using a full exception frame.",
}
(with Abstract)
PDF
@Unpublished{ertl08ft,
author = "M. Anton Ertl",
title = "Die Multicore-Herausforderung",
note = "Talk given at Forth-Tagung 2008",
URL = "http://www.complang.tuwien.ac.at/papers/ertl08ft.pdf",
year = "2008",
}
@Unpublished{ertl08dagstuhl,
author = "M. Anton Ertl",
title = "Using C for the Back End",
note = "Talk given at Dagstuhl Seminar on Emerging Uses and
Paradigms for Dynamic Binary Translation (08441)",
year = "2008",
slides-url = "http://www.dagstuhl.de/Materials/Files/08/08441/08441.ErtlAnton.Slides.pdf",
abstract = "If we want to implement a translator easily and
portably, but with with good code quality, then
translating through C is a good option. While C is a
static language, we can also use this technique for a
dynamic translator with the help of a dynamic linker.
The disadvantage of this approach is the large startup
time; this disadvantage can be partially overcome by
caching, batching, and seeding the cache. Some
challenges for this technique in binary translation are
modeling the (arbitrary) control flow in C and the
compilation granularity of C. One example of using
dynamic translation through C, although not in a binary
translation context is the implementation of a foreign
function interface.",
}
(with Abstract)
PDF
@InProceedings{ertl07euroforth,
author = "M. Anton Ertl",
title = "Gforth's libcc {C} Function Call Interface",
booktitle = "23rd EuroForth Conference",
year = "2007",
editor = "M. Anton Ertl",
pages = "7--11",
URL = "http://www.complang.tuwien.ac.at/papers/ertl07euroforth.ps.gz",
pdfurl = "http://www.complang.tuwien.ac.at/anton/euroforth2007/papers/ertl.pdf",
OPTnote = "not refereed",
abstract = "A major problem in our earlier proposal for a C
interface was that a part of the interface was not
portable between platforms. The libcc interface solves
this problem by using a C compiler and its
\code{.h}-files. The \code{.h}-files contain knowledge
about the specific platform, and the C compiler
automatically inserts the necessary conversions between
Forth and C types. In this paper we describe the libcc
implementation and interface. We also discuss how a
Forth-C interface might be standardized.",
}
(with Abstract)
gzipped Postscript
@Proceedings{euroforth07,
title = "23rd EuroForth Conference",
booktitle = "23rd EuroForth Conference",
year = "2007",
key = "EuroForth'07",
editor = "M. Anton Ertl",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth2007/papers/proceedings.pdf",
}
@Article{casey+07toplas,
author = "Kevin Casey and M. Anton Ertl and David Gregg",
title = "Optimizing indirect branch prediction accuracy in
virtual machine interpreters",
journal = "ACM Transactions on Programming Languages and
Systems",
year = "2007",
volume = "29",
number = "6",
pages = "37:1--37:36",
month = oct,
URL = "http://doi.acm.org/10.1145/1286821.1286828",
abstract = "Interpreters designed for efficiency execute a huge
number of indirect branches and can spend more than
half of the execution time in indirect branch
mispredictions. Branch target buffers (BTBs) are the
most widely available form of indirect branch
prediction; however, their prediction accuracy for
existing interpreters is only 2\%--50\%. In this
article we investigate two methods for improving the
prediction accuracy of BTBs for interpreters:
replicating virtual machine (VM) instructions and
combining sequences of VM instructions into
superinstructions. We investigate static (interpreter
build-time) and dynamic (interpreter runtime) variants
of these techniques and compare them and several
combinations of these techniques. To show their
generality, we have implemented these optimizations in
VMs for both Java and Forth. These techniques can
eliminate nearly all of the dispatch branch
mispredictions, and have other benefits, resulting in
speedups by a factor of up to 4.55 over efficient
threaded-code interpreters, and speedups by a factor of
up to 1.34 over techniques relying on dynamic
superinstructions alone.",
}
(4 dupl. with Abstract and URL)
DOI resolver (ACM)
@Article{ertl07vd1,
author = "M. Anton Ertl",
title = "Forth 200x -- Treffen auf der EuroForth 2006",
journal = "Vierte Dimension",
year = "2007",
volume = "23",
number = "1",
pages = "34",
URL = "http://www.forth-ev.de/filemgmt/visit.php?lid=133",
OPTnote = "not refereed",
}
@Article{ertl07vd2a,
author = "M. Anton Ertl",
title = "Factor, Postscript, und Forth: Ein kleiner Vergleich",
journal = "Vierte Dimension",
year = "2007",
volume = "23",
number = "2",
pages = "10--12",
OPTnote = "not refereed",
}
@Article{ertl07vd2b,
author = "M. Anton Ertl",
title = "Der Forth-Stammbaum",
journal = "Vierte Dimension",
year = "2007",
volume = "23",
number = "2",
pages = "15--18",
OPTnote = "not refereed",
}
@Article{ertl06vd,
author = "M. Anton Ertl",
title = "Bericht von der EuroForth 2005",
journal = "Vierte Dimension",
year = "2006",
volume = "22",
number = "1",
pages = "27",
month = jan,
OPTnote = "not refereed",
}
@Article{ertl06vd2,
author = "M. Anton Ertl",
title = "Ank{\"u}ndigung EuroForth 2006",
journal = "Vierte Dimension",
year = "2006",
volume = "22",
number = "2",
pages = "8",
month = apr,
OPTnote = "not refereed",
}
@InProceedings{burgstaller+06,
author = "Bernd Burgstaller and Bernhard Scholz and Anton Ertl",
title = "An Embedded Systems Programming Environment for {C}",
booktitle = "Euro-Par 2006",
pages = "1204--1216",
year = "2006",
volume = "4128",
series = "LNCS",
publisher = "Springer",
ISBN = "3-540-37783-2",
isbn13 = "978-3-540-37783-2",
abstract = "Resource constraints are a major concern with the
design, development, and deployment of embedded
systems. Embedded systems are highly hardware-dependent
and have little computational power. Mobile embedded
systems are further constrained by their limited
battery capacity. Many of these systems are still
programmed in assembly language because there is a lack
of efficient programming environments.\par To overcome
or at least alleviate the restrictions, we propose a
light-weight and versatile programming environment for
the C programming language that offers mixed-mode
execution, i.e., code is either executed on the CPU or
on a virtual machine (VM). This mixed-mode execution
environment combines the advantages of highly
compressed bytecode with the speed of machine code.\par
We have implemented the programming environment and
conducted experiments for selected programs of the
MiBench suite and the Spec~2000. The VM has a footprint
of 12~KB on the Intel~IA32. Initial results show that
the performance of the virtual machine is typically
only 2 to 36 times slower than the binary execution,
with compressed code occupying only 36\%--57\% of the
machine code size. Combining sequences of VM
instructions into new VM instructions
(superinstructions) increases the execution speed and
reduces the VM code size. Preliminary experiments
indicate a speedup by a factor of 3.",
}
(3 dupl. with Abstract)
Try KVK
@Unpublished{ertl06forth-tagung1,
author = "M. Anton Ertl",
title = "Forth 200x",
note = "Talk given at Forth-Tagung 2006 in Witten",
month = may,
year = "2006",
URL = "http://www.forth200x.org/",
}
@Article{ertl+06dotnet,
author = "M. Anton Ertl and Christian Thalinger and Andreas
Krall",
title = "Superinstructions and Replication in the {Cacao} {JVM}
interpreter",
journal = "Journal of .NET Technologies",
year = "2006",
volume = "4",
pages = "25--32",
note = "Journal papers from \emph{{.NET} Technologies 2006}
conference",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+06dotnet.ps.gz",
issueurl = "http://dotnet.zcu.cz/NET_2006/Papers_2006/2006_Vol_4.pdf",
ISBN = "80-86943-13-5",
ISSN = "1801-2108",
abstract = "Dynamic superinstructions and replication can provide
large speedups over plain interpretation. In a JVM
implementation we have to overcome two problems to
realize the full potential of these optimizations: the
conflict between superinstructions and the quickening
optimization; and the non-relocatability of JVM
instructions that can throw exceptions. In this paper,
we present solutions for these problems. We also
present empirical results: We see speedups of up to a
factor of 4 on SpecJVM98 benchmarks from
superinstructions with all these problems solved. The
contribution of making potentially throwing JVM
instructions relocatable is up to a factor of 2. A
simple way of dealing with quickening instructions is
good enough, if superinstructions are generated in JIT
style. Replication has little effect on performance.",
}
(with Abstract)
gzipped Postscript
@Unpublished{ertl+06linkoeping1,
author = "M. Anton Ertl",
title = "Fast and Flexible Instruction Selection with On-Demand
Tree-Parsing Automata",
note = "Talk given at \emph{Mini-Workshop on Code Generation}
in Link{\"o}ping, June 7--8",
year = "2006",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+06pldi.ps.gz",
}
@Unpublished{ertl+06linkoeping2,
author = "M. Anton Ertl",
title = "Superinstructions and Replication in the {Cacao} {JVM}
interpreter",
note = "Talk given at \emph{Mini-Workshop on Code Generation}
in Link{\"o}ping, June 7--8",
year = "2006",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+06dotnet.ps.gz",
}
@InProceedings{ertl+06pldi,
author = "M. Anton Ertl and Kevin Casey and David Gregg",
title = "Fast and Flexible Instruction Selection with On-Demand
Tree-Parsing Automata",
booktitle = "SIGPLAN Conference on Programming Language Design and
Implementation (PLDI '06)",
ISBN = "1-59593-320-4",
pages = "52--60",
year = "2006",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+06pldi.ps.gz",
abstract = "Tree parsing as supported by code generator generators
like BEG, burg, iburg, lburg and ml-burg is a popular
instruction selection method. There are two existing
approaches for implementing tree parsing: dynamic
programming, and tree-parsing automata; each approach
has its advantages and disadvantages. We propose a new
implementation approach that combines the advantages of
both existing approaches: we start out with dynamic
programming at compile time, but at every step we
generate a state for a tree-parsing automaton, which is
used the next time a tree matching the state is found,
turning the instruction selector into a fast
tree-parsing automaton. We have implemented this
approach in the Gforth code generator. The
implementation required little effort and reduced the
startup time of Gforth by up to a factor of 2.5.",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
Try KVK
@InProceedings{ertl06,
author = "M. Anton Ertl",
title = "A Portable {C} Function Call Interface",
booktitle = "22nd EuroForth Conference",
year = "2006",
editor = "M. Anton Ertl",
pages = "47--51",
URL = "http://www.complang.tuwien.ac.at/papers/ertl06.ps.gz",
pdfurl = "http://www.complang.tuwien.ac.at/anton/euroforth2006/papers/ertl.pdf",
OPTnote = "not refereed",
abstract = "Many Forth systems provide means to call C functions,
but these interfaces are not designed to be portable
between platforms: A call to a C library function that
works on one platform may fail on the next platform,
because the parameter and return value types of the C
function may be different. In this paper, we present an
interface that avoids this problem: In particular, the
actual calls can be made platform-independent; a part
of the declarations is platform-dependent, but can be
generated automatically from C .h-files.",
}
(with Abstract)
gzipped Postscript
@Article{ertl06vd4,
author = "M. Anton Ertl",
title = "Einladung zur Forth-Tagung 2007",
journal = "Vierte Dimension",
year = "2006",
volume = "22",
number = "4",
pages = "36",
month = dec,
OPTnote = "not refereed",
}
@InProceedings{ertl&gregg05,
author = "M. Anton Ertl and David Gregg",
title = "Stack Caching in {Forth}",
booktitle = "21st EuroForth Conference",
year = "2005",
editor = "M. Anton Ertl",
pages = "6--15",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg05.ps.gz",
pdfurl = "http://www.complang.tuwien.ac.at/anton/euroforth2005/papers/ertl%26gregg05.pdf",
OPTnote = "not refereed",
abstract = "Stack caching speeds Forth up by keeping stack items
in registers, reducing the number of memory accesses
for stack items. This paper describes our work on
extending Gforth's stack caching implementation to
support more than one register in the canonical state,
and presents timing results for the resulting Forth
system. For single-representation stack caches, keeping
just one stack item in registers is usually best, and
provides speedups up to a factor of 2.84 over the
straight-forward stack representation. For stack caches
with multiple stack representations, using the
one-register representation as canonical representation
is usually optimal, resulting in an overall speedup of
up to a factor of 3.80 (and up to a factor of 1.53 over
single-representation stack caching).",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&paysan05,
author = "M. Anton Ertl and Bernd Paysan",
title = "Xchars or {Unicode} in {Forth}",
booktitle = "21st EuroForth Conference",
year = "2005",
editor = "M. Anton Ertl",
pages = "16--20",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26paysan05.ps.gz",
pdfurl = "http://www.complang.tuwien.ac.at/anton/euroforth2005/papers/ertl%26paysan05.pdf",
OPTnote = "not refereed",
abstract = "When dealing with different scripts at the same time
(e.g., Latin, Greek, Cyrillic), or with Chinese
ideograms, 8-bit fixed-width characters are too narrow.
However, many Forth programs have an environmental
dependency on $\code{1 chars}=1$, so just making Forth
characters wider would cause quite a lot of portability
problems. We propose to add xchars for dealing with
potentially wider, variable-width characters. This
extension is relatively painless, requiring changes in
only those program parts that work with individual
characters, if they should work with the extended
characters; uses of string words need no changes to
work with extended characters. The xchar words can also
be implemented on 8-bit-only Forth systems, so programs
written to use xchars can also work on such systems.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{shi+05,
author = "Yunhe Shi and David Gregg and Andrew Beatty and M.
Anton Ertl",
title = "Virtual Machine Showdown: Stack Versus Registers",
booktitle = "Virtual Execution Environments (VEE '05)",
pages = "153--163",
year = "2005",
URL = "http://www.cs.tcd.ie/David.Gregg/papers/vee05-ShiGreggBeattyErtl.pdf",
}
@Article{ertl05ivme-editorial,
author = "M. Anton Ertl",
title = "Advances in Interpreters, Virtual Machines, and
Emulators",
journal = "Science of Computer Programming",
year = "2005",
volume = "57",
number = "3",
pages = "251--252",
month = sep,
note = "Editorial for special issue with journal versions of
the IVME'03 papers.",
}
@InProceedings{ertl&gregg04ivme,
author = "M. Anton Ertl and David Gregg",
title = "Combining Stack Caching with Dynamic
Superinstructions",
booktitle = "Interpreters, Virtual Machines and Emulators (IVME
'04)",
year = "2004",
pages = "7--14",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg04ivme.ps.gz",
abstract = "Dynamic superinstructions eliminate most of the
interpreter dispatch overhead. This results in a higher
proportion of interpreter time spent in stack accesses
(on a stack-based virtual machine). Stack caching
reduces the stack access overhead. Each of these
optimizations provides more speedup, if the other one
is applied, too. Combining these optimizations also
opens additional opportunities: we can insert stack
state transitions without dispatch cost; this reduces
the number of necessary VM instruction instances
significantly. A shortest-path search can find the
optimal sequence of state transitions and VM
instructions. In this paper we describe an
implementation of static stack caching employing these
ideas. We also represent empirical results for our
implementation, resulting in a speedup of up to 58\%
over a version that keeps one value in registers all
the time.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl&gregg04pact,
author = "M. Anton Ertl and David Gregg",
title = "Retargeting {JIT} compilers by using {C}-compiler
generated executable code",
booktitle = "Parallel Architecture and Compilation Techniques
(PACT' 04)",
year = "2004",
pages = "41--50",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg04pact.ps.gz",
abstract = "JIT compilers produce fast code, whereas interpreters
are easy to port between architectures. We propose to
combine the advantages of these language implementation
techniques as follows: we generate native code by
concatenating and patching machine code fragments taken
from interpreter-derived code (generated by a C
compiler); we completely eliminate the interpreter
dispatch overhead and accesses to the interpreted code
by patching jump target addresses and other constants
into the fragments. In this paper we present the basic
idea, discuss some issues in more detail, and present
results from a proof-of-concept implementation,
providing speedups of up to 1.87 over the fastest
previous interpreter-based technique, and performance
comparable to simple native-code compilers. The effort
required for retargeting our implementation from the
386 to the PPC architecture was less than a
person-day.",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl04euroforth,
author = "M. Anton Ertl",
title = "Forth Family Tree and Timeline",
booktitle = "EuroForth 2004 Conference Proceedings",
pages = "1--4",
year = "2004",
OPTnote = "not refereed",
URL = "http://www.complang.tuwien.ac.at/forth/family-tree/",
}
@InProceedings{gregg&ertl04euroforth,
author = "David Gregg and M. Anton Ertl",
title = "Inlining in {Gforth}: Early Experiences",
booktitle = "EuroForth 2004 Conference Proceedings",
pages = "33--40",
year = "2004",
URL = "http://www.complang.tuwien.ac.at/papers/gregg%26ertl04euroforth.ps.gz",
OPTnote = "not refereed",
abstract = "Many optimizations are easier or more effective for
straight-line code (basic blocks). Straight-line code
in Forth is limited mainly by calls and returns.
Inlining eliminates calls and returns, which in turn
makes the basic blocks longer, and increases the
effectiveness of other optimizations. In this paper we
present a first prototype implementation of ininlining
for Gforth.",
}
(with Abstract)
gzipped Postscript
@InCollection{gregg&ertl04dspg,
author = "David Gregg and M. Anton Ertl",
title = "A Language and Tool for Generating Efficient Virtual
Machine Interpreters",
booktitle = "Domain-Specific Program Generation",
pages = "196--215",
publisher = "Springer",
year = "2004",
series = "LNCS 3016",
}
@Article{ertl03vd,
author = "M. Anton Ertl",
title = "Threaded Code -- Varianten und Optimierungen
(Kurzfassung)",
journal = "Vierte Dimension -- Das FORTH-Magazin",
year = "2003",
volume = "19",
number = "1",
pages = "12--15",
annote = "A shortened, German version of \cite{ertl02}",
}
@Unpublished{ertl03dagstuhl,
author = "M. Anton Ertl",
title = "Optimizing Interpreters",
note = "Talk given at the Dagstuhl Seminar 3071: Emerging
Technologies: Can Optimization Technology meet their
Demands?",
year = "2003",
month = feb,
}
@InProceedings{ertl&gregg03,
author = "M. Anton Ertl and David Gregg",
title = "Optimizing Indirect Branch Prediction Accuracy in
Virtual Machine Interpreters",
booktitle = "SIGPLAN Conference on Programming Language Design and
Implementation (PLDI'03)",
year = "2003",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg03.ps.gz",
abstract = "Interpreters designed for efficiency execute a huge
number of indirect branches and can spend more than
half of the execution time in indirect branch
mispredictions. Branch target buffers are the best
widely available\mn{on all recent general-purpose
machines?} form of indirect branch prediction; however,
their prediction accuracy for existing interpretes is
only 2\%--50\%. In this paper we investigate two
methods for improving the prediction accuracy of BTBs
for interpreters: replicating virtual machine (VM)
instructions and combining sequences of VM instructions
into superinstructions. We investigate static
(interpreter build-time) and dynamic (interpreter
run-time) variants of these techniques and compare them
and several combinations of these techniques. These
techniques can eliminate nearly all of the dispatch
branch mispredictions, and have other benefits,
resulting in speedups by a factor of up to 3.17 over
efficient threaded-code interpreters, and speedups by a
factor of up to 1.3 over techniques relying on
superinstructions alone.",
}
(4 dupl. with Abstract and URL)
gzipped Postscript
@Proceedings{ivme03,
title = "Interpreters, Virtual Machines and Emulators
(IVME~'03)",
booktitle = "Interpreters, Virtual Machines and Emulators
(IVME~'03)",
year = "2003",
key = "IVME~'03",
editor = "M. Anton Ertl",
URL = "http://www.complang.tuwien.ac.at/anton/ivme03/proceedings/ivme.ps.gz",
url2 = "http://portal.acm.org/toc.cfm?id=858570&type=proceeding",
}
(2 dupl. with URL)
gzipped Postscript
@InProceedings{ertl&gregg03euroforth,
author = "M. Anton Ertl and David Gregg",
title = "Implementation Issues for Superinstructions in
{Gforth}",
booktitle = "EuroForth 2003 Conference Proceedings",
year = "2003",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg03euroforth.ps.gz",
abstract = "Combining Forth primitives into superinstructions
provides nice speedups. Several approaches to
superinstructions were explored in the Gforth project.
This paper discusses the effects of these approaches on
performance, compilation time, implementation effort,
and on programming tools such as the decompiler and
backtracing.",
}
(with Abstract)
gzipped Postscript
@Article{ertl&gregg03jilp,
author = "M. Anton Ertl and David Gregg",
title = "The Structure and Performance of \emph{Efficient}
Interpreters",
journal = "The Journal of Instruction-Level Parallelism",
year = "2003",
volume = "5",
month = nov,
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg03jilp.ps.gz",
url2 = "http://www.jilp.org/vol5/v5paper12.pdf",
note = "http://www.jilp.org/vol5/",
abstract = "Interpreters designed for high general-purpose
performance typically perform a large number of
indirect branches (3.2\%--13\% of all executed
instructions in our benchmarks). These branches consume
more than half of the run-time in a number of
configurations we simulated. We evaluate how accurate
various existing and proposed branch prediction schemes
are on a number of interpreters, how the mispredictions
affect the performance of the interpreters and how two
different interpreter implementation techniques perform
with various branch predictors. We also suggest various
ways in which hardware designers, C compiler writers,
and interpreter writers can improve the performance of
interpreters.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{casey+03,
author = "Kevin Casey and David Gregg and M. Anton Ertl and
Andrew Nisbeth",
title = "Towards Superinstructions for Java Interpreters",
booktitle = "Software and Compilers for Embedded Systems (SCOPES
2003)",
pages = "329--343",
year = "2003",
volume = "2826",
series = "LNCS",
publisher = "Springer",
}
@Manual{gforth03,
title = "Gforth (Manual)",
author = "Neal Crook and Anton Ertl and David Kuehling and Bernd
Paysan and Jens Wilke",
organization = "Free Software Foundation",
edition = "0.6.2",
year = "2003",
}
@Article{ertl+02,
author = "M. Anton Ertl and David Gregg and Andreas Krall and
Bernd Paysan",
title = "\textsf{vmgen} --- A Generator of Efficient Virtual
Machine Interpreters",
journal = "Software---Practice and Experience",
year = "2002",
volume = "32",
number = "3",
pages = "265--294",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+02.ps.gz",
abstract-url = "http://www3.interscience.wiley.com/cgi-bin/abstract/90010508/START",
keywords = "interpreter; virtual machine; generator; stack
architecture; superinstruction; byte code",
abstract = "In a virtual machine interpreter, the code for each
virtual machine instruction has similarities to code
for other instructions. We present an interpreter
generator that takes simple virtual machine instruction
descriptions as input and generates C code for
processing the instructions in several ways: execution,
virtual machine code generation, disassembly, tracing,
and profiling. The generator is designed to support
efficient interpreters: it supports threaded code,
caching the top-of-stack item in a register, combining
simple instructions into superinstructions, and other
optimizations. We have used the generator to create
interpreters for Forth and Java. The resulting
interpreters are faster than other interpreters for the
same languages and they are typically 2-10 times slower
than code produced by native-code compilers. We also
present results for the effects of the individual
optimizations supported by the generator.",
}
(5 dupl. with Abstract and URL)
gzipped Postscript
Unknown Format
@InProceedings{ertl&gregg02,
author = "M. Anton Ertl and David Gregg",
title = "Building an interpreter with \textsf{vmgen}",
booktitle = "Compiler Construction (CC'02)",
pages = "5--8",
year = "2002",
publisher = "Springer LNCS~2304",
note = "Tool Demonstration",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg02.ps.gz",
abstract = "\textsf{Vmgen} automates many of the tasks of writing
the virtual machine part of an interpreter, resulting
in less coding, debugging and maintenance effort. This
paper gives some quantitative data about the source
code and generated code for a \textsf{vmgen}-based
interpreter, and gives some examples demonstrating the
simplicity of using \textsf{vmgen}.",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl02,
author = "M. Anton Ertl",
title = "Threaded Code Variations and Optimizations (Extended
Version)",
booktitle = "Forth-Tagung 2002",
year = "2002",
address = "Garmisch-Partenkirchen",
URL = "http://www.complang.tuwien.ac.at/papers/ertl02.ps.gz",
abstract = "Forth has been traditionally implemented as indirect
threaded code, where the code for non-primitives is the
code-field address of the word. To get the maximum
benefit from combining sequences of primitives into
superinstructions, the code produced for a
non-primitive should be a primitive followed by a
parameter (e.g., \code{lit} \emph{addr} for variables).
This paper takes a look at the steps from a traditional
threaded-code implementation to superinstructions, and
at the size and speed effects of the various
steps.\comment{It also compares these variants of
Gforth to various other Forth implementations on
contemporary machines.} The use of superinstructions
gives speedups of up to a factor of 2 on large
benchmarks on processors with branch target buffers,
but requires more space for the primitives and the
optimization tables, and also a little more space for
the threaded code.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl02ef,
author = "M. Anton Ertl",
title = "The Evolution of Vmgen",
booktitle = "18th EuroForth Conference",
year = "2002",
editor = "M. Anton Ertl",
pages = "33--37",
URL = "http://www.complang.tuwien.ac.at/anton/euroforth2002/papers/ertl.ps.gz",
note = "Slides",
}
(2 dupl. with URL)
gzipped Postscript
@InProceedings{ertl02efb,
author = "M. Anton Ertl",
title = "Superinstructions in {Gforth}",
booktitle = "18th EuroForth Conference",
year = "2002",
editor = "M. Anton Ertl",
note = "Demonstration only, no paper",
}
@Proceedings{euroforth02,
title = "18th EuroForth Conference",
booktitle = "18th EuroForth Conference",
year = "2002",
key = "EuroForth'02",
editor = "M. Anton Ertl",
}
@InProceedings{gregg+01,
author = "David Gregg and M.~Anton Ertl and Andreas Krall",
title = "A Fast {Java} Interpreter",
booktitle = "JOSES Workshop at ETAPS'01",
year = "2001",
editor = "Uwe Assmann",
URL = "http://www.complang.tuwien.ac.at/papers/gregg+01.ps.gz",
abstract = "The Java virtual machine (JVM) is usually implemented
with an interpreter or just-in-time (JIT) compiler.
JITs provide the best performance, but must be
substantially rewritten for each architecture they are
ported to. Interpreters are easier to develop and
maintain, need less memory and can be ported to new
architectures with almost no changes. The weakness of
interpreters is that they are much slower than JITs.
This paper describes work in progress on faster Java
interpreters. Our goal is to bring interpreter
performance to a new higher level by developing new
optimisations for modern computer architectures, and by
adapting recent research on compilers to
interpreters.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl&gregg01,
author = "M.~Anton Ertl and David Gregg",
title = "The Behaviour of Efficient Virtual Machine
Interpreters on Modern Architectures",
booktitle = "Euro-Par 2001",
pages = "403--412",
year = "2001",
publisher = "Springer LNCS~2150",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26gregg01.ps.gz",
abstract = "Romer et al (ASPLOS 96) examined several interpreters
and concluded that they behave much like general
purpose integer programs such as gcc. We show that
there is an important class of interpreters which
behave very differently. Efficient virtual machine
interpreters perform a large number of indirect
branches (3.2\%--13\% of all executed instructions in
our benchmarks, taking up to 61\%-79\% of the cycles on
a machine with no branch prediction). We evaluate how
various branch prediction schemes and methods to reduce
the mispredict penalty affect the performance of
several virtual machine interpreters. Our results show
that for current branch predictors, threaded code
interpreters cause fewer mispredictions, and are almost
twice as fast as switch based interpreters on modern
superscalar architectures.",
}
(with Abstract)
gzipped Postscript
@InProceedings{gregg+01hpcn,
author = "David Gregg and M.~Anton Ertl and Andreas Krall",
title = "Implementing an Efficient {Java} Interpreter",
booktitle = "High-Performance Computing and Networking (HPCN Europe
2001)",
pages = "613--620",
year = "2001",
publisher = "Springer LNCS~2110",
URL = "http://www.complang.tuwien.ac.at/papers/gregg+01hpcn.ps.gz",
pdf-url = "http://link.springer-ny.com/link/service/series/0558/papers/2110/21100613.pdf",
abstract = "The Java virtual machine (JVM) is usually implemented
with an interpreter or just-in-time (JIT) compiler. JIT
compilers provide the best performance, but must be
substantially rewritten for each architecture they are
ported to. Interpreters are easier to develop and
maintain, and can be ported to new architectures with
almost no changes. The weakness of interpreters is that
they are much slower than JIT compilers. This paper
describes work in progress on a highly efficient Java
interpreter. We describe the main features that make
our interpreter efficient. Our initial experimental
results show that an interpreter-based JVM may be only
1.9 times slower than a compiler-based JVM for some
important applications.",
}
(4 dupl. with Abstract and URL)
gzipped Postscript
PDF
@InProceedings{ertl01,
author = "M. Anton Ertl",
title = "Threaded Code Variations and Optimizations",
booktitle = "EuroForth 2001 Conference Proceedings",
pages = "49--55",
year = "2001",
URL = "http://www.complang.tuwien.ac.at/papers/ertl01.ps.gz",
abstract = "Forth has been traditionally implemented as indirect
threaded code, where the code for non-primitives is the
code-field address of the word. To get the maximum
benefit from combining sequences of primitives into
superinstructions, the code produced for a
non-primitive should be a primitive followed by a
parameter (e.g., \code{lit} \emph{addr} for variables).
This paper takes a look at the steps from a traditional
threaded-code implementation to superinstructions, and
at the size and speed effects of the various steps. The
use of superinstructions gives speedups of up to a
factor of 2 on large benchmarks on processors with
branch target buffers, but requires more space for the
primitives and the optimization tables, and also a
little more space for the threaded code.",
}
(with Abstract)
gzipped Postscript
@InProceedings{gregg+01euroforth,
author = "David Gregg and M. Anton Ertl and John Waldron",
title = "The Common Case in {Forth} Programs",
booktitle = "EuroForth 2001 Conference Proceedings",
pages = "63--70",
year = "2001",
url1 = "http://www.cs.tcd.ie/David.Gregg/euroforth01.ps.gz",
URL = "http://www.complang.tuwien.ac.at/papers/gregg+01euroforth.ps.gz",
abstract = "Identifying common features in Forth programs is
important for those designing Forth machines and
optimisers. In this paper we measure the behaviour of
six large Forth programs and four small ones. We look
at the ratio of user to system code, basic block
lengths, common instructions, and common sequences of
instructions. Our most important finding is that for
most large programs, many (38.4\%--47.6\% statically
and 21.8\%--40.9\% dynamically) basic blocks consist of
only a single instruction, which hinders optimisation.
We also show static measures of frequent instructions
and sequences of instructions are more consistent
across programs, and may be a better predictor of the
behaviour of other programs than dynamic measures.",
}
(4 dupl. with Abstract and URL)
gzipped Postscript
@Unpublished{ertl01vhs,
author = "M. Anton Ertl",
title = "Wie werden Computer schneller?",
note = "Vortrag im Rahmen der VHS-Veranstaltungsreihe
``University meets Public''",
month = feb,
year = "2001",
URL = "http://www.complang.tuwien.ac.at/papers/ertl01vhs.ps.gz",
abstract = "Computerhardware wird regelmaessig schneller und
leistungsfaehiger. Dieser Vortrag beschreibt diese
Entwicklungen, insbesondere in Bezug auf den internen
Aufbau der Prozessoren, und ihre Auswirkungen auf die
Software.",
}
(with Abstract)
gzipped Postscript
@Unpublished{ertl01state,
author = "M. Anton Ertl",
title = "STATE-smartness: Applications, Pitfalls,
Alternatives",
note = "This paper is an updated and significantly revised
version of my EuroForth '98 paper \cite{ertl98}; I
submitted this draft version to JFAR in 2001, but it
was not processed, so I finally put it online.",
year = "2001",
URL = "http://www.complang.tuwien.ac.at/papers/ertl01state.pdf",
abstract = "STATE-smart words provide a number of unpleasant
surprises to their users. They are applied in two
contexts, and they fail in both: 1) for implementing
words like \texttt{s"} that provide an arbitrary
combination of interpretation and compilation semantics
(combined words); 2) for optimizing using a special
implementation of the (default) compilation semantics.
This paper discusses these issues and shows programmers
and system implementors how to avoid STATE-smart words.
It also reports our experiences in converting the
STATE-smart words in Gforth into a clean solution:
little work and few problems.",
}
(with Abstract)
PDF
@Unpublished{ertl00dagstuhl,
author = "M. Anton Ertl",
title = "Optimization During Tree-Parsing Code Selection",
note = "Talk given at the Dagstuhl Seminar on Code
Optimization",
year = "2000",
month = sep,
URL = "http://www.complang.tuwien.ac.at/papers/ertl00dagstuhl.ps.gz",
abstract = "Tree parsing is well-known as a method for code
selection. This talk presents a technique for also
using it for some optimizations. The basic principle is
to introduce additional nonterminals that correspond to
additional data representations. For example, a
nonterminal representing complemented values can be
used to distribute complement operations to the optimal
position (for a machine) using DeMorgan's laws. Other
examples are other unary operators, constant folding
across intervening non-constants, and optimizing the
conversion between different representations, such as
various flag representations, tagged and untagged
representations, or various data sizes. The advantages
of this technique are that it optimizes for the
machine, not some intermediate code metric and that it
has no compile-time overhead when used with tree
parsing automata. The limitations are that there are
only a finite number of nonterminals and thus data
representations, that optimality is limited to trees,
that otherwise it is limited to single-entry regions,
and that it results in large grammars. This talk also
mentions further work in factoring the grammar and in
dealing with DAGs.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl00,
author = "M. Anton Ertl",
title = "\texttt{CONST-DOES>}",
booktitle = "EuroForth 2000 Conference Proceedings",
year = "2000",
address = "Prestbury, UK",
URL = "http://www.complang.tuwien.ac.at/papers/ertl00.ps.gz",
abstract = "A frequent use of the \code{create}...\code{does>}
construct is to provide some constants at definition
time that are then used at execution time (a
\code{constant}-style use). However,
\code{create}...\code{does>} also supports a
\code{value}-style use, where the data is not constant
and can change at any time. This additional
functionality inhibits optimization. This paper
proposes \code{const-does>}, which can only be used to
define \code{constant}-style words, and thus makes
optimization possible.",
}
(with Abstract)
gzipped Postscript
@InProceedings{czezatke&ertl00,
author = "Christian Czezatke and M. Anton Ertl",
title = "{LinLogFS} --- A Log-Structured Filesystem For
{Linux}",
booktitle = "Freenix Track of Usenix Annual Technical Conference",
pages = "77--88",
year = "2000",
URL = "http://www.complang.tuwien.ac.at/papers/czezatke%26ertl00/",
ps-url = "http://www.complang.tuwien.ac.at/papers/czezatke%26ertl00.ps.gz",
abstract = "LinLogFS is a log-structured filesystem for Linux. It
currently offers features like fast crash recovery and
in-order write semantics. We implemented LinLogFS by
putting a logging layer between an adapted version of
the ext2 file system and the block device.",
}
(3 dupl. with Abstract and URL)
HTML
gzipped Postscript
@InProceedings{ertl99,
author = "M. Anton Ertl",
title = "Optimal Code Selection in {DAG}s",
booktitle = "Principles of Programming Languages (POPL '99)",
year = "1999",
OPTpages = "242--249",
URL = "http://www.complang.tuwien.ac.at/papers/ertl99.ps.gz",
abstract = "We extend the tree parsing approach to code selection
to DAGs. In general, our extension does not produce the
optimal code selection for all DAGs (this problem would
be NP-complete), but for certain code selection
grammars, it does. We present a method for checking
whether a code selection grammar belongs to this set of
DAG-optimal grammars, and use this method to check code
selection grammars adapted from lcc: the grammars for
the MIPS and SPARC architectures are DAG-optimal, and
the code selection grammar for the 386 architecture is
almost DAG-optimal.",
}
(7 dupl. with Abstract and URL)
gzipped Postscript
@Unpublished{ertl+99,
author = "M. Anton Ertl and David Gregg and Andreas Krall",
title = "Optimal Global Instruction Scheduling with Unlimited
Resources",
note = "http://www.complang.tuwien.ac.at/papers/ertl+99.ps.gz",
URL = "http://www.complang.tuwien.ac.at/papers/ertl+99.ps.gz",
year = "1999",
abstract = "We present a method for optimal whole-procedure
instruction scheduling for machines with unlimited
resources: The program and its dependences are
transformed into a linear programming problem, which
can then be solved using an off-the-shelf linear
problem solver. This scheduler is an intermediate step
towards a more realistic global instruction scheduler,
but it has also an immediate use: We use it to evaluate
the significance of the restrictions imposed by static
scheduling and for determining an upper bound for the
performance of more realistic global instruction
schedulers. We have applied the scheduler to several
benchmarks and compared it to a dynamic scheduler with
unlimited resources. For some benchmarks, they perform
equally well; for others, dynamic scheduling performs
much better; on closer inspection it appears that the
causes for this performance difference can be reduced
by performing well-known transformations before
scheduling (in particular, loop transformations).",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl99ef,
author = "M. Anton Ertl",
title = "Is {Forth} Code Compact? {A} Case Study",
booktitle = "EuroForth'99 Conference Proceedings",
year = "1999",
address = "St. Petersburg, Russia",
URL = "http://www.complang.tuwien.ac.at/papers/ertl99ef.ps.gz",
abstract = "Forth advocates often claim that Forth code is
smaller, faster, and requires less development time
than equivalent programs in other languages. This paper
investigates this claim by comparing a number of parser
generators written in various languages with respect to
source code size. The smallest parser generator (14
lines) in this comparison is written in Forth, and the
other Forth program is smaller than the others in its
class by a factor of 8 or more; however, the Forth
programs do not have all the features of their
counterparts. I took a closer look at Gray (in Forth)
and Coco/R (in Modula-2) and found that several Forth
features missing from Modula-2 give Gray more than a
factor of three advantage over Coco/R (even if the
other size differences were solely due to differences
in functionality): run-time code generation; access to
the parser and a simple, flexible syntax; and Forth's
dictionary.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@Unpublished{ertl99boulder,
author = "M. Anton Ertl",
title = "A fast compiler for a stack-based language",
note = "Talk given at the University of Colorado in Boulder",
year = "1999",
month = apr,
abstract = "I will present RAFTS, a Forth compiler, focussing on
aspects that are also useful in compilers for other
langauges.\parSpecific needs addressed by RAFTS are:
The source language relies on programmer-visible stacks
(like JavaVM and Postscript), it requires very fast
compilation (like e.g., Lisp, Prolog, Smalltalk, and
JavaVM) to remain interactive, and the source code
usually has a very high procedure call frequency (like,
e.g., Lisp, Prolog, ML).\par My presentation will give
a tour of the compiler, pointing out some of the
techniques that address these needs, e.g.: How convert
the stack-based code into data flow graphs, enabling
the application of standard code generation techniques;
how backwards scheduling simplifies the data structure
and saves two passes; how we minimized the number of
passes without sacrificing code quality; how a
threaded-code intermediate representation speeds up
code replication optimizations.",
}
(with Abstract)
@TechReport{ertl&pirker98,
author = "M. Anton Ertl and Christian Pirker",
title = "Compilation of Stack-Based Languages
({Abschlu{\ss}bericht})",
institution = "Institut f{\"u}r Computersprachen, Technische
Universit{\"a}t Wien",
year = "1998",
type = "Final report to {FWF} for research project {P11231}",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26pirker98.ps.gz",
abstract = "RAFTS is a framework for applying state of the art
compiler technology to the compilation of stack-based
languages like Forth and Postscript. The special needs
of stack-based languages are an efficient stack
representation, fast procedure calls, and fast
compilation. RAFTS addresses the stack representation
problem by allocating stack items to registers such
that most stack accesses in the source program are
register accesses in the machine language program, and
by eliminating most stack pointer updates. To achieve
fast calls, RAFTS performs these optimizations
interprocedurally and also performs procedure inlining
and tail call optimization. Fast compilation is
achieved by selecting fast algorithms and implementing
them efficiently. Until now we have implemented the
basic block part of RAFTS and a part of the work
necessary for inlining and interprocedural
optimizations.",
}
(with Abstract)
gzipped Postscript
@InProceedings{maierhofer&ertl98,
author = "Martin Maierhofer and M. Anton Ertl",
title = "Local Stack Allocation",
booktitle = "Compiler Construction (CC'98)",
year = "1998",
editor = "Kai Koskimies",
publisher = "Springer LNCS~1383",
address = "Lisbon",
pages = "189--203",
URL = "http://www.complang.tuwien.ac.at/papers/maierhofer%26ertl98.ps.gz",
abstract = "Considering the renewed interest in stack machines (in
particular, the Java Virtual Machine), efficient
execution of Algol-family languages on this class of
hardware becomes increasingly important. Local variable
accesses in the source language should be translated
into stack accesses on the target machine (in analogy
to register allocation on register machines).\par In
this paper we explore such stack allocation techniques
for basic blocks. We present some improvements to Phil
Koopman's \emph{stack scheduling}, add an instruction
scheduler and compare the result with an optimal stack
allocation and instruction scheduling strategy. Stack
scheduling in cooperation with depth first postorder
instruction scheduling produces results close to the
optimum. The optimizations discussed in this paper are
profitable only for stack hardware where stack
manipulation operations are faster than local variable
accesses.",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{krall+98,
author = "Andreas Krall and Anton Ertl and Michael Gschwind",
title = "{JavaVM} Implementation: Compilers versus Hardware",
booktitle = "Computer Architecture 98 (ACAC '98)",
year = "1998",
editor = "John Morris",
volume = "20",
series = "Australian Computer Science Communications",
publisher = "Springer",
address = "Perth",
pages = "101--110",
URL = "http://www.complang.tuwien.ac.at/papers/krall+98.ps.gz",
abstract = "The Java Virtual Machine (JavaVM) has contributed
greatly to Java's success because it provides a common
intermediate format which can be shared across the
Internet. Unfortunately, the JavaVM has been optimized
for an interpreted model, resulting in inferior
performance because its stack-based execution model
cannot exploit instruction-level parallelism. The
inherent serialization of the stack execution model can
be addressed either by using compilation techniques or
by hardware. In this article, we review the different
JavaVM implementation methods based on our experiences
with the implementation of the CACAO just-in-time
compiler. For comparison, we have also investigated
different hardware architectures for the direct
implementation of the JavaVM.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl98,
author = "M. Anton Ertl",
title = "\texttt{State}-smartness --- Why it is Evil and How to
Exorcise it",
booktitle = "EuroForth'98 Conference Proceedings",
year = "1998",
address = "Schlo\ss{} Dagstuhl",
URL = "http://www.complang.tuwien.ac.at/papers/ertl98.ps.gz",
abstract = "\texttt{State}-smart words provide a number of
unpleasant surprises to their users. They are applied
in two contexts, and they fail in both: 1) for
providing an arbitrary combination of interpretation
and compilation semantics; 2) for optimizing with a
special implementation of the (default) compilation
semantics. This paper discusses these issues and shows
programmers and system implementors how to avoid
\texttt{state}-smart words. It also reports our
experiences in converting the \texttt{state}-smart
words in Gforth into a clean solution: little work and
few problems.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl97,
author = "M. Anton Ertl",
title = "{GRAY -- ein Generator f{\"u}r rekursiv absteigende
Ybersetzer}",
booktitle = "Forth-Tagung",
year = "1997",
address = "Ludwigshafen",
URL = "http://www.complang.tuwien.ac.at/papers/ertl97.ps.gz",
note = "In German",
abstract = "Der Parser-Generator Gray {\"u}bersetzt Grammatiken in
ausf{\"u}hrbaren Forth-Code f{\"u}r einen Parser. Als
Besonderheit ist dabei die problemlose Kombination
semantischen Aktionen und Erweiterungen der BNF zu
nennen, die sich aus der Verwendung des Stacks zur
Kommunikation zwischen den semantischen Aktionen
ergibt. Dieser Artikel beschreibt den Entwurf und die
Verwendung von Gray.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{maierhofer&ertl97,
author = "Martin Maierhofer and M. Anton Ertl",
title = "Optimizing Stack Code",
booktitle = "Forth-Tagung",
year = "1997",
address = "Ludwigshafen",
URL = "http://www.complang.tuwien.ac.at/papers/maierhofer%26ertl97.ps.gz",
abstract = "With interest in stack machines recently growing (e.g.
the JavaVM architecture used by the Java programming
language), support for the efficient execution of
Algol-like high level languages on this class of
hardware becomes an issue. Local variables accesses in
the source language should be translated into stack
accesses of the target machine (similar to register
allocation on register machines).\par In this paper we
explore such stack allocation techniques for basic
blocks. We evaluate Phil Koopman's ``stack scheduling''
by adding an instruction scheduler and comparing the
result with an optimal stack allocation and instruction
scheduling strategy. Stack scheduling in cooperation
with a depth first postorder instruction scheduling
produces results close to the optimum. The
optimizations discussed in this paper are only useful
for stack hardware that allows faster access to the
stack than to main memory.",
}
(with Abstract)
gzipped Postscript
@InProceedings{ertl&pirker97,
author = "M. Anton Ertl and Christian Pirker",
title = "The Structure of a {Forth} Native Code Compiler",
booktitle = "EuroForth '97 Conference Proceedings",
year = "1997",
address = "Oxford",
pages = "107--116",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26pirker97.ps.gz",
abstract = "Writing a sophisticated Forth native code compiler
poses some tasks that are not discussed in compiler
text books. Some of these tasks arise from specific
language features, others from the requirement for very
fast compilation to maintain interactivity. In this
paper we describe some of the more interesting data
structures and algorithms used in the RAFTS
prototype.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@Unpublished{ertl97ibm,
author = "M. Anton Ertl",
title = "Fast high-quality {JavaVM} compilation",
note = "Seminar given at IBM Watson Research Center, Yorktown
Heights",
year = "1997",
month = aug,
abstract = "I will discuss how to apply the technology we
developed for our Forth native code compilation project
RAFTS to JavaVM just-in-time (JIT) compilation, and
describe parts of the CACAO JavaVM just-in-time
compiler. Overall, this gives an outline how CACAO will
probably look in the future.\par For generating good
code for register machines from stack-based languages,
we have to avoid the overheads of the straightforward
memory-based stack representation; instead, we keep the
stack items in registers, and eliminate as many
stack-pointer updates as possible by combining
them.\par The basis of our native code generation
technique is a method for converting the stack-based
code for a basic block into a data dependence graph;
this allows us to apply standard code generation
techniques: we select code with tree parsing, schedule
the instructions with list scheduling, and use a simple
basic block register allocator; these phases are merged
into as few passes as possible to achieve very fast
compilation. The global (intraprocedural) register
allocation for the JavaVM is performed by another
simple register allocator that assigns a register to
each stack slot that is alive at a basic block
boundary, and one register to each local variable
slot.",
}
(with Abstract)
@Article{ertl97objects,
author = "M. Anton Ertl",
title = "Yet Another {Forth} Objects Package",
journal = "Forth Dimensions",
year = "1997",
volume = "19",
number = "2",
pages = "37--43",
URL = "http://www.complang.tuwien.ac.at/forth/objects/objects.html",
}
@Article{ertl97structs,
author = "M. Anton Ertl",
title = "Yet Another {Forth} Structures Package",
journal = "Forth Dimensions",
year = "1997",
volume = "19",
number = "3",
pages = "13--16",
URL = "http://www.complang.tuwien.ac.at/forth/objects/structs.html",
}
@InProceedings{ertl&krall96,
author = "M. Anton Ertl and Andreas Krall",
title = "Removing Anti Dependences by Repairing",
booktitle = "Compiler Construction (CC'96)",
year = "1996",
editor = "Tibor Gyim\'{o}thy",
volume = "1060",
series = "LNCS",
publisher = "Springer",
address = "Link{\"o}ping",
pages = "33--43",
URL = "http://www.complang.tuwien.ac.at/papers/ertl-krall96cc.ps.gz",
abstract = "Anti dependences (write-after-read dependences)
constrain the reordering of instructions and limit the
effectiveness of instruction scheduling and software
pipelining techniques for superscalar and VLIW
processors. Repairing solves this problem: If the
definition of a variable is moved before a previous use
of that variable, compiler-generated repair code
reconstructs the value that the definition destroyed.
Repairing features several potential advantages over
register renaming, another technique for removing anti
dependences: less register pressure, less loop
unrolling and fewer move instructions.",
}
(4 dupl. with Abstract and URL)
gzipped Postscript
@PhdThesis{ertl96diss,
author = "M. Anton Ertl",
title = "Implementation of Stack-Based Languages on Register
Machines",
school = "{Technische Universit{\"a}t Wien}",
year = "1996",
address = "Austria",
URL = "http://www.complang.tuwien.ac.at/papers/ertl96diss.ps.gz",
abstract = "Languages with programmer-visible stacks (stack-based
languages) are used widely, as intermediate languages
(e.g., JavaVM, FCode), and as languages for human
programmers (e.g., Forth, PostScript). However, the
prevalent computer architecture is the register
machine. This poses the problem of efficiently
implementing stack-based languages on register
machines. A straight-forward implementation of the
stack consists of a memory area that contains the stack
items, and a pointer to the top-of-stack item.\par The
basic optimizations explored in this thesis are:
Caching the frequently-accessed top-of-stack items in
registers reduces stack access overhead, and combining
stack-pointer updates eliminates most of them.\par This
thesis examines these optimizations in the context of
three basic implementation techniques: \begin{itemize}
\item For (virtual machine) \textbf{interpreters}, I
regard the stack cache in the registers as finite state
machine, where the execution of a virtual machine
instruction performs a state transition; there are
specialized implementations of the virtual machine
instructions for each state. \item My
\textbf{native-code compilation} technique transforms
the programs into standard compiler data structures;
then state-of-the-art compiler technology can be
applied for optimization and code generation. In
particular, stack items are represented by
pseudo-registers, which register allocation will
(usually) put into machine registers; stack pointer
updates are executed symbolically, i.e., at compile
time. \item For \textbf{translation to C}, I emphasize
simplicity; the optimizer of the C compiler takes care
of the complex problems. The translator just has to
represent stack items as local C variables, and the C
compiler will keep them in registers. As with
native-code compilers, (usually) no stack pointer
updates at run-time are necessary. \end{itemize} For
interpreters, the optimizations eliminate about two
real machine instructions per virtual machine
instruction, resulting in speedups of 17\%--31\% (for
Forth on a DecStation~3100). The techniques presented
here for native-code compilation and translation to C
achieve a speedup factor of 1.3--3 over traditional
native code compilers and more than 3 over a
straight-forward translator to C (for Forth on a
486DX2/66).",
}
(4 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&pirker96,
author = "M. Anton Ertl and Christian Pirker",
title = "{RAFTS} for Basic Blocks: A progress report on {Forth}
Native Code Compilation",
booktitle = "EuroForth '96 Conference Proceedings",
year = "1996",
address = "St. Petersburg, Russia",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26pirker96.ps.gz",
abstract = "RAFTS is a framework for applying state of the art
compiler technology to the compilation of stack-based
languages, in particular Forth. This paper describes
our experiences and findings in implementing the basic
block (straight-line code) part of RAFTS; it also
presents empirical results: the basic block part is the
simplest part of RAFTS, but without the other parts it
is hardly better than existing techniques, because in
Forth basic blocks are very short (there are few basic
blocks with more than two useful instructions).",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl95pldi,
author = "M. Anton Ertl",
title = "Stack Caching for Interpreters",
booktitle = "SIGPLAN Conference on Programming Language Design and
Implementation (PLDI'95)",
year = "1995",
pages = "315--327",
URL = "http://www.complang.tuwien.ac.at/papers/ertl95pldi.ps.gz",
abstract = "An interpreter can spend a significant part of its
execution time on arguments of virtual machine
instructions. This paper explores two methods to reduce
this overhead for virtual stack machines by caching
top-of-stack values in (real machine) registers. The
{\em dynamic method} is based on having, for every
possible state of the cache, one specialized version of
the whole interpreter; the execution of an instruction
usually changes the state of the cache and the next
instruction is executed in the version corresponding to
the new state. In the {\em static method} a state
machine that keeps track of the cache state is added to
the compiler. Common instructions exist in specialized
versions for several states, but it is not necessary to
have a version of every instruction for every cache
state. Stack manipulation instructions are optimized
away.",
}
(10 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ErKr95,
author = "M. Anton Ertl and Andreas Krall",
title = "High Level Constraints over Finite Domains",
booktitle = "Constraint Processing",
editor = "Manfred Meyer",
series = "LNCS 923",
publisher = "Springer",
pages = "51--66",
address = "Heidelberg",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26krall93.ps.gz",
year = "1995",
}
(7 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&maierhofer95,
author = "M. Anton Ertl and Martin Maierhofer",
title = "Translating {Forth} to Efficient {C}",
booktitle = "EuroForth~'95 Conference Proceedings",
year = "1995",
address = "Schloss Dagstuhl, Germany",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26maierhofer95.ps.gz",
abstract = "An automatic translator can translate Forth into C
code which the current generation of optimizing C
compilers compiles to efficient machine code. I.e., the
resulting code keeps stack items in registers and
rarely updates the stack pointer. This paper presents a
simple translation method that produces efficient C
code, describes an implementation of the method and
presents results achieved with this implementation: The
translated code is 4.5--7.5 times faster than Gforth
(the fastest measured interpretive system), 1.3--3
times faster than BigForth 386 (a native code
compiler), and smaller than Gforth's threaded code.",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@Article{WaKr95a,
author = "Jian Wang and Andreas Krall and M. Anton Ertl",
title = "Trace Software Pipelining",
journal = "Journal of Computer Science and Technology",
year = "1995",
volume = "10",
number = "6",
pages = "481--490",
}
@InProceedings{WaKr95b,
author = "Jian Wang and Andreas Krall and M. Anton Ertl",
title = "Decomposed Software Pipelining with reduced Register
Requirement",
booktitle = "International Conference on Parallel Architectures and
Compilation Techniques",
editor = "Jean-Luc Gaudiot",
organization = "IFIP,ACM,IEEE",
publisher = "North-Holland",
pages = "277--280",
address = "Limassol",
year = "1995",
}
@InProceedings{WaKr95c,
author = "Jian Wang and Andreas Krall and M. Anton Ertl",
title = "Register Requirement for Exploiting Loops' Maximum
Instruction-Level Parallelism",
booktitle = "The Fourth International Conference for Young Computer
Scientists",
editor = "Shou Bai and Jianping Fan and Xiaozhong Li",
publisher = "Peking University Press",
pages = "70--75",
address = "Beijing",
year = "1995",
}
(5 dupl. with Abstract and URL)
@InProceedings{ambrosch+94,
author = "Wolfgang Ambrosch and M. Anton Ertl and Felix Beer and
Andreas Krall",
title = "Dependence-Conscious Global Register Allocation",
booktitle = "Programming Languages and System Architectures",
year = "1994",
editor = "J{\"u}rg Gutknecht",
publisher = "Springer LNCS~782",
address = "{Z{\"u}rich}",
pages = "125--136",
URL = "http://www.complang.tuwien.ac.at/papers/ambrosch+94.ps.gz",
abstract = "Register allocation and instruction scheduling are
antagonistic optimizations: Whichever is applied first,
it will impede the other. To solve this problem, we
propose dependence-conscious colouring, a register
allocation method that takes the dependence graph used
by the instruction scheduler into consideration.
Dependence-conscious colouring consists of two parts:
First, the interference graph is built by analysing the
dependence graphs, resulting in fewer interference
edges and less spilling than the conventional
preordering approach. Secondly, during colouring the
register selection keeps dependence paths short,
ensuring good scheduling. Dependence-conscious
colouring reduces the number of interference edges by
7\%--24\% and antidependences by 46\%--100\%.",
}
(5 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl94l,
author = "M. Anton Ertl",
title = "Automatic Scoping of Local Variables",
booktitle = "EuroForth~'94 Conference Proceedings",
year = "1994",
address = "Winchester, UK",
pages = "31--37",
URL = "http://www.complang.tuwien.ac.at/papers/ertl94l.ps.gz",
abstract = "In the process of lifting the restrictions on using
locals in Forth, an interesting problem poses itself:
What does it mean if a local is defined in a control
structure? Where is the local visible? Since the user
can create every possible control structure in ANS
Forth, the answer is not as simple as it may seem.
Ideally, the local is visible at a place if the control
flow {\em must} pass through the definition of the
local to reach this place. This paper discusses locals
in general, the visibility problem, its solution, the
consequences and the implementation as well as related
programming style questions.",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&krall94,
author = "M. Anton Ertl and Andreas Krall",
title = "Delayed Exceptions --- Speculative Execution of
Trapping Instructions",
booktitle = "Compiler Construction (CC '94)",
year = "1994",
publisher = "Springer LNCS~786",
address = "Edinburgh",
month = apr,
pages = "158--171",
URL = "http://www.complang.tuwien.ac.at/papers/ertl-krall94cc.ps.gz",
abstract = "Superscalar processors, which execute basic blocks
sequentially, cannot use much instruction level
parallelism. Speculative execution has been proposed to
execute basic blocks in parallel. A pure software
approach suffers from low performance, because
exception-generating instructions cannot be executed
speculatively. We propose delayed exceptions, a
combination of hardware and compiler extensions that
can provide high performance and correct exception
handling in compiler-based speculative execution.
Delayed exceptions exploit the fact that exceptions are
rare. The compiler assumes the typical case (no
exceptions), schedules the code accordingly, and
inserts run-time checks and fix-up code that ensure
correct execution when exceptions do happen.",
}
(7 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{b94b,
author = "Jian Wang and Andreas Krall and M. Anton Ertl and
Christine Eisenbeis",
title = "Software pipelining with register allocation and
spilling",
booktitle = "proceedings of the 27th International Symposium and
Workshop on Microprogramming and Microarchitecture
(MICRO-27)",
year = "1994",
month = dec,
organization = "ACM and IEEE",
}
(7 dupl. with Abstract and URL)
@InProceedings{b94a,
author = "Jian Wang and Andreas Krall and M. Anton Ertl and
Christine Eisenbeis",
title = "{Trace Software Pipelining}: A Novel Technique for
Parallelization of Loops with Branches",
booktitle = "proceedings of International Conference on Parallel
Architectures and Compilation Techniques (PACT'94)",
year = "1994",
month = aug,
organization = "IFIP",
editor = "Michel Cosnard and G. R. Gao and G. M. Silberman",
publisher = "Elsevier Science B. V. (North-Holland)",
}
@InProceedings{ertl93,
author = "M. Anton Ertl",
title = "A Portable {Forth} Engine",
booktitle = "EuroFORTH '93 conference proceedings",
year = "1993",
address = "Mari\'ansk\'e L\'azn\`e (Marienbad)",
URL = "http://www.complang.tuwien.ac.at/papers/ertl93.ps.gz",
abstract = "The Forth engine discussed in this paper is written in
GNU C, which provides several extensions that are
important for Forth implementation. The indirect
threaded Forth engine is completely
machine-independent, direct threading requires a few
machine-specific lines for each machine. Using a
portable language like GNU C encourages producing an
engine with many primitives. In order to make the
development of primitives easier and less error-prone,
an automatic tool generates most of the code for a
Forth primitive from the stack effect notation, even if
the TOS is kept in a register. The engine is combined
with the parts of the system written in Forth by
loading a machine-independent image file that contains
the executable Forth code in relocatable form.",
}
(5 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&krall93deutsch,
author = "M. Anton Ertl and Andreas Krall",
title = "Benutzerdefinierte Constraints",
booktitle = "9.~Workshop Logische Programmierung",
pages = "18--22",
year = "1993",
address = "Hagen",
series = "FernUniversit{\"a}t Hagen 146-10/1993",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26krall93deutsch.ps.gz",
note = "In German",
abstract = "{Um kombinatorische Suchprobleme zu l{\"o}sen, werden
Konsistenztechniken in Logik-Programmiersprachen
verwendet. Bei vielen dieser Probleme reichen
allerdings die vordefinierten Constraints nicht aus.
Leider sind Constraints, die mit lookahead- oder
forward-Deklarationen definiert werden, oft sehr
langsam. In dieser Arbeit stellen wir einen neuen
Ansatz f{\"u}r benutzerdefinierte Constraints vor.
Benutzerdefinierte Constraints sind normale
Prolog-Pr{\"a}dikate mit einer zus{\"a}tzlichen
Constraint-Deklaration. Sie erm{\"o}glichen eine genaue
Kontrolle "uber Laufzeit und Suchraumbeschr{\"a}nkung.
Damit lassen sich gewaltige Laufzeitverbesserungen
gegen{\"u}ber lookahead-De\-kla\-ra\-tionen
erzielen.}",
}
(2 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&krall92,
author = "M. Anton Ertl and Andreas Krall",
title = "Instruction Scheduling for Complex Pipelines",
booktitle = "Compiler Construction (CC'92)",
year = "1992",
publisher = "Springer LNCS~641",
address = "Paderborn",
pages = "207--218",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26krall92.ps.gz",
abstract = "We designed heuristics for applying the list
scheduling algorithm to processors with complex
pipelines. On these processors the pipeline can stall
due to resource contention (structural hazards) in
addition to the usual data hazards. Conventional
heuristics consider only data hazards. Our heuristics
reduce structural hazards, too. Code with much
instruction-level parallelism is optimized to avoid
structural hazards, sequential code is scheduled for
reducing data hazards. Embedded in a postpass strategy
our scheduler removes 60\%--100\% of the removable
stalls from conventionally scheduled code.",
}
(7 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl92,
author = "M. Anton Ertl",
title = "A New Approach to {Forth} Native Code Generation",
booktitle = "EuroForth~'92",
year = "1992",
organization = "MicroProcessor Engineering",
address = "Southampton, England",
pages = "73--78",
URL = "http://www.complang.tuwien.ac.at/papers/ertl92.ps.gz",
abstract = "RAFTS is a framework for applying state of the art
compiler technology to the compilation of Forth. The
heart of RAFTS is a simple method for transforming
Forth programs into data flow graphs and static single
assignment form. Standard code generation and
optimization techniques can be applied to programs in
these forms. Specifically, RAFTS uses interprocedural
register allocation to eliminate nearly all stack
accesses. It also removes nearly all stack pointer
updates. Inlining and tail call optimization reduce the
call overhead. RAFTS compiles all of Forth, including
difficult cases like unknown stack heights, {\tt PICK},
{\tt ROLL} and {\tt EXECUTE}. And last, but not least,
RAFTS is designed for interactive Forth systems; it is
not restricted to batch compilers",
}
(3 dupl. with Abstract and URL)
gzipped Postscript
@InProceedings{ertl&krall91,
author = "M. Anton Ertl and Andreas Krall",
title = "Optimal Instruction Scheduling Using Constraint Logic
Programming",
booktitle = "Programming Language Implementation and Logic
Programming (PLILP)",
year = "1991",
publisher = "Springer LNCS~528",
address = "Passau",
pages = "75--86",
URL = "http://www.complang.tuwien.ac.at/papers/ertl%26krall91.ps.gz",
abstract = "Instruction scheduling is essential for the efficient
operation of today's and tomorrow's processors. It can
be stated easily and declaratively as a logic program.
Consistency techniques embedded in logic programming
makes the efficient solution of this problem possible.
This paper describes an instruction scheduling program
for the Motorola 88100 RISC processor, which minimizes
the number of pipeline stalls. The scheduler is written
in the constraint logic programming language ARISTO and
uses a declarative model of the processor to generate
an optimal schedule. The model uses lists of domain
variables to represent the pipeline stages and
describes the dependencies between instructions by
constraints in order to ensure correct scheduling.
Although optimal instruction scheduling is NP-complete,
the scheduler can be applied to real programs because
of the speed gained through consistency techniques.",
}
(8 dupl. with Abstract and URL)
gzipped Postscript
@MastersThesis{ertl90,
author = "M. Anton Ertl",
title = "Coroutining und Constraints in der
Logik-Programmierung",
school = "{Technische Universit{\"a}t Wien}",
type = "Diplomarbeit",
year = "1990",
address = "Austria",
URL = "http://www.complang.tuwien.ac.at/Diplomarbeiten/ertl90.ps.gz",
note = "In German",
abstract = "Logic Programming is distinguished by the declarative
programming style it encourages. Solutions to search
problems are specified easily. Some extensions to
Prolog are necessary to apply these advantages to
real-world problems.\par {\bf Coroutining} provides for
data driven goal execution. Using Coroutining it is
easier to separate logic and control and to use a
declarative programming style.\par {\bf Constraint
Logic Programming} liberates unification from its
restriction to syntactic equality and extends it to
relations on arbitrary computation domains. It also
enables the integration of {\bf consistency techniques}
in logic programming. Combinatorial problems are solved
efficiently using consistency techniques, since they
prune the search tree early and provide information for
the application of heuristics.\par Coroutining and
consistency techniques have been embedded in a Prolog
compiler based on the Warren Abstract Machine. The
application of these extensions to other Prolog
implementations is described.\par The application of
the resulting extended logic programming languages is
demonstrated using examples from artificial
intelligence, operations research, and systems
programming.",
}
(with Abstract)
gzipped Postscript
Some entries might have been ommited on the list above if author/title combination has already occurred. If in doubt check the duplicate list linked on the right of some citations.
| 146 matches: | Papers and talks produced by the Institut für Computersprachen, TU Wien |
This page has been served in 0.11 secs.