Hex Dump In Many Programming Languages

See also: ArraySumInManyProgrammingLanguages, CounterInManyProgrammingLanguages, DotProductInManyProgrammingLanguages, WardNumberInManyProgrammingLanguages, NinetyNineBottlesOfBeerOnTheWall, ProgrammingChrestomathy

Please use spaces rather than tabs in any code samples below (ConvertSpacesToTabsNotForCode).

A new ProgrammingChrestomathy challenge: hexdump a file, given a starting and ending offset in the file, and the number of bytes to display per output line. Newly recommended is to show the current offset at the start of each output line.

Read the file source and print a HexDump of the bytes in the file from start to end, formatted to show width bytes separated by space, followed by a separator, then followed by the ASCII values of the displayed bytes (or a period for non-ASCII bytes), in the order they exist in the file.


Has anyone tracked the most compact, yet viable, solutions? It seems to me that the Perl, Forth, and C examples solve the problem with the least fooling around.


SchemeLanguage: (alternative versions, or critiques of this one, would be welcome) [Note: I've removed many of the comments from my original version; ironically, it is now much clearer and easier to read.]

;; (hexdump) => #t OR error symbol
;; Interactive entry point: prompts on the console for the file name,
;; the start and end offsets and the number of bytes per line, then
;; passes them to dump-file and returns whatever dump-file returns.
;;
;; To use:
;; start the Scheme interpreter
;; (load 'hexdump)
;; (hexdump)
;;
;; Special Instructions:
;; The filename must be enclosed in double quotes, because it is
;; obtained with (read) and dump-file only accepts a string.
(define (hexdump)
  ;; let* (not let): the bindings perform console I/O, and plain let
  ;; leaves the order in which its init expressions are evaluated
  ;; unspecified, so the prompts could appear in any order.
  (let* ((file   (begin (display "Filename: ") (read)))
         (start  (begin (display "Start: ") (read)))
         (finish (begin (display "End: ") (read)))
         (width  (begin (display "Bytes per Line: ") (read))))
    (dump-file file width start finish)))
;; (dump-file string integer integer integer) => #t OR error symbol
;; Validates the arguments, opens FILENAME, skips to byte START and
;; prints a dump of the bytes up to (but not including) FINISH.
;; A LINE-LENGTH that is not positive falls back to 16 bytes per line.
;; Returns one of the err-... symbols when the request cannot be served.
(define (dump-file filename line-length start finish)
  (cond ((not (string? filename))
         'err-no-filename)
        ((> 0 start)
         'err-negative-start)
        ((>= start finish)
         'err-startpoint-after-endpoint)
        (else
         (let ((src (open-input-file filename))
               (width (if (> line-length 0)   ; default line width to 16
                          line-length
                          16))
               (size (- finish start)))        ; total size of dump in bytes
           ;; NOTE(review): many implementations signal an error from
           ;; open-input-file instead of returning a non-port, in which
           ;; case this check never fires.
           (if (not (port? src))               ; check for valid port
               'err-no-file
               (if (seek src start)
                   (display-dump src width size)
                   ;; file ends before reaching start
                   'err-file-length-exceeded))))))
;; (seek port integer) => boolean
;; Consumes ENTRYPOINT characters from SRC.  Returns #t once they have
;; all been skipped; if the file ends first, closes SRC and returns #f.
(define (seek src entrypoint)
  (let skip ((remaining entrypoint))
    (if (<= remaining 0)
        #t
        (if (eof-object? (read-char src))
            (begin (close-input-port src)
                   #f)
            (skip (- remaining 1))))))
;; (display-dump port integer integer) => #t
;; Prints a hexdump of up to SIZE bytes read from SRC, WIDTH bytes per
;; line, then closes SRC.
;;
;; Output is produced in blocks: accumulate-lines gathers up to
;; blocksize formatted lines into one string, so that (display) is
;; called once per block rather than once per line.
(define (display-dump src width size)
  (let ((blocksize 64))
    (let next-block ((blockcount (/ size width))   ; lines still to print
                     (dumpsize size))              ; bytes still to print
      (if (or (<= blockcount 0)
              (eof-object? (peek-char src)))
          (begin (close-input-port src)
                 #t)
          (begin
            (display (accumulate-lines src width blocksize dumpsize))
            (next-block (- blockcount blocksize)
                        (- dumpsize (* blocksize width))))))))
;; (accumulate-lines port integer integer integer) => string
;; Reads and formats at most BLOCKSIZE lines of WIDTH bytes from SRC,
;; stopping early once DUMPSIZE bytes have been consumed or the file
;; ends, and returns the formatted lines as one string.
(define (accumulate-lines src width blocksize dumpsize)
  (let accumulate ((count blocksize)
                   (tailing dumpsize))
    ;; Stop at exactly BLOCKSIZE lines.  The caller advances its byte
    ;; budget by blocksize * width per block; reading one extra line
    ;; here (the old `(< count 0)` test) made long dumps run past the
    ;; requested end offset.
    (if (or (<= count 0)      ; end of the block
            (<= tailing 0))   ; end of the dump region
        ""
        ;; At the end of the region only the remaining part of the
        ;; line is read; otherwise a full line.
        (let ((line (read-dump-line src (min tailing width))))
          (if (null? line)    ; end of the file
              ""
              (string-append (format-line line width)
                             (accumulate (- count 1) (- tailing width))))))))
;; (read-dump-line port integer) => list of characters
;; Reads up to WIDTH characters from SRC and returns them in file
;; order.  The list is shorter than WIDTH if the file ends first.
(define (read-dump-line src width)
  (let collect ((remaining width))
    (cond ((<= remaining 0) '())
          (else
           (let ((ch (read-char src)))
             (if (eof-object? ch)
                 '()
                 (cons ch (collect (- remaining 1)))))))))
;; (format-line list-of-chars integer) => string
;; Builds one output line: the hex column, padding for bytes missing
;; from a short line (three columns per byte), ": ", the character
;; column and a newline.
(define (format-line dump width)
  (let ((missing (- width (length dump))))
    (string-append (format-dump-line dump)
                   (pad-string 3 missing)
                   ": "
                   (format-values-line dump)
                   line-marker)))
;; One-character string holding the newline that ends every dump line.
(define line-marker (string #\newline))
;; (pad-string integer integer) => string
;; Returns PAD-SIZE * PADDING spaces, or "" when PADDING is not positive.
(define (pad-string pad-size padding)
  (cond ((> padding 0) (make-string (* pad-size padding) #\space))
        (else "")))
;; (format-values-line list-of-chars) => string
;; Character column: each character of DUMP mapped through char-value.
(define (format-values-line dump)
  (let ((shown (map char-value dump)))
    (list->string shown)))
;; (format-dump-line list-of-chars) => string
;; Hex column: each character of DUMP as a two-digit hex string,
;; each followed by a space.
(define (format-dump-line dump)
  (let ((hex-cells (map char->hex-string dump)))
    (string-list-concat hex-cells " ")))
;; (string-list-concat list-of-strings string) => string
;; Concatenates the strings, appending SEPARATOR after every element
;; (including the last).  A non-list tail is returned unchanged.
(define (string-list-concat string-list separator)
  (define (cat remaining)
    (if (null? remaining)
        ""
        (if (list? remaining)
            (string-append (car remaining)
                           separator
                           (cat (cdr remaining)))
            remaining)))
  (cat string-list))
;; (char->hex-string char) => string
;; Hex representation of the character's code, zero-padded on the
;; left when the code is below 16.
(define (char->hex-string c)
  (let ((code (char->integer c)))
    (let ((digits (number->string code 16)))
      (if (< code 16)
          (string-append "0" digits)
          digits))))
;; (char-value char) => char
;; The character itself when char-printable? accepts it, otherwise a
;; period.
(define (char-value c)
  (cond ((char-printable? c) c)
        (else #\.)))
;; (char-printable? char) => boolean
;; True for the visible ASCII characters, i.e. codes strictly between
;; space (32) and DEL (127).  Space counts as non-printable, so the
;; text column shows it as a period.
;; NOTE(review): the original body was truncated at the first "<" when
;; the page was extracted; this reconstruction is an assumption that
;; should be checked against the original source.
(define (char-printable? c)
  (and (char<? #\space c)
       (char<? c (integer->char 127))))

A sample printout, using its own source file:

> (hexdump)
Filename: "hexdump.scm"
Start: 128
End: 256
Bytes per Line: 16
  1. 53 70 65 63 69 61 6c 20 49 6e 73 74 72 75 63 : .Special.Instruc
  2. 69 6f 6e 73 3a 0d 0a 3b 3b 20 20 20 20 54 68 : tions:..;;....Th
  3. 20 66 69 6c 65 6e 61 6d 65 20 6d 75 73 74 20 : e.filename.must.
  4. 65 20 65 6e 63 6c 6f 73 65 64 20 69 6e 20 64 : be.enclosed.in.d
  5. f 75 62 6c 65 20 71 75 6f 74 65 73 2e 20 20 20 : ouble.quotes....
  6. 0d 0a 0d 0a 28 64 65 66 69 6e 65 20 28 68 65 : .....(define.(he
  7. 64 75 6d 70 29 0d 0a 20 20 28 6c 65 74 20 28 : xdump)....(let.(
  8. 66 69 6c 65 20 20 20 28 62 65 67 69 6e 20 28 : (file...(begin.(
#t

HaskellLanguage:

import System
import IO
import Data.Char
-- | Render the characters of the input from offset @start@ up to (but not
-- including) @end@ as dump lines of @width@ bytes each.  Every line holds a
-- zero-padded decimal offset, the bytes as two-digit hex, and the printable
-- ASCII form of the bytes with a period standing in for everything else.
-- A non-positive @width@ is treated as 1 so that splitting always makes
-- progress.
-- NOTE: each character is assumed to have a code below 256 (intToDigit
-- rejects larger values).
hexdump :: Int -> Int -> Int -> String -> String
hexdump start end width = unlines . map hexify . addLineNumber . splitGroups . takeRange
  where
    step = max 1 width
    takeRange = take (end - start) . drop start
    splitGroups [] = []
    splitGroups xs = ys : splitGroups zs
      where (ys, zs) = splitAt step xs
    addLineNumber = zip $ map (pad . show) $ iterate (+ step) start
      where pad str = replicate (6 - length str) '0' ++ str
    hexify (num, char) = num ++ ": " ++ (unwords $ map toHexDigit char)
                         ++ ": " ++ map literal char
    toHexDigit = stringify . (`divMod` 16) . ord
      where stringify (x, y) = [intToDigit x, intToDigit y]
    -- Only printable ASCII is shown as-is; everything else becomes '.'.
    literal c = if isAscii c && isPrint c then c else '.'
-- | Read the whole of @file@, transform its contents with @func@ and print
-- the result followed by a newline.
withFile file func = putStrLn . func =<< readFile file
-- | Command line: @hexdump FILE START END WIDTH@.
main = do
  args <- getArgs
  let intArg n = read (args !! n)
  withFile (args !! 0) (hexdump (intArg 1) (intArg 2) (intArg 3))

Output:

debian:/home/jdtang/vm/spike# ./hexdump hexdump.hs 0 150 15
  1. : 69 6d 70 6f 72 74 20 53 79 73 74 65 6d 0a 69: import System.i
  2. : 6d 70 6f 72 74 20 49 4f 0a 69 6d 70 6f 72 74: mport IO.import
  3. : 20 44 61 74 61 2e 43 68 61 72 0a 0a 68 65 78: Data.Char..hex
  4. : 64 75 6d 70 20 73 74 61 72 74 20 65 6e 64 20: dump start end
  5. : 77 69 64 74 68 20 3d 20 75 6e 6c 69 6e 65 73: width = unlines
  6. : 20 2e 20 6d 61 70 20 68 65 78 69 66 79 20 2e: . map hexify .
  7. : 20 61 64 64 4c 69 6e 65 4e 75 6d 62 65 72 20: addLineNumber
  8. : 2e 20 73 70 6c 69 74 47 72 6f 75 70 73 20 2e: . splitGroups .
  9. : 20 74 61 6b 65 52 61 6e 67 65 0a 20 20 20 20: takeRange.
  10. : 77 68 65 72 65 20 74 61 6b 65 52 61 6e 67 65: where takeRange

-- JonathanTang


VisualBasicNine

The following direct/imperative-style program does the trick

Imports System
Imports System.IO
Module InManyLanguages
    ''' <summary>
    ''' Prints a header describing the request, then a hex dump of the bytes of
    ''' <paramref name="filename"/> from <paramref name="startOffset"/> up to (but
    ''' not including) <paramref name="endOffset"/>, <paramref name="width"/> bytes
    ''' per line.  Each line shows the bytes in hex, a " : " separator and the
    ''' visible ASCII characters, with a period for every other byte (including
    ''' space).  Nothing is dumped when the range is empty, the width is not
    ''' positive, or the start offset lies at or beyond the end of the file.
    ''' </summary>
    Public Sub HexDump(ByVal filename As String, _
                       ByVal startOffset As Integer, _
                       ByVal endOffset As Integer, _
                       ByVal width As Integer)
        Using fs = New FileStream(filename, FileMode.Open, FileAccess.Read)
            Console.WriteLine("Filename: {0}", filename)
            Console.WriteLine("Start: {0}", startOffset)
            Console.WriteLine("End: {0}", endOffset)
            Console.WriteLine("Bytes per line: {0}", width)
            ' An empty or inverted range used to pass a negative count to Read.
            If width <= 0 OrElse endOffset <= startOffset OrElse fs.Length <= startOffset Then
                Return
            End If
            fs.Position = startOffset
            Dim buffer(width - 1) As Byte
            Dim remaining = endOffset - startOffset
            While remaining > 0
                Dim cb = fs.Read(buffer, 0, Math.Min(width, remaining))
                ' End of file: stop without printing an empty row.
                If cb = 0 Then Exit While
                remaining -= cb
                For index As Integer = 0 To cb - 1
                    Console.Write("{0:X2} ", buffer(index))
                Next
                ' Pad a short final row so the separator stays aligned:
                ' each missing byte takes the width of "XX ".
                For index As Integer = cb To width - 1
                    Console.Write("   ")
                Next
                Console.Write(": ")
                For index As Integer = 0 To cb - 1
                    Dim b = buffer(index)
                    ' 33..126 are the visible ASCII characters ('~' included).
                    Console.Write(If(b > 32 AndAlso b <= 126, ChrW(b), "."c))
                Next
                Console.WriteLine()
            End While
        End Using
    End Sub
End Module

Which prints out the following:

Filename: InManyLanguages.vb
Start: 128
End: 256
Bytes per line: 16
  1. 73 20 49 6E 74 65 67 65 72 2C 20 42 79 56 61 : As.Integer,.ByVa
  2. C 20 65 6E 64 4F 66 66 73 65 74 20 41 73 20 49 : l.endOffset.As.I
  3. E 74 65 67 65 72 2C 20 42 79 56 61 6C 20 77 69 : nteger,.ByVal.wi
  4. 74 68 20 41 73 20 49 6E 74 65 67 65 72 29 0D : dth.As.Integer).
  5. A 20 20 20 20 20 20 20 20 55 73 69 6E 67 20 66 : .........Using.f
  6. 20 3D 20 4E 65 77 20 46 69 6C 65 53 74 72 65 : s.=.New.FileStre
  7. 6D 28 66 69 6C 65 6E 61 6D 65 2C 20 46 69 6C : am(filename,.Fil
  8. 4D 6F 64 65 2E 4F 70 65 6E 2C 20 46 69 6C 65 : eMode.Open,.File

The imperative printing code within the loop could be replaced by this functional LINQ expression:

' Functional replacement for the three printing loops of HexDump.
' Relies on cb, buffer and width from the enclosing read loop.
' First part: the cb bytes as "X2" hex strings followed by (width - cb)
' filler entries, all joined with a single space.
' Second part: the " : " separator.
' Third part: one character per byte, the byte itself when 32 < b < 126,
' otherwise a period.
Dim s = String.Join(" ", (From index In Enumerable.Range(0, cb) _
Select buffer(index).ToString("X2")).Concat( _
Enumerable.Repeat(" ", width - cb)).ToArray()) _
& " : " & _
New String((From index In Enumerable.Range(0, cb) _
Let b = buffer(index) _
Select CType(IIf(b > 32 And b < 126, ChrW(b), "."c), Char)).ToArray())
Console.WriteLine(s)

This isn't necessarily more maintainable or readable, but it proves the point.


PythonLanguage:

import sys
def hexdump( chars, sep, width ):
    """Print chars as rows of width bytes.

    Each row is the bytes as two-digit hex joined by sep, then sep, then
    the quotechars() rendering of the row.  A short final row is padded
    with NUL characters, so it shows as trailing "00" cells.
    """
    remaining = chars
    while remaining:
        row, remaining = remaining[:width], remaining[width:]
        padded = row.ljust( width, '\000' )
        hex_cells = sep.join( "%02x" % ord( c ) for c in padded )
        print( "%s%s%s" % ( hex_cells, sep, quotechars( padded ) ) )
def quotechars( chars ):
    """Return chars with every non-alphanumeric character replaced by '.'."""
    # The empty-string receiver of join() was lost from the original line,
    # leaving a syntax error ("return .join(...)").
    return ''.join( ['.', c][c.isalnum()] for c in chars )
def file_section( name, start, end ):
    """Return the bytes of file name in the half-open range [start, end).

    For non-negative offsets only the requested range is read, and the
    file is always closed.  Negative offsets keep their original meaning
    (Python slice semantics over the whole file contents).
    """
    with open( name, "rb" ) as source:
        if start < 0 or end < 0:
            # Slice semantics need the total length, so read everything.
            return source.read()[start:end]
        source.seek( start )
        return source.read( max( 0, end - start ) )
# Command line: hexdump.py FILE START END WIDTH
if __name__ == '__main__':
    section = file_section( sys.argv[1], int( sys.argv[2] ), int( sys.argv[3] ) )
    hexdump( section, ' ', int( sys.argv[4] ) )

Run output:

$ python hexdump.py hexdump.py 0 60 16
  1. a 69 6d 70 6f 72 74 20 73 79 73 0a 0a 64 65 66 .import.sys..def
  2. 68 65 78 64 75 6d 70 28 20 63 68 61 72 73 2c .hexdump..chars.
  3. 73 65 70 2c 20 77 69 64 74 68 20 29 3a 0a 20 .sep..width.....
  4. 20 77 68 69 6c 65 20 63 68 61 72 00 00 00 00 ..while.char....

Should this one lose points for reading the whole file into memory?

I don't think so. After all, memory-mapped files are a very powerful paradigm. There have been operating systems that only offered that option, and their proponents were very disdainful of the other approach.

Long ago on systems with tiny RAM, this might have been crippling, since one couldn't view large files, but these days with big RAM and huge swap areas, it seems unlikely to be a pragmatic issue.

There is something to be said for stream-based solutions as part of a DataFlowProgramming system, but that doesn't seem a big deal on this particular page.

The above version is buggy - it doesn't print brackets, etc, properly. Here's an idiomatic version that handles large files. It's less terse than it could be (but that's because it's more readable than it could be ;-)

import sys
def hexdump(fname, start, end, width):
    """Print a hex dump of fname between offsets start and end.

    start, end and width may be ints or numeric strings; they are
    converted with int().  Each output row is the hex cells joined by
    spaces, one space, then the fixchar() rendering of the row.
    """
    for row in get_lines(fname, int(start), int(end), int(width)):
        hex_column = " ".join(["%02x" % ord(ch) for ch in row])
        text_column = "".join([fixchar(ch) for ch in row])
        print("%s %s" % (hex_column, text_column))
def fixchar(char):
    """Return char if it is displayable, otherwise '.'.

    Displayable means string.printable minus its last five characters
    (tab, newline, carriage return, vertical tab, form feed); space is
    therefore kept.
    """
    import string
    displayable = string.printable[:-5]
    return char if char in displayable else "."
def get_lines(fname, start, end, width):
    """Yield the bytes of fname in [start, end) as rows of width bytes.

    The final row is padded with NUL bytes up to width.  No padding row
    is produced when the data already ends on a row boundary (the
    original computed the gap as width - len % width, which appended a
    whole row of NULs in that case, and a row of NULs for empty input).

    Raises ValueError if width is not positive.
    """
    if width <= 0:
        raise ValueError("width must be positive")
    with open(fname, "rb") as f:
        f.seek(start)
        # max() guards against end < start, where read(negative) would
        # read to the end of the file.
        chunk = f.read(max(0, end - start))
    chunk += (-len(chunk) % width) * b'\000'
    for offset in range(0, len(chunk), width):
        yield chunk[offset:offset + width]
# Command line: hexdump.py FILE START END WIDTH.  With fewer than four
# arguments, dump the first 100 bytes of hexdump.py as a demonstration.
if __name__ == '__main__':
    # Count the arguments explicitly: catching TypeError around the call
    # also hid genuine TypeErrors raised while dumping.
    if len(sys.argv) >= 5:
        hexdump(*sys.argv[1:5])
    else:
        hexdump("hexdump.py", 0, 100, 16)

RubyLanguage:

# Prints a hex dump of +filename+ from byte offset +start+ through byte
# offset +finish+ (inclusive; to end of file when +finish+ is nil), with
# +width+ bytes per row.  Each row is a six-digit hex offset, the bytes in
# hex, then the bytes as text with a period for anything outside 32..126.
# A short final row is padded so the text column stays aligned.
def hexdump(filename, start = 0, finish = nil, width = 16)
  ascii = String.new
  counter = 0
  # Block form closes the file; 'rb' prevents newline translation.
  File.open(filename, 'rb') do |file|
    file.each_byte do |c|
      if counter >= start
        # Print the offset only when a row actually begins, so no
        # dangling offset is left after the last complete row.
        print '%06x ' % counter if ascii.empty?
        print '%02x ' % c
        ascii << (c.between?(32, 126) ? c.chr : '.')
        if ascii.length >= width
          puts ascii
          ascii = String.new
        end
      end
      # Plain break instead of throw/rescue, which also swallowed real
      # errors such as a missing file.
      break if finish && finish <= counter
      counter += 1
    end
  end
  # Each missing byte occupies the three columns of "xx ".
  puts '   ' * (width - ascii.length) + ascii unless ascii.empty?
end
# Run as a script: dump the named file with optional numeric START, FINISH
# and WIDTH arguments, or dump this script itself when no argument is given.
if __FILE__ == $0
  target = ARGV.empty? ? $0 : ARGV.shift
  hexdump(target, *ARGV.map { |arg| arg.to_i })
end
% hexdump.rb hexdump.rb 0 58 16
  1. 64 65 66 20 68 65 78 64 75 6d 70 28 66 69 6c 65 def hexdump(file
  2. 6e 61 6d 65 2c 20 73 74 61 72 74 20 3d 20 30 2c name, start = 0,
  3. 20 66 69 6e 69 73 68 20 3d 20 6e 69 6c 2c 20 77 finish = nil, w
  4. 69 64 74 68 20 3d 20 31 36 29 0a idth = 16).

CeeLanguage or maybe CeePlusPlus - Not actually CeeLanguage - There are CeePlusPlus specifics used (like declaring variables in the middle of a function for readability). Why do people think CeePlusPlus means one must use class. Sigh. I guess the CeeLanguage/CeePlusPlus are popular enough that people must post follow ups with their own visions. An OO solution is probably overkill, but it wouldn't be unreasonable to ask that something purporting to be a CeePlusPlus example use things like the CeePlusPlus standard library, rather than the CeeLanguage standard library... (or if you're going to use the CeeLanguage library, at least use the standard CeePlusPlus headers for it [that's #include <cstdio> and #include <cstdlib>, rather than <stdio.h> and <stdlib.h>])... - Fair enough, we'll call it a draw - if just to avoid a LanguageHolyWar.

I posted one of the straight C language solutions below, which should tell you that I'm not a rabid foaming-at-the-mouth OO bigot, however I was the one that said it was C, not C++, and I stand by that. Aside from utterly trivial issues, it could compile with a C compiler. And yes, I would in fact want to see use of classes or templates, although as someone said above, at least using native C++ facilities for IO would put you in a more defensible position.

Re: "follow up with their own visions" - there's room for multiple solutions in each language. I like to go for very terse solutions because I think that's a virtue in itself. Others obviously have somewhat different philosophies that they like to illustrate. -- dm

Not that I'm disagreeing with you - I do have strong CeeLanguage influences, but just to explain where I was coming from - Can you imagine the ThreadMess if someone really did post a "true" CeePlusPlus example?

You could guarantee a HolyWar over:

I don't think these little exercises risk major HolyWars. Anyway,

Even in straight C, getting to the elegant minimum is fiercely debated. Perhaps the posts of "true" CeePlusPlus examples are so large that they could be the cause of the denied access problems we've been having. ;)

/*
This entry contains certain functionality the others may not.
#include
#include
/*
 * Print one row of the dump: the first `index` bytes of `buffer` in hex,
 * blank padding up to `width` byte columns, a ": " separator, and the same
 * bytes as characters.  Bytes outside 32..126 are shown as a period.
 *
 * The three loop headers were destroyed when the page was extracted
 * (everything after "<" was dropped); they are restored here.
 */
void hexdump(unsigned char *buffer, unsigned long index, unsigned long width)
{
    unsigned long i;

    for (i = 0; i < index; i++)
    {
        printf("%02x ", buffer[i]);
    }
    /* each missing byte takes the three columns of "xx " */
    for (i = index; i < width; i++)
        printf("   ");
    printf(": ");
    for (i = 0; i < index; i++)
    {
        if (buffer[i] < 32 || buffer[i] > 126) printf(".");
        else printf("%c", buffer[i]);
    }
    printf("\n");
}
/*
 * Dump the bytes of `infile` whose offsets lie in [start, stop] (both ends
 * inclusive), `width` bytes per row, using hexdump() for each row.
 * Closes `infile` before returning.
 *
 * Returns 0 on success, -1 if width is zero or the row buffer cannot be
 * allocated.
 */
int hexdump_file(FILE *infile, unsigned long start, unsigned long stop, unsigned long width)
{
    int ch;                       /* int, so EOF is distinguishable from 0xFF */
    unsigned long f_index = 0;    /* offset of the byte about to be read */
    unsigned long bb_index = 0;   /* bytes collected for the current row */
    unsigned char *byte_buffer;

    if (width == 0)
    {
        printf("Bytes per line must be greater than zero\n");
        fclose(infile);
        return -1;
    }
    byte_buffer = malloc(width);
    if (byte_buffer == NULL)
    {
        printf("Could not allocate memory for byte_buffer\n");
        fclose(infile);
        return -1;
    }
    /*
     * Test the result of getc() itself.  The old `while (!feof(infile))`
     * loop only noticed end-of-file after storing the EOF marker as a data
     * byte.  Reading also stops once the range has been passed.
     */
    while (f_index <= stop && (ch = getc(infile)) != EOF)
    {
        if (f_index >= start)
        {
            byte_buffer[bb_index] = (unsigned char)ch;
            bb_index++;
            if (bb_index >= width)
            {
                hexdump(byte_buffer, bb_index, width);
                bb_index = 0;
            }
        }
        f_index++;
    }
    if (bb_index)
        hexdump(byte_buffer, bb_index, width);
    fclose(infile);
    free(byte_buffer);
    return 0;
}
/*
 * Usage: hexdump <filename> <start> <stop> <width>
 * Prints the request as a header, then the dump.  Returns 0 on success and
 * 1 on a usage error, an unopenable file or a failed dump.
 */
int main(int argc, char *argv[])
{
    FILE *infile;
    unsigned long start, stop, width;

    if (argc != 5)
    {
        printf("Usage: hexdump <filename> <start> <stop> <width>\n");
        return 1;
    }
    infile = fopen(argv[1], "rb");
    if (infile == NULL)
    {
        printf("Error opening input file %s\n", argv[1]);
        return 1;
    }
    /* Parse as unsigned long so the values match both the %lu conversions
       below and the parameter types of hexdump_file(); passing the int
       returned by atoi() to %lu is undefined behaviour. */
    start = strtoul(argv[2], NULL, 10);
    stop = strtoul(argv[3], NULL, 10);
    width = strtoul(argv[4], NULL, 10);
    printf("Filename: \"%s\"\n", argv[1]);
    printf("Start : %lu\n", start);
    printf("End : %lu\n", stop);
    printf("Bytes per Line: %lu\n", width);
    return hexdump_file(infile, start, stop, width) == 0 ? 0 : 1;
}

Run output:

$ hexdump hexdump.c 0 100 16
Filename: "hexdump.c"
Start : 0
End : 100
Bytes per Line: 16
  1. f 2a 0d 0a 20 20 54 68 69 73 20 65 6e 74 72 79 : /*.. This entry
  2. 63 6f 6e 74 61 69 6e 73 20 63 65 72 74 61 69 : contains certai
  3. e 20 66 75 6e 63 74 69 6f 6e 61 6c 69 74 79 20 : n functionality
  4. 68 65 20 6f 74 68 65 72 73 20 6d 61 79 20 6e : the others may n
  5. f 74 2e 0d 0a 20 20 20 20 2a 20 54 68 65 20 73 : ot... * The s
  6. 6f 70 2d 73 74 61 72 74 20 73 70 61 6e 20 64 : top-start span d
  7. f 65 73 20 6e : oes n

-- LayneThomas


ForthLanguage

DUMP is in the ANS Tools word set, e.g. GNU Forth:

HERE 32 DUMP
  1. : 80 C0 0C CC C3 33 0C 30 - CC 33 0C C3 30 C3 0C CC .....3.0.3..0...
  2. A0: CC CC CC CC CC CC CC CC - C3 0C CC CC CC CC CC CC ................

Here's an implementation that meets the new spec. (Usage: "start end width HEXDUMP filename")

  1. value fileid
\ Print the unsigned double ud in the current BASE with at least four
\ digits, followed by ": ".
: .pos ( ud -- )
<# # # # #S #> type ." : " ;
\ Print len bytes starting at addr, each as two digits in the current BASE
\ followed by a space.
\ NOTE(review): uses DO, not ?DO, so len is presumably never 0 here;
\ (hd) only calls it after a non-zero read.
: .hex ( addr len -- )
over + swap do I c@ S>D <# # # #> type space loop ;
\ Print three spaces for every byte missing from a short row
\ (width - read of them), then the ": " separator.
: .spacer ( width read -- )
- 3 * spaces ." : " ;
\ Print len bytes starting at addr as characters.  "127 bl within" is true
\ for values from 127 upward and for values below BL (it wraps around), so
\ those bytes are replaced by a period.
: .chars ( addr len -- )
over + swap do I c@ dup 127 bl within if drop [char] . then emit loop ;
\ Worker for HEXDUMP.  Opens the file named by c-addr u, seeks to start and
\ prints rows of up to width bytes until (end - start) bytes have been
\ shown or the file ends.  Any file error is THROWn.
\ The "2dup" that begins the read step had been garbled into "1. dup";
\ it is restored here.
: (hd) ( start end width c-addr u -- )
  R/O OPEN-FILE throw to fileid
  >R over - R> rot                           ( count width start )
  S>D fileid REPOSITION-FILE throw           ( count width )
  begin cr fileid FILE-POSITION throw .pos
    \ read min(count, width) bytes into PAD
    2dup min pad swap fileid READ-FILE throw ?dup   ( count width read read | count width 0 )
  while pad over .hex 2dup .spacer pad over .chars  ( count width read )
    over = \ end of file?
  while tuck - tuck 0> \ end of range?               ( count' width flag )
  while repeat then then 2drop
  fileid CLOSE-FILE throw cr ;
\ User entry point: start end width HEXDUMP filename
\ Parses the file name from the input stream, switches BASE to hex for the
\ duration of the dump, and restores the saved BASE before re-throwing any
\ error caught from (hd).
: hexdump ( start end width "filename" -- )
bl parse base @ >R hex ['] (hd) catch R> base ! throw ;

Sample output, testing EOF handling and partial last line :

  1. 950 8 hexdump hexdump.f
  2. A2: 61 73 65 20 21 20 20 74 : ase ! t
  3. AA: 68 72 6F 77 20 3B 0A : hrow ;.

I believe that <# #> is underrated even in the Forth world, and certainly outside it. People seem to think it's pretty much equivalent to formatting in C or Lisp, but I think it is instead an example of an entire paradigm of programming that is mostly unexplored, although alluded to by ChuckMoore.

Could you expand on this? Forth pictured numerics are simple, but not exactly the friendliest of constructs (you are specifying digits in reverse order, for example). Here is GNU Forth's simple implementation (though I added digit>char for clarity).

\ Storage for pictured numeric output.
\ holdptr: address of the most recently stored character.
variable holdptr
\ holdend: end address of the current conversion (set by <#).
variable holdend
\ holdbuf: 64-byte buffer, filled from its end towards its start.
create holdbuf 64 allot
\ holdbuf-end: address just past the buffer.
here constant holdbuf-end
\ Start a conversion: point both holdptr and holdend at the buffer end.
: <# ( ud -- ud ) holdbuf-end dup holdptr ! holdend ! ;
\ Prepend character c; throws -17 (pictured numeric output string
\ overflow) if the pointer would move below holdbuf.
: hold ( c -- ) -1 holdptr +! holdptr @ dup holdbuf u< -17 and throw c! ;
\ Map a digit value to its character: add 7 to values above 9 to skip the
\ characters between "9" and "A".
: digit>char ( 0-35 -- [0-9A-Z] ) 9 over < IF 7 + THEN [char] 0 + ;
\ Convert one digit: divide ud by BASE and HOLD the remainder's character.
: # ( ud -- ud/base ) base @ ud/mod rot digit>char hold ;
\ Convert digits until the remaining number is zero (at least one digit).
: #s ( ud -- 0d ) BEGIN # 2dup or 0= UNTIL ;
\ Finish: drop the number and return the held characters as addr len.
: #> ( ud -- addr len ) 2drop holdptr @ holdend @ over - ;

Yeah, and many in the Forth community actually regard them as obsolete. But I'm not talking about their literal use for formatting, I'm talking about them as an example of a different way of approaching doing programming. Recall anything that ChuckMoore has said about how he approaches problem solving in Forth?

Here's one example of what Moore said:

"The whole point of Forth was that you didn't write programs in Forth you wrote vocabularies in Forth. When you devised an application you wrote a hundred words or so that discussed the application and you used those hundred words to write a one line definition to solve the application. It is not easy to find those hundred words, but they exist, they always exist."

"I wish I knew what to tell you that would lead you to write good Forth. I can demonstrate. I have demonstrated in the past, ad nauseam, applications where I can reduce the amount of code by 90% percent and in some cases 99%. It can be done, but in a case by case basis. The general principle still eludes me." http://www.ultratechnology.com/1xforth.htm

He doesn't know how to explain how he goes about finding just the right set of words. I think <# #> serves as an illustration, not of the ultimate way to format, but of the paradigm he exercises unconsciously. -- dm


ColorForth

The wiki doesn't do color, so these are the mappings:

Other Strange Things:

{block 32}

(dump) var x (200000) var y (201200)
one dup @ h. space dup h. cr ;
lines for one -1 + next drop ;
dump x !
r show black screen x @ 15 + 16 text lines keyboard ;
it @ + @ dup h. space ;
lines for white i [x] it i [y] it or drop if red then i . cr -next ;
cmp show blue screen text 19 lines red [x] @ h. space [y] @ h. keyboard ;
u 16
+xy dup [x] +! [y] +! ;
d -16 +xy ;
ati F4100000 (ff7fc000) or
byte 4 / dump ;
fix for 0 over ! 1 + next ; [dump]

{block 33} this is the shadow block, commentary on the previous block. An editor command flips between a block and its shadow.

(does not say empty, compiles on top of application)
x -a (current address)
one a-a (line of display)
lines an
dump a (background task continually displays memory)
u (increment address)
d (decrement)
ati (address of AGP graphic registers)
byte (a byte address dump)
fix an-a (test word)

I just looked (for way too long) and couldn't find a list of the standard (well - common) colorforth words anywhere at all (Moore has an annoying page purporting to help programmers that basically says "try it! I'll add documentation some year. Maybe"). Where can I find one?

ColorForth is primarily ChuckMoore's personal programming system. As a working engineer, he hasn't had much time or inclination to document his evolving experiment. He admits communication is one of his weaknesses. Others have written better documented variants that keep the flavor of ColorForth; I'll try to add more references to the ColorForth page. This particular snippet came from some webified source on http://www.merlintec.com/download/color.html

You've already largely translated it out of colorforth; may as well continue until it's 100% one of the regular forths. After all, there seem to be, what, 20 regular colorforth users, and 1 colorforth expert, in the world? :-)

Actually, I haven't changed it a whit; it's a direct mapping from color to markup. ColorForth has very different semantics from standard Forth. They're even further apart than SchemeLanguage is from CommonLisp.

Magenta variables are interesting, since they're basically a bizarre way of doing persistence.


JavaLanguage:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
public class HexDump {
public static void main(String[] args) throws IOException {
String inputFileName = args[0];
int start = Integer.parseInt(args[1]);
int end = Integer.parseInt(args[2]);
int width = Integer.parseInt(args[3]);
byte[] bytes = read(inputFileName, start, end);
for (int index = 0; index < bytes.length; index += width) {
printHex(bytes, index, width);
printAscii(bytes, index, width);
}
}
private static byte[] read(String inputFileName, int start, int end)
throws FileNotFoundException, IOException {
File theFile = new File(inputFileName);
FileInputStream input = new FileInputStream(theFile);
int skipped = 0;
while (skipped < start) {
skipped += input.skip(start - skipped);
}
int length = (int) (Math.min(end, theFile.length()) - start);
byte[] bytes = new byte[length];
int bytesRead = 0;
while (bytesRead < bytes.length) {
bytesRead = input.read(bytes, bytesRead, bytes.length - bytesRead);
if (bytesRead == -1) {
break;
}
}
return bytes;
}
private static void printHex(byte[] bytes, int offset, int width) {
for (int index = 0; index < width; index++) {
if (index + offset < bytes.length) {
System.out.printf("%02x ", bytes[index + offset]);
} else {
System.out.print(" ");
}
}
}
private static void printAscii(byte[] bytes, int index, int width)
throws UnsupportedEncodingException {
if (index < bytes.length) {
width = Math.min(width, bytes.length - index);
System.out.println(
":"
+ new String(bytes, index, width, "UTF-8").replaceAll("\r\n", " ").replaceAll(
"\n",
" "));
} else {
System.out.println();
}
}

}

Arguments: HexDump.java 0 100 16
  1. 6d 70 6f 72 74 20 6a 61 76 61 2e 69 6f 2e 46 :import java.io.F
  2. 6c 65 3b 0d 0a 69 6d 70 6f 72 74 20 6a 61 76 :ile; import jav
  3. 2e 69 6f 2e 46 69 6c 65 49 6e 70 75 74 53 74 :a.io.FileInputSt
  4. 65 61 6d 3b 0d 0a 69 6d 70 6f 72 74 20 6a 61 :ream; import ja
  5. 61 2e 69 6f 2e 46 69 6c 65 4e 6f 74 46 6f 75 :va.io.FileNotFou
  6. e 64 45 78 63 65 70 74 69 6f 6e 3b 0d 0a 69 6d :ndException; im
  7. 6f 72 74 :port

And another JavaLanguage example factored differently:

package main;
import java.io.*;
import java.util.*;
public class Dump {
public static void main(String[] args) {
byte[] bytes = new ContentFile(args[0]).content().getBytes();
int start = integer(args[1]);
bytes = slice(bytes, start, integer(args[2]));
int perLine = integer(args[3]);
List chunks = chunks(start, bytes ,perLine);
for(Line x : chunks)
x.writeLn(perLine);
}
private static String leftPadded(String s, int num) {while (s.length() < num) s = " " + s; return s; }
private static int integer(String s) { return Integer.parseInt(s); }
private static List chunks(int start, byte[] bytes, int perLine) {
List result = new ArrayList();
while (bytes.length > 0) {
result.add(new Line(start, slice(bytes, 0, perLine)));
bytes = slice(bytes, perLine, bytes.length);
start += perLine;
}
return result;
}
private static byte[] slice(byte[] bytes, int start, int end) {
if (end > bytes.length) end = bytes.length;
if (end - start <= 0) return new byte[0];
byte[] result = new byte[end - start];
System.arraycopy(bytes, start, result, 0, end - start);
return result;
}
static class ContentFile {
File file_;
public ContentFile(String name) {
file_ = new File(name);
}
public String content() {
try {
StringBuffer buf = new StringBuffer();
BufferedReader in = new BufferedReader(new FileReader(file_));
for(String s = in.readLine(); s != null; s = in.readLine())
buf.append(s);
return buf.toString();
} catch (Exception e) {
throw new RuntimeException("couldn't get content", e);
}
}
}
static class Line {
final int start;
final byte[] bytes;
public Line(int start, byte[] bytes) {
this.bytes = bytes;
this.start = start;
}
public void writeLn(int perLine) {
System.out.println("O: " + leftPadded("" + start, 4) + " " + bytesText(perLine) + ":" + text());
}
private String text() {
String result = "";
for(int i = 0; i < bytes.length; i++) {
char c = (char) bytes[i];
result += Character.isLetterOrDigit(c) || Character.isSpace(c) ? c : '.';
}
return result;
}
private String bytesText(int perLine) {
String result = "";
for(int i = 0; i < bytes.length; i++) {
result += String.format("%02x ", bytes[i]);
}
while (perLine-- > bytes.length) result += " ";
return result;
}
}
}
C:\eclipserc3\eclipse\workspace\hex\bin>java -cp . main.Dump ..\src\main\Dump.java 0 100 16
O: 00 70 61 63 6b 61 67 65 20 6d 61 69 6e 3b 69 6d 70 :package main.imp
O: 16 6f 72 74 20 6a 61 76 61 2e 69 6f 2e 2a 3b 69 6d :ort java.io...im
O: 32 70 6f 72 74 20 6a 61 76 61 2e 75 74 69 6c 2e 2a :port java.util..
O: 48 3b 70 75 62 6c 69 63 20 63 6c 61 73 73 20 44 75 :.public class Du
O: 64 6d 70 20 7b 20 20 20 20 70 75 62 6c 69 63 20 73 :mp . public s
O: 80 74 61 74 69 63 20 76 6f 69 64 20 6d 61 69 6e 28 :tatic void main.
O: 96 53 74 72 69 :Stri

C:\eclipserc3\eclipse\workspace\hex\bin>

I wanted to factor the chunks method into the Line class by creating a line with the entire source byte array, and then calling a method subdivide, which would call a method line() and a method rest(). Think that would pretty up a bit more, but hey - real work calls.

-- JeffBay


BourneShell:

#!/bin/sh
# Hex dump by delegation to hexdump(1).
# Usage: <script> <source> <start> <end> <width>
# Dumps bytes start..end-1 of <source>, <width> bytes per line, as hex,
# " : ", then the printable characters.  Exits with status 2 on a usage error.
if [ $# -ne 4 ]
then
    echo "Usage: $0 <source> <start> <end> <width>" 1>&2
    exit 2
fi
SOURCE="$1"
START="$2"
END="$3"
WIDTH="$4"
LENGTH=`expr "$END" - "$START"`
# Every expansion is quoted so that a file name containing spaces or glob
# characters reaches hexdump as a single argument.
hexdump -s "$START" -n "$LENGTH" -e "$WIDTH"'/1 "%02X "' -e '" : "' -e '"%_p"' -e '"\n"' "$SOURCE"

...HaHaOnlySerious!


PerlLanguage:

#!/usr/bin/perl -w
use Fcntl qw(:seek);
use List::Util qw(min);
use strict;
# Usage: <script> <source> <start> <end> <width>
# Dumps bytes start..end-1 of <source>, <width> bytes per line: the bytes in
# hex, ": ", then the bytes as text with "." for anything printable() rejects.
if (@ARGV != 4) {
    print STDERR "Usage: $0 <source> <start> <end> <width>\n";
    exit(2);
}
my ($source, $start, $end, $width) = @ARGV;
open(my $fh, "<", $source) or die("Can't open $source");
# Raw bytes: no newline translation or encoding layer.
binmode($fh);
seek($fh, $start, SEEK_SET) or die("Can't seek to $start");
my $block;
for (my $left = $end - $start;
     $left > 0 && read($fh, $block, min($width, $left));
     $left -= length($block)) {
    my $hex = join(" ", map { sprintf("%02X", ord($_)) } split(//, $block));
    # Each missing byte takes three columns ("XX" plus its separator), so
    # the ": " of a short last line stays aligned with the lines above.
    $hex .= '   ' x ($width - length($block));
    my $plain = join("", map { printable($_) ? $_ : "." } split(//, $block));
    print "$hex: $plain\n";
}
close($fh);
# True when the first character of the argument has a code in 33..126,
# i.e. visible ASCII; space is not included.
sub printable {
    my ($char) = @_;
    my $code = ord($char);
    return $code >= 33 && $code <= 126;
}

CeeLanguage:

#include
#include
#include
/* Smaller of two ints; returns b when the two are equal. */
int min(int a, int b)
{
    if (a < b)
        return a;
    return b;
}
/*
 * Usage: <program> <source> <start> <end> <width>
 * Dumps bytes start..end-1 of <source>, <width> bytes per line: hex cells,
 * ": ", then the bytes as characters ('.' for anything isgraph() rejects).
 * Exits with status 2 on any error.
 */
int main(int argc, char *argv[]) {
    FILE *fp;
    int i, start, end, width, left, count;
    unsigned char *buf;

    if (argc != 5) {
        fprintf(stderr, "Usage: %s <source> <start> <end> <width>\n", argv[0]);
        exit(2);
    }
    /* "rb": a dump must see the raw bytes; text mode may translate line
       endings or stop at a ^Z on some platforms. */
    if ((fp = fopen(argv[1], "rb")) == NULL) { perror("fopen"); exit(2); }
    start = atoi(argv[2]);
    end = atoi(argv[3]);
    width = atoi(argv[4]);
    if (width <= 0) {
        fprintf(stderr, "Width must be positive\n");
        exit(2);
    }
    left = end - start;
    if ((buf = malloc(width)) == NULL) {
        fprintf(stderr, "No memory?!?\n");
        exit(2);
    }
    if (fseek(fp, start, SEEK_SET)) { perror("fseek"); exit(2); }
    while (left > 0 && (count = (int) fread(buf, 1, min(left, width), fp)) > 0) {
        for (i = 0; i < count; i++) printf("%02X ", buf[i]);
        /* three columns per missing byte keeps the separator aligned */
        for (i = count; i < width; i++) printf("   ");
        printf(": ");
        for (i = 0; i < count; i++) printf("%c", isgraph(buf[i]) ? buf[i] : '.');
        printf("\n");
        left -= count;
    }
    free(buf);
    fclose(fp);
    return 0;
}

Hmm, this doesn't appear to be totally debugged:

./a.out a.out 0 50 16
  1. F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00 : .ELF............
  2. 00 03 00 01 00 00 00 FFFFFFB0 FFFFFF84 04 08 34 00 00 00 : ............4...
FFFFFFC0 23 00 00 00 00 00 00 34 00 20 00 06 00 28 00 : .#......4.....(.
  1. 00 : #.

You need to add a mask: printf("%02X ", buf[i] & 0xff);

Minor detail.

Ah. Signed chars are sign extended into signed ints... Fixed by making the chars unsigned.

You beat me to the edit, and with a shorter solution, it looks like.


CeeLanguage:

After great struggle, I transformed the natural but verbose approach of C closures, reflection, and metaclasses into something a little more terse:

#include
#include
#include
/*
 * Print one dump row: `count` bytes of `data` in hex, padding up to
 * `record_length` byte columns, ": ", then the bytes as characters
 * ('.' for anything isgraph() rejects).
 */
void displayHexRecord(char *data, int count, int record_length) {
    int i;
    for (i = 0; i < count; i++) printf("%02x ", data[i] & 0xff);
    /* three columns per missing byte keeps the separator aligned */
    for (; i < record_length; i++) printf("   ");
    printf(": ");
    for (i = 0; i < count; i++) {
        /* isgraph() requires a value representable as unsigned char (or
           EOF); a plain char may be negative for bytes above 0x7f. */
        unsigned char c = (unsigned char) data[i];
        if (isgraph(c)) putchar(c);
        else putchar('.');
    }
    putchar('\n');
}
/*
 * Dump bytes cur_addr..end-1 of `f`, `bytes_per_line` bytes per row.  Each
 * row starts with its offset in hex (at least five digits).  Does nothing
 * when the range is empty or bytes_per_line is not positive; reports seek
 * and allocation failures with perror() and returns.
 */
void hexDump(FILE *f, long cur_addr, long end, int bytes_per_line) {
    size_t record_length;
    long read_count;
    char *data;

    if (bytes_per_line <= 0 || end <= cur_addr) return;
    /* Allocate outside assert(): with NDEBUG defined the assert, and the
       allocation inside it, would disappear entirely. */
    data = (char *) calloc((size_t) bytes_per_line, sizeof(data[0]));
    if (data == NULL) { perror("calloc"); return; }
    if (-1 == fseek(f, cur_addr, SEEK_SET)) {
        perror("fseek");
        free(data);
        return;
    }
    while (cur_addr < end) {
        read_count = end - cur_addr;
        if (read_count > bytes_per_line) read_count = bytes_per_line;
        record_length = fread(data, sizeof(char), (size_t) read_count, f);
        if (record_length == 0) break;   /* end of file: no empty row */
        /* cur_addr is a long, so it needs %lx rather than %x */
        printf("%05lx ", (unsigned long) cur_addr);
        displayHexRecord(data, (int) record_length, bytes_per_line);
        if (record_length < (size_t) read_count) break;
        cur_addr += (long) record_length;
    }
    free(data);
}

Output for hexDump(f, 0, 55, 16) of its own executable:

  1. 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 : .ELF............
  2. 02 00 03 00 01 00 00 00 f0 83 04 08 34 00 00 00 : ............4...
  3. 44 35 00 00 00 00 00 00 34 00 20 00 06 00 28 00 : D5......4.....(.
  4. 23 00 20 00 06 00 00 : #......

There's something strange about powerful languages having longer examples than powerless languages...

where's the main() and why no checking of command line arguments - it wouldn't produce a full executable without some other harness, so. . . where is it?

It's a library function. Requiring a full program violates the spirit of these ProgrammingChrestomathy pages; someone has already commented on searching and hex editing, and why exactly are we leaving out the GUI? :-P That's all foolishness, so I created a library routine. I also left out e.g. printing the filename, because that violates principles outlined in TheArtOfUnixProgramming; that, too, is an inappropriate requirement, although obviously it only adds one line of code to a program in any language.

Printing the filename wasn't a requirement. The Scheme program prompts the user for the filename, etc., from the console because command-line arguments aren't part of the standard language. Apparently, the person who wrote the C++ example (which, as someone else pointed out, is actually plain C) mistook the Scheme program prompting the user for input as printing the arguments out.

Anyone who fails to grok these points should immediately go download and examine the (huge) GNU hello world distribution, and ponder how it is that hello world got to be so big.

(It got to be that big because it's an example of how to release packages The GNU Way and they wanted to give an example of every bell, whistle, and gong you can accomplish with standard GNU packaging tools. In other words, it got that big because it's demonstrating a nontrivial solution set to a highly nontrivial problem.)

But if you just want to try it, a trivial main will do it:

#include <stdio.h>
#include <stdlib.h>

/* Minimal test harness: dump bytes 0..55 of "a.out", 16 bytes per line. */
int main(void) {
    /* "rb": an executable is binary data; text mode may translate or
       truncate bytes on some platforms. */
    FILE *f = fopen("a.out", "rb");
    if (f == NULL) {
        perror("a.out");
        exit(1);
    }
    hexDump(f, 0, 55, 16);
    fclose(f);
    return 0;
}

Parse command line args? What's that got to do with hex dump Chrestomathy? How about a test harness? I never saw a Marketing Requirements doc, either. Where's our sysadmin plan? Budget? HR-required ergonomic classes? Shall I go on? :-)

It's not really terse if it's missing a main, and puts printfs on the same line as the for loop. How many corners can you cut and claim "but it's terse!"? :->

(Bug fixed, thanks.)

How many corners can you cut and claim "but it's terse"? That's a good question. But you know, textbooks and magazine columns that illustrate algorithms almost always leave out error checks, because they get in the way of seeing the algorithm that is the true point.

The question of whether to sometimes put trivial bodies on the same line as the control construct, or whether to always put them on separate lines, is purely stylistic, and obviously HolyWars have been fought about that. Some people claim that it makes a huge difference in readability, but I personally don't think it matters too much either way in most cases.

The other CeeLanguage author did similar things - while finding an even more terse algorithm kernel as well.

Well, every programmer thinks their coding style is the "pragmatic optimum", so I won't argue that. The whole Chrestomathy idea should apply to samples within the same language too. I guess the "real-world" origins of this page have changed, but now it's kind of a loose problem. How does the file get read? What does it link with? Does it require a host environment? Unfortunately "hex dumping" isn't really an algorithmic problem. . . so . . I guess I have no point other than to say I'm not sure what the criteria for a good example are anymore.

It's not a huge algorithmic problem; none of these chrestomathy pages are. But it's an interesting 'small' algorithmic problem. I still need to finish examining why it is that the other author got a smaller kernel than I did, so as to keep in mind any points I observe for future use. Look for general principles whenever possible, I say.

P.S. I thought I already addressed the issue of the missing main(), but to be somewhat more explicit: it would be all but trivially the same for all of these chrestomathy pages. It's not interesting, it's not illustrative, it doesn't vary, but it certainly adds clutter. And also, come now, I added one after someone implied they didn't want to be bothered writing one to test my code...but I still don't see that it is part of the chrestomathy proper. A proper Hello World illustrates half of how to do it in C, and an example of any use of getopts illustrates the other half.


JavaScript: (JScript running under WSH - Windows Script Host - uses ActiveXscripting for its file API)

note about ActiveXscripting: one way or another you don't have direct access to file I/O (Input Output) inside an ECMAscript implementation (meaning the core language definition); the file I/O depends on the environment where the language is implemented. Here in WSH, Microsoft has provided external COM objects for file I/O access; in another environment like FESI ( http://www.lugrin.ch/fesi/fioext.html ) you could use another way to access file I/O with the File constructor. Anyway, the code provided can be easily ported to different ECMAscript environments: just edit the readFile method in HexDump.

usage: cscript hexdump.js <source> [start] [end] [width]

limitation: this works only for ASCII files

if you want to edit the code edit it correctly, or make a fork with your own version, if I have formated or given a particular structure to this script there are good reasons for that:

if you're not happy with it make your own entry and don't garbage my own.

/* Function: trace */

/* Prints the string form of txt on the WSH console. */
function trace( /*String*/ txt )
{
    var message = txt.toString();
    WScript.Echo( message );
}

/* Singleton: Application */ Application = {}; Application.arguments = [];

Application.getArguments = function()

{
var args, i;
args = WScript.Arguments;
for( i=0; i {
this.arguments.push( args(i) );
}
return this.arguments;
}

/* Class: HexDump */

HexDump = function( /*String*/ source, /*Int*/ start, /*Int*/ end, /*Int*/ width )
{
this.source = source;
this.start = start;
this.end = end;
this.width = width;
this.file = "";
this.hex = [];
this.dump = [];
this.parse();
}
HexDump.prototype.toString = function()
{
var data, CRLF;
data = "";
CRLF = "\r\n";
data += "HexDump: \""+this.source+"\" ["+this.start+"-"+this.end+"] ("+this.width+")" + CRLF;
data += this.dump.join( CRLF );
return data;
}
HexDump.prototype.readFile = function( /*String*/ filepath )
{
var FSO, file, data;
FSO = new ActiveXObject( "Scripting.FileSystemObject" );
file = FSO.OpenTextFile( filepath, /*FileIOmode.reading*/ 1, false, /*FileFormat.ascii*/ false );
data = file.ReadAll();
file.Close();
return data;
}
HexDump.prototype.formatText = function( /*String*/ txt )
{
txt = txt.split( " " ).join( "." );
txt = txt.split( "\n" ).join( " " );
txt = txt.split( "\r" ).join( " " );
return txt;
}
HexDump.prototype.charToHex = function( /*Char*/ c )
{
var hex;
hex = c.charCodeAt( 0 ).toString( 16 );
return( (hex.length==2)? hex: "0"+hex );
}
HexDump.prototype.parse = function()
{
var i, j, char, hexLine, fileLine;
this.file = this.readFile( this.source );
this.file = this.file.substring( this.start, this.end );
for( i=0; i {
char = this.file.charAt( i );
this.hex.push( this.charToHex( char ) );
}
j = 0;
hexLine = "";
fileLine = "";
while( j < this.file.length )
{
hexLine = this.hex.slice( j, j+this.width+1 );
while( hexLine.length <= this.width )
{
hexLine.push( " " );
}
hexLine = hexLine.join( " " );
fileLine = this.file.slice( j, j+this.width+1 );
fileLine = this.formatText( fileLine );
this.dump.push( hexLine + " : " + fileLine );
j += this.width;
}
}

/* MAIN ENTRY POINT */

/*
 * Entry point: hexdump.js <source> [start] [end] [width]
 * start defaults to 0, end to 100 and width to 16.
 */
Application.main = function()
{
    var args, dump;
    args = this.getArguments();
    if( args[0] == null )
    {
        trace( "usage:\r\ncscript hexdump.js <source> [start] [end] [width]" );
        return;
    }
    if( args[1] == null )
    {
        args[1] = 0; //default value
    }
    if( args[2] == null )
    {
        args[2] = 100; //default value
    }
    if( args[3] == null )
    {
        args[3] = 16; //default value
    }
    // explicit radix: otherwise "010" may be read as octal by older engines
    dump = new HexDump( args[0], parseInt(args[1], 10), parseInt(args[2], 10), parseInt(args[3], 10) );
    trace( dump );
}
Application.main();

output for cscript hexdump.js hexdump.js 0 100 16

HexDump: "hexdump.js" [0-100] (16)
  1. f 2a 20 46 75 6e 63 74 69 6f 6e 3a 20 74 72 61 63 : /*.Function:.trac
  2. 65 0d 0a 2a 2f 0d 0a 74 72 61 63 65 20 3d 20 66 : ce */ trace.=.f
  3. 75 6e 63 74 69 6f 6e 28 20 2f 2a 53 74 72 69 6e : function(./*Strin
  4. e 67 2a 2f 20 74 78 74 20 20 29 0d 0a 20 20 20 20 : ng*/.txt..) ....
  5. 7b 0d 0a 20 20 20 20 57 53 63 72 69 70 74 2e 45 : .{ ....WScript.E
  6. 63 68 6f 28 20 74 78 74 2e 74 6f 53 74 72 69 6e : Echo(.txt.toStrin
  7. e 67 28 29 : ng()

(My 1st participation in this great wiki ;) -- ZwetanKjukov [sideNote: I can provide also a oneliner totally obscured and unreadable but I don't really see any usefulness to that.]

Neither does anyone else. Layne did that to be sarcastic because I wrote something like:

if (x) y();

where he thought style required always writing

if (x) {
y();
}

That's the only reason that he posted his unreadable oneliners; they were an editorial comment.


Here's a CommonLisp version without pretense of efficiency or command-line parsing:

(defun dump-line (in start width count)
  "Read up to COUNT bytes from the binary stream IN and print one dump line:
the offset START, the bytes in hex (padded out to WIDTH columns), a bar,
and the bytes as characters with non-graphic ones shown as a period."
  (let* ((octets (loop for i below count
                       for octet = (read-byte in nil nil)
                       while octet
                       collect octet))
         (padding (make-string (* 3 (- width (length octets)))
                               :initial-element #\Space))
         (glyphs (mapcar (lambda (octet)
                           (let ((ch (code-char octet)))
                             (if (graphic-char-p ch) ch #\.)))
                         octets)))
    (format t "~&~8,'0X ~{~2,'0X ~}~A | ~{~C~}" start octets padding glyphs)))
(defun dump (file start end width)
  "Hex-dump the bytes of FILE from offset START up to (not including) END,
WIDTH bytes per line. END is clamped to the length of the file."
  (with-open-file (in file
                      :element-type '(unsigned-byte 8)
                      :direction :input)
    (when (file-position in start)
      (let ((true-end (min end (file-length in))))
        (do ((offset start (+ offset width)))
            ((>= offset true-end))
          (dump-line in offset width (min width (- true-end offset)))))))
  (format t "~%"))

Sample output, generated by evaluating (dump "dump.lisp" 3 37 16):

  1. 66 75 6E 20 64 75 6D 70 2D 6C 69 6E 65 20 28 69 | fun dump-line (i
  2. 6E 20 73 74 61 72 74 20 63 6F 75 6E 74 20 77 69 | n start count wi
  3. 64 74 | dt

-- DanMuller

Standard CommonLisp doesn't provide a way to get command-line arguments. Guess I'll have to add interactive argument acquisition.


[Historical note: The original version of this page stated that other chrestomathy pages "do not demonstrate the use of the languages in a way that is applicable to real-world programming", with the implication that this one would. It also placed several requirements on the program which were out of place in this sort of demonstration (that it had to be a stand-alone program, that it read user input from either the command line or the console, etc.). This naturally enough garnered several scornful remarks. When the problem definition was refactored, the original version and the subsequent discussion were moved here for reference's sake.]

While there are several ProgrammingChrestomathy pages already, most of them deal with a task that is either trivial (printing "Hello, World!" or the lyrics to "99 Bottles of Beer", encrypting a cleartext in RotThirteen), or of primarily theoretical or abstract importance (the 8-Queens Problem, calculating dot-products). While these are all valuable in their own way, one could argue that they do not demonstrate the use of the languages in a way that is applicable to real-world programming.

To this end, I am hereby proposing a new chrestomathy challenge: a program in the format

hexdump source start end width

which reads the file source and prints a HexDump of the bytes in the file from start to end, formatted to show width bytes separated by space, followed by a separator, then followed by the ASCII values of the displayed bytes, in the order they exist in the file. The program should include the code to read these arguments, either from the command line or from user input. Either console or GUI programs are acceptable, though portable code is preferred where possible. The code should be testable by independent third parties. Any instructions needed for testing the code (target environment, special requirements) should be listed in the program comments.

I wouldn't say that this problem is any more or less trivial than the other chrestomathy (great word!) pages. Depends what your real world is, I guess.

There are a host of "real world" factors ignored here too:

I just don't think it's possible to take all the complexities of "real software" and reduce them to a single problem - because the solutions will be optimized to fit the rules of the game, and not the realities of customer satisfaction.

Late note: although it wasn't originally mentioned, in real life it is fairly essential to show the current address at the start of each line, so I recommend adding that to the existing examples below.

Very well, the point is conceded. It was a mistake for me to use the phrase 'real-world' for something as artificial and isolated as this. I might replace it with 'practical', but that has nearly as many problems, now doesn't it? My intention was simply that it would show some of the aspects of languages which normally are ignored in such demonstration programs, not to make a commercial-quality project of it. Perhaps even that was poorly considered - after all, it is still a very small task, and such details only serve to confuse. While I still think it is a good example project, my presentation was regrettable (though perhaps not as regrettable as the Scheme example I wrote). -- JayOsako

The 8 queens chrestomathy page had people talking about "extra points" for things like N-queens rather than just 8; not everyone bothered. I would think that approach would be appropriate for all such things: the challenge itself should be quite minimal, but with recognition of extras that make sense. I sure would give someone brownie points if they added a gui, searching, editing, color high lighting, etc etc, all in a small enough amount of code to not be absurd to put on this page (not sure how feasible that would be).

Perhaps I should refactor the page to that model, then. I don't want to step on too many toes, however; I'm fairly new to Wiki (modulo a few things I wrote years ago, and have largely regretted), and I'm still not comfortable with the editing etiquette. -- JayOsako

The cautious approach would be to just rewrite what you yourself wrote, noting in broad outline any changes you made that might surprise someone who already contributed code to the page. We're only hoping to improve things, not to make people feel like we're unfairly changing the rules on them or something, thus the note about what changed.

I presume there is no current motivation to rewrite everyone's code, so it's not really about refactoring the whole page.

You are correct, of course; there's no need to refactor anything except the requirements (and my awful Scheme code). I'm not sure what will be done with the comments that were made about it; for now, I'll leave them as they are (though I'll put a separator and a note as to what they refer to).


FoxPro

Someone already posted a Perl solution and a C/C++ solution, and portability seems to be important, so I won't post an AssemblyLanguage solution, but I do have another portable language (this will also run as-is in XBase++ and probably in both FlagShip and Clipper).

parameters filename, start, stop, dwidth
&& Hex-dump bytes start .. stop-1 of filename, dwidth bytes per line.
&& Each line is: offset: hex digits | text with control characters as '.'
private talking, hex_ascii, hex_str, crlf, fh, xi, raw
if set("talk") = "ON"
    set talk OFF
    talking = .T.
else
    talking = .F.
endif
crlf = chr(13) + chr(10)                && for convenience
hex_ascii = ""                          && buffer for output line
dwidth = min(dwidth, 200)               && largest reasonable display width
dwidth = max(dwidth, 1)                 && smallest reasonable display width
fh = fopen(filename)                    && open file at low level
if fh > 0                               && proceed only if valid handle
    ?                                   && begin new line
    xi = fseek(fh, start)               && skip to desired offset in file
    do while !feof(fh) AND xi < stop    && stop is exclusive; also stop at end of file
        raw = fread(fh, min(dwidth, stop - xi))   && never read past stop
        if len(raw) = 0                 && nothing left to show
            exit
        endif
        hex_str = padr(hex(raw, " "), dwidth * 3) && pad short line
        hex_ascii = str(xi, 5) +": "+ hex_str +"| "+ strip(raw) + crlf
        ?? hex_ascii                    && offset: hex digits | stripped ASCII
        xi = xi + len(raw)              && bump the offset by what was read
    enddo
    fh = fclose(fh)                     && housekeeping, only for a handle we own
endif
if talking
    set talk ON
endif
return
function strip                          && show control characters as '.'
parameters rawstring
private cleaned, pos, ch
cleaned = ""
pos = 1
do while pos <= len(rawstring)          && walk the string byte by byte
    ch = substr(rawstring, pos, 1)
    if asc(ch) > 31                     && codes 0..31 are control characters
        cleaned = cleaned + ch
    else
        cleaned = cleaned + "."
    endif
    pos = pos + 1
enddo
return cleaned
function hex                            && hex digits of every byte, joined by padchr
parameters string, padchr               && bytes and optional separator
private result, pos
if type("padchr") != "C"                && separator omitted or not a string
    padchr = ""
endif
result = ""
for pos = 1 to len(string)
    if pos > 1                          && separator goes between bytes only
        result = result + padchr
    endif
    result = result + hdigits(substr(string, pos, 1))
endfor
return result
function hdigits                        && two uppercase hex digits for one byte
parameters char
private code, digits
digits = "0123456789ABCDEF"
code = asc(char)                        && 0..255
return substr(digits, int(code / 16) + 1, 1) + substr(digits, mod(code, 16) + 1, 1)
- - - - - - - - - -
output of do fhdump with "fhdump.prg", 0, 128, 16
  1. : 2A 09 66 68 64 75 6D 70 2E 70 72 67 0D 0A 0D 0A | *.fhdump.prg....
    1. : 70 61 72 61 6D 65 74 65 72 09 66 69 6C 65 6E 61 | parameter.filena
    2. : 6D 65 2C 20 73 74 61 72 74 2C 20 73 74 6F 70 2C | me, start, stop,
    3. : 20 64 77 69 64 74 68 0D 0A 70 72 69 76 61 74 65 | dwidth..private
    4. : 09 09 74 61 6C 6B 69 6E 67 2C 20 68 65 78 5F 61 | ..talking, hex_a
    5. : 73 63 69 69 2C 20 64 5F 6C 65 6E 2C 20 64 69 2C | scii, d_len, di,
    6. : 20 73 74 72 31 36 2C 20 64 5F 73 74 72 2C 20 68 | str16, d_str, h
      1. : 65 78 5F 73 74 72 0D 0A 0D 0A 69 66 20 73 65 74 | ex_str....if set
      2. : 28 22 74 61 6C 6B 22 29 20 3D 20 22 4F 4E 22 0D | ("talk") = "ON".
- - - - - - - - - -
output of do fhdump with "fhdump.prg", 0, 64, 8
  1. : 2A 09 66 68 64 75 6D 70 | *.fhdump
  2. : 2E 70 72 67 0D 0A 0D 0A | .prg....
    1. : 70 61 72 61 6D 65 74 65 | paramete
    2. : 72 09 66 69 6C 65 6E 61 | r.filena
    3. : 6D 65 2C 20 73 74 61 72 | me, star
    4. : 74 2C 20 73 74 6F 70 2C | t, stop,
    5. : 20 64 77 69 64 74 68 0D | dwidth.
    6. : 0A 70 72 69 76 61 74 65 | .private
    7. : 09 09 74 61 6C 6B 69 6E | ..talkin

Minimally Excessive CeeLanguage example:

There was debate about my other C example, so here's what happens when C gets a little too terse. . . -- LayneThomas

/* Deliberately over-terse hex dump: prints bytes b..e-1 of f, w bytes per line.
 * z[0] is a scratch counter/flag, z[1] counts bytes read so far (the file
 * offset), z[2] counts bytes buffered in cb for the current output line.
 * NOTE(review): this listing has lost text to page extraction. The include
 * below has no header name (the program's own dump output starts with
 * "stdio.h>"), and two lines are cut off after a '<'; see the notes there. */
#include
hexer(FILE *f,int b,int e,int w){
unsigned char cb[2048];int z[3]={0,0,0};
while(!feof(f)){
fscanf(f,"%c",&cb[z[2]]);
/* NOTE(review): per the dump output this line originally read
   "if ((z[1]>=b)&&(z[1]<e)){" and the rest was on a line of its own. */
if ((z[1]>=b)&&(z[1] if (z[2]==0) printf("%4u : ",z[1]);printf("%02X ",cb[z[2]],z[2]++);
/* true at the end of a full row, or at the last requested byte (which also sets z[0]) */
if ((((z[1]-b)%w)==(w-1)||((z[1]==e-1)?z[0]=1:z[0]=0))){
/* on the last, possibly short, row pad the hex column with "__ " */
if (z[0]) for (z[0]=0;z[0]<(w-z[2]);z[0]++){printf("__ ");}printf(" : ");
/* NOTE(review): the loop condition and body are missing here; presumably it
   printed the z[2] buffered characters and reset z[2] - confirm against the original. */
for (z[0]=0;z[0] }}z[1]++;}
return 0;
}
/* Dumps bytes 10..242 of "main.cpp", 16 per line; the fopen result is not checked. */
int main(void){FILE *inf=fopen("main.cpp","rt"); hexer(inf,10,243,16);fclose(inf);return 0;}
output of "hasaproblem", 10 100, 16
  1. : 73 74 64 69 6F 2E 68 3E 0A 68 65 78 65 72 28 46 : stdio.h>.hexer(F
  2. : 49 4C 45 20 2A 66 2C 69 6E 74 20 62 2C 69 6E 74 : ILE *f,int b,int
  3. : 20 65 2C 69 6E 74 20 77 29 7B 0A 20 20 75 6E 73 : e,int w){. uns
  4. : 69 67 6E 65 64 20 63 68 61 72 20 63 62 5B 32 30 : igned char cb[20
  5. : 34 38 5D 3B 69 6E 74 20 7A 5B 33 5D 3D 7B 30 2C : 48];int z[3]={0,
  6. : 30 2C 30 7D 3B 0A 20 20 77 68 69 6C 65 28 21 66 : 0,0};. while(!f
    1. : 65 6F 66 28 66 29 29 7B 0A 20 20 66 73 63 61 6E : eof(f)){. fscan
    2. : 66 28 66 2C 22 25 63 22 2C 26 63 62 5B 7A 5B 32 : f(f,"%c",&cb[z[2
    3. : 5D 5D 29 3B 0A 20 20 69 66 20 28 28 7A 5B 31 5D : ]]);. if ((z[1]
    4. : 3E 3D 62 29 26 26 28 7A 5B 31 5D 3C 65 29 29 7B : >=b)&&(z[1]
    5. : 0A 20 20 69 66 20 28 7A 5B 32 5D 3D 3D 30 29 20 : . if (z[2]==0)
    6. : 70 72 69 6E 74 66 28 22 25 34 75 20 3A 20 22 2C : printf("%4u : ",
    7. : 7A 5B 31 5D 29 3B 70 72 69 6E 74 66 28 22 25 30 : z[1]);printf("%0
    8. : 32 58 20 22 2C 63 62 5B 7A 5B 32 5D 5D 2C 7A 5B : 2X ",cb[z[2]],z[
    9. : 32 5D 2B 2B 29 3B 0A 20 20 __ __ __ __ __ __ __ : 2]++);. [2]],z[

Allow me to don the CeeLanguage evil advocate's hat for a moment:

Super Two-Line Excessively Minimal CeeLanguage example:

#include
hexer(FILE *f,int b,int e,int w){unsigned char cb[2048];int z[3]={0,0,0};while(!feof(f)){fscanf(f,"%c",&cb[z[2]]);if ((z[1]>=b)&&(z[1]

I am of course joking about all this.

Sarcastic, you mean. Since you have all this boundless energy, where's that C++ version?

I'm currently uploading it. . . Even at 700k/s, it will still take until tomorrow morning.

Smaller than I expected; congratulations.


Here is an attempt at a reasonable-length CeePlusPlus version:

#include <iostream>
#include <algorithm>
#include <sstream>
#include <iomanip>
#include <cctype>
#include <cstdlib>
#include <iterator>
#include <string>
using namespace std;

// Function object that collects bytes one at a time and writes a dump
// line to cout whenever `width` bytes have been gathered.
// Call flush() after the last byte to emit a final, shorter line.
struct hexwriter
{
    stringstream _numbers,_letters;   // hex column and text column of the pending line
    int _start,_width,_position;      // offset of the pending line, bytes per line, bytes pending

    // start: offset shown on the first line; width: bytes per line
    // (a width below 1 falls back to 16 so that the modulo below is defined).
    hexwriter(int start,int width) :
        _start(start), _width(width>0?width:16), _position(0)
    {
    }

    // Append one byte to the pending line.
    void operator()(char value)
    {
        // go through unsigned char: <cctype> functions are undefined for
        // negative values, and the hex column must show 00..ff
        const unsigned char byte=static_cast<unsigned char>(value);
        _numbers << setw(2) << setfill('0') << hex;
        _numbers << static_cast<int>(byte) << ' ';
        // anything that is not printable ASCII is shown as a period
        _letters << (isprint(byte)?value:'.');
        if(++_position%_width==0)
            flush();
    }

    // Write the pending line (if any) as "offset : hex bytes : text".
    void flush()
    {
        if(0!=_position)
        {
            cout << right << setw(8) << hex << _start << " : ";
            cout << left << setw(3*_width) << _numbers.str() << ": " << _letters.str() << endl;
            _position=0;
            _start+=_width;
        }
        _numbers.str("");
        _letters.str("");
    }
};
int main(int argc,char** argv)
{
int first=(argc>1)?atoi(argv[1]):0;
int last=(argc>2)?atoi(argv[2]):string::npos;
int width=(argc>3)?atoi(argv[3]):16;
typedef istreambuf_iterator cinit;
string buffer(cinit(cin.rdbuf()),cinit());
buffer=buffer.substr(first,last);
hexwriter writer(first,width);
for_each(buffer.begin(),buffer.end(),writer);
writer.flush();
}
$ hexdump 10 110 24 < hexdump.cpp
a : 69 6f 73 74 72 65 61 6d 3e 0a 23 69 6e 63 6c 75 64 65 20 3c 61 6c 67 6f : iostream>.#include
  1. : 72 69 74 68 6d 3e 0a 23 69 6e 63 6c 75 64 65 20 3c 73 73 74 72 65 61 6d : rithm>.#include
  2. a : 3e 0a 23 69 6e 63 6c 75 64 65 20 3c 69 6f 6d 61 6e 69 70 3e 0a 75 73 69 : >.#include .usi
  3. : 6e 67 20 6e 61 6d 65 73 70 61 63 65 20 73 74 64 3b 0a 0a 73 74 72 75 63 : ng namespace std;..struc
  4. a : 74 20 68 65 78 77 72 69 74 65 72 0a 7b 0a : t hexwriter.{.

-- MichaelSparks


OcamlLanguage:

open Printf
(* [hexadecimal str] is every byte of [str] as " xx" (blank plus two
   lowercase hex digits); [ascii str] is [str] with every character
   outside ' ' .. '~' replaced by a period. *)
let hexadecimal str =
  let out = Buffer.create (3 * String.length str) in begin
    String.iter (fun c -> Printf.bprintf out " %02x" (Char.code c)) str;
    Buffer.contents out
  end
and ascii str =
  let out = Buffer.create (String.length str) in
  let shown c = if c >= ' ' && c <= '~' then c else '.' in begin
    String.iter (fun c -> Buffer.add_char out (shown c)) str;
    Buffer.contents out
  end
(* Print one dump line for the bytes [str] found at offset [pos].
   [front] right-justifies both columns (for a first line that starts in
   the middle of a row); [back] left-justifies them. *)
let front width pos str =
  let hex_field = hexadecimal str and text_field = ascii str in
  printf "%08x:%*s | %*s\n" pos (3 * width) hex_field width text_field
and back width pos str =
  let hex_field = hexadecimal str and text_field = ascii str in
  printf "%08x:%-*s | %-*s\n" pos (3 * width) hex_field width text_field
(* [dump filename ~s ~e ~width ()] prints the bytes of [filename] from
   offset [s] (default 0) up to, not including, [e] (default: end of file),
   [width] bytes per row (default 16). Rows are aligned on multiples of
   [width], so a first row that starts mid-row is printed right-justified
   with [front]; all later rows use [back]. *)
let dump filename ?s:(s = 0) ?e:e ?width:(width = 16) () =
  let chan = open_in_bin filename in
  (* Read up to [length] bytes. [input] may return fewer bytes than asked
     for without being at end of file, so keep reading until the chunk is
     full or [input] reports 0. *)
  let read_chunk length =
    let buf = Bytes.create length in
    let rec fill got =
      if got >= length then got
      else
        match input chan buf got (length - got) with
        | 0 -> got
        | n -> fill (got + n)
    in
    Bytes.sub_string buf 0 (fill 0)
  in
  (* Number of bytes still wanted from offset [pos] onwards. *)
  let wanted pos =
    match e with
    | Some e -> max 0 (e - pos)
    | None -> max_int
  in
  let rec loop line pos =
    (* never go past the end of the current row or past [e] *)
    let length = min (wanted pos) (width - (pos mod width)) in
    if length > 0 then begin
      let chunk = read_chunk length in
      if chunk <> "" then line width pos chunk;
      (* a short chunk means end of file *)
      if String.length chunk = length then loop back (pos + length)
    end
  in
  (try
     seek_in chan s;
     loop front s
   with exn ->
     close_in_noerr chan;
     raise exn);
  close_in chan
;;
(* Command line: file [start [end [width]]]; anything else prints the usage text. *)
let usage () =
  eprintf "Usage: %s file [start [end [width]]]\n" Sys.argv.(0) in
let main () =
  let num = int_of_string in
  match Array.to_list Sys.argv with
  | [_; file] -> dump file ()
  | [_; file; s] -> dump file ~s:(num s) ()
  | [_; file; s; e] -> dump file ~s:(num s) ~e:(num e) ()
  | [_; file; s; e; width] ->
      dump file ~s:(num s) ~e:(num e) ~width:(num width) ()
  | _ -> usage ()
in main ()

JayLanguage:

nhex =: 3 : 0 NB. res;val
NB. y. is a boxed pair: number of hex digits ; value(s) to format.
NB. Each value is split into that many base-16 digits, the digits are used
NB. to index the hex character table, and a blank is appended to every row.
' ',~"1(((>0{y.)$16) #: >1{y.){ '0123456789ABCDEF'
)
hex =: 3 : ',.nhex 2;a.i.y.' NB. character codes of y. as 2 hex digits plus a blank each, ravelled per item
prt =: 3 : 0
NB. Text column: characters of y. found in pa are kept, all others become mk.
NB. pa holds the 96 characters with codes 32..127.
pa =. (32+i.96){a.
mk =. '.'
NB. i. yields 96 (the length of pa) for characters not in pa, which selects mk.
(pa i.y.){pa,mk
)
hexdump =: 3 : 0 NB. 'file';start;end;width
NB. Result is a character table, one row per dump line:
NB. address, hex bytes, ': ', printable text.
fn =. >0{y.
st=. >1{y.
NB. byte count: start..end inclusive, with end capped at the file size (1!:4).
NB. NOTE(review): "<fn)<.>" is reconstructed; the listing had lost the text
NB. between '<' and '>' and was left with unbalanced parentheses.
sz=. >:((1!:4 <fn)<.>2{y.)-st
w=. >3{y.
r=. >.sz%w
adrs=. nhex 4;st+w*i.r
NB. indexed read (1!:11) of sz bytes from st, padded with {.a. to fill r rows of w
text=. (r,w)$(1!:11 fn;st,sz),(-sz-r*w)${.a.
adrs,. (hex text),. ':',.' ',. prt text
)

Put to use:

hexdump 'c:\prj\j\hexdump.ijs';20;73;10
  1. 65 78 20 3D 3A 20 33 20 3A 20 : ex =: 3 :
  2. E 30 20 20 4E 42 2E 20 72 65 73 : 0 NB. res
  3. 3B 76 61 6C 0D 0A 09 27 20 27 : ;val...' '
  4. 2C 7E 22 31 28 28 28 3E 30 7B : ,~"1(((>0{
  5. C 79 2E 29 24 31 36 29 20 23 3A : y.)$16) #:
  6. 20 3E 31 7B 00 00 00 00 00 00 : >1{......

-- MarcThibault


Here is an attempt at a reasonable-length Java version:

import java.io.FileInputStream;

public class HexDump {

public static void main(final String[] args) throws Exception {
final int width = 3 * Integer.parseInt(args[3]);
final FileInputStream inS = new FileInputStream(args[0]);
inS.skip(Integer.parseInt(args[1]));
final StringBuilder hex = new StringBuilder();
final StringBuilder chr = new StringBuilder();
for (int pos = Integer.parseInt(args[2]); pos > 0 && inS.available() > 0; pos--) {
chr.append((char) inS.read());
hex.append(String.format("%1$02x ", (byte) chr.charAt(chr.length() - 1)));
if (hex.length() >= width || pos <= 1) {
System.out.println(String.format("%1$-" + (width) + "s", hex) + ":"
+ chr.toString().replaceAll("[^!-~]", " "));
hex.setLength(0);
chr.setLength(0);
}
}
}

}

Arguments: HexDump.java 0 100 16

  1. 6d 70 6f 72 74 20 6a 61 76 61 2e 69 6f 2e 46 :import java.io.F
  2. 6c 65 49 6e 70 75 74 53 74 72 65 61 6d 3b 0d :ileInputStream;
  3. a 0d 0a 70 75 62 6c 69 63 20 63 6c 61 73 73 20 : public class
  4. 65 78 44 75 6d 70 20 7b 0d 0a 09 70 75 62 6c :HexDump { publ
  5. 63 20 73 74 61 74 69 63 20 76 6f 69 64 20 6d :ic static void m
  6. 69 6e 28 66 69 6e 61 6c 20 53 74 72 69 6e 67 :ain(final String
  7. b 5d 20 61 :[] a

A shorter CeePlusPlus version, with error checking:

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

// Text column: print c itself if it is printable, otherwise a period.
// The cast matters: isprint is undefined for negative char values.
void show_char(char c) { cout << (isprint(static_cast<unsigned char>(c)) ? c : '.'); }

// Hex column: print the byte value of c in a field of width 2 plus a blank.
// Relies on the caller having put cout into hex mode with '0' as fill.
void show_hex(char c) { cout << setw(2) << (c & 0xff) << ' '; }
int main(int argc, char** argv) {
try {
if (argc != 5)
throw invalid_argument("Usage: hexdump ");
ifstream in(argv[1], ios::binary);
if (! in)
throw invalid_argument("File not found");
size_t pos(strtoul(argv[2], 0, 0)), end(strtoul(argv[3], 0, 0)), width(strtoul(argv[4], 0, 0));
if (end < pos || width < 1)
throw invalid_argument("Invalid arguments");
in.seekg(pos);
if (! in)
throw invalid_argument("Start position is past end of file");
vector buf(width);
cout << hex << setfill('0');
for (; in && pos < end; pos += width) {
in.read(&buf[0], min(end - pos, width));
size_t bytes(in.gcount());
if (bytes) {
cout << setw(8) << pos << " : ";
for_each(&buf[0], &buf[bytes], show_hex);
cout << string(3 * (width - bytes), ' ') << ": ";
for_each(&buf[0], &buf[bytes], show_char);
cout << '\n';
}
}
return 0;
}
catch (const exception& ex) {
cerr << "*** " << ex.what() << '\n';
return 1;
}
}
./hexdump hexdump.cpp 100 200 16
  1. : 63 6c 75 64 65 20 3c 73 74 64 65 78 63 65 70 74 : clude
  2. : 3e 0a 23 69 6e 63 6c 75 64 65 20 3c 73 74 72 69 : >.#include
  3. : 6e 67 3e 0a 23 69 6e 63 6c 75 64 65 20 3c 76 65 : ng>.#include
  4. : 63 74 6f 72 3e 0a 0a 75 73 69 6e 67 20 6e 61 6d : ctor>..using nam
  5. a4 : 65 73 70 61 63 65 20 73 74 64 3b 0a 0a 76 6f 69 : espace std;..voi
  6. b4 : 64 20 73 68 6f 77 5f 63 68 61 72 28 63 68 61 72 : d show_char(char
  7. c4 : 20 63 29 20 : c)

-- RossSmith


A EuphoriaLanguage version. I'd forgotten how verbose Euphoria is, although this could be shortened...

include get.e
include file.e
constant STDOUT = 1, STDERR = 2
sequence Filename
integer Start, End, Width, FileHandle, Lines
-- Write msg to standard error, then end the program with exit status 2.
procedure Error(sequence msg)
puts(STDERR, msg)
abort(2)
end procedure
-- Report the expected command line and terminate (Error does not return).
procedure Usage()
    Error("Usage: ex hexdump.ex <filename> <start> <end> <width>\n")
end procedure
-- Parse a command-line parameter as an integer.
-- Anything that value() cannot read as an integer ends the program via Usage().
function IntParam(sequence string)
    sequence parsed
    parsed = value(string)
    if parsed[1] != GET_SUCCESS or not integer(parsed[2]) then
        Usage()
    end if
    return parsed[2]
end function
-- Read filename, start, end and width from the command line into the
-- file-level variables, and work out how many output lines are needed.
procedure GetParams()
    sequence args
    integer span
    args = command_line()
    args = args[3..length(args)]    -- the first two entries are not parameters
    if length(args) != 4 then
        Usage()
    end if
    Filename = args[1]
    Start = IntParam(args[2])
    End = IntParam(args[3])
    Width = IntParam(args[4])
    span = End - Start + 1          -- End is inclusive
    Lines = floor(span / Width)
    if remainder(span, Width) != 0 then
        Lines += 1                  -- one more line for the leftover bytes
    end if
end procedure
-- Main Program start!
-- Reads the parameters, opens the file and prints Lines rows of the form
-- "offset : hex bytes : text".
GetParams()
FileHandle = open(Filename, "rb")
if FileHandle = -1 then
Error("Unable to read file '" & Filename & "'\n")
end if
sequence Bytes
sequence Output
integer Void, RemainingBytes, BytesToRead, Offset, Padding
-- the result of seek() is stored but never examined
Void = seek(FileHandle, Start)
RemainingBytes = End - Start + 1
Offset = Start
for lindex = 1 to Lines do
-- every row starts with the offset of its first byte
Output = sprintf("%04x : ", Offset)
-- a full row, or whatever is left for the last one
if Width < RemainingBytes then
BytesToRead = Width
else
BytesToRead = RemainingBytes
end if
RemainingBytes -= BytesToRead
Offset += BytesToRead
Bytes = get_bytes(FileHandle, BytesToRead)
-- hex column
for bindex = 1 to length(Bytes) do
Output &= sprintf("%02x ", Bytes[bindex])
end for
-- pad a short row so the text column lines up: 7 characters of offset
-- prefix plus 3 per byte
Padding = 3 * Width + 7
if length(Output) < Padding then
Output &= repeat(' ', Padding - length(Output))
end if
Output &= ": "
-- text column; note that the upper bound 127 lets DEL through as "printable"
for bindex = 1 to length(Bytes) do
if Bytes[bindex] >= 32 and Bytes[bindex] <= 127 then
Output &= Bytes[bindex] -- printable
else
Output &= ' ' -- not printable, show space instead
end if
end for
Output &= "\n"
puts(STDOUT, Output)
end for

Outputs:

  1. : 69 6E 63 6C 75 64 65 20 67 65 74 2E 65 0D 0A 69 : include get.e i
  2. : 6E 63 6C 75 64 65 20 66 69 6C 65 2E 65 0D 0A 0D : nclude file.e
  3. : 0A 63 6F 6E 73 74 61 6E 74 20 53 54 44 4F 55 54 : constant STDOUT
  4. : 20 3D 20 31 2C 20 53 54 44 45 52 52 20 3D 20 32 : = 1, STDERR = 2
  5. : 0D 0A 0D 0A 73 65 71 75 65 6E 63 65 20 46 69 6C : sequence Fil
  6. : 65 6E 61 6D 65 0D 0A 69 6E 74 65 67 65 72 20 53 : ename integer S
  7. : 74 61 72 74 : tart

In PhpLanguage:

<?php
// Usage: php hexdump.php <filename> <start> <end> <width>
// Prints bytes start..end (end inclusive) of the file, width bytes per line,
// as "offset : hex bytes : text".
array_shift($argv);
if (count($argv) !== 4) {
    die("Usage: php hexdump.php <filename> <start> <end> <width>\n");
}
list($filename, $start, $end, $width) = $argv;
$start = (int) $start;
$end = (int) $end;
$width = (int) $width;
// a width below 1 would never advance $offset
if ($width < 1 || $start < 0 || $end < $start) {
    die("Invalid arguments\n");
}
$fn = @fopen($filename, 'rb');
if ($fn === FALSE) {
    die("Unable to open file '$filename'\n");
}
fseek($fn, $start);
$offset = $start;
// "<=" because $end is inclusive: with "<" the byte at $end was dropped
// whenever it was the first byte of a line
while ($offset <= $end && !feof($fn)) {
    $readto = min(array($offset + $width - 1, $end));
    $bytes = fread($fn, $readto - $offset + 1);
    if ($bytes === FALSE || $bytes === '') {
        break; // end of file: nothing to print
    }
    $bytes = str_split($bytes);
    $line = sprintf('%04x : ', $offset);
    foreach ($bytes as $byte) {
        $line .= sprintf('%02x ', ord($byte));
    }
    // pad a short line so the text column lines up
    if (strlen($line) < ($width * 3 + 7)) {
        $line .= str_repeat(' ', $width * 3 + 7 - strlen($line));
    }
    $line .= ': ';
    foreach ($bytes as $byte) {
        // printable ASCII is 32..126; 127 is DEL
        $line .= (ord($byte) >= 32 && ord($byte) <= 126) ? $byte : ' ';
    }
    print $line ."\n";
    $offset += $width;
}
fclose($fn);

Outputs:

C:\programming-exercises\hexdump>php hexdump.php hexdump.php 0 99 16
  1. : 3c 3f 70 68 70 0a 61 72 72 61 79 5f 73 68 69 66 :
  2. : 74 28 24 61 72 67 76 29 3b 0a 69 66 20 28 63 6f : t($argv); if (co
  3. : 75 6e 74 28 24 61 72 67 76 29 20 21 3d 3d 20 34 : unt($argv) !== 4
  4. : 29 20 7b 0a 20 20 64 69 65 28 22 55 73 61 67 65 : ) { die("Usage
  5. : 3a 20 70 68 70 20 68 65 78 64 75 6d 70 2e 70 68 : : php hexdump.ph
  6. : 70 20 3c 66 69 6c 65 6e 61 6d 65 3e 20 3c 73 74 : p
  7. : 61 72 74 3e : art>

TCL

# The "binary" command is built into Tcl; there is no package to require.

# Print a hex dump of the string str on stdout, 16 bytes per line:
# a 7-digit hex address, the bytes in groups of two, and the text with
# non-printable characters shown as a period.
proc hexdump {str} {
    set len [string length $str]
    if {$len == 0} {
        puts "0000000: "
        return
    }
    set lines {}
    for {set addr 0} {$addr < $len} {incr addr 16} {
        set chunk [string range $str $addr [expr {$addr + 15}]]
        binary scan $chunk H* digits
        # four hex digits (two bytes) per group; the last group may be shorter
        set groups [regexp -all -inline {.{1,4}} $digits]
        set text ""
        foreach char [split $chunk ""] {
            if {[string is print $char]} {
                append text $char
            } else {
                append text "."
            }
        }
        # 39 = 8 groups of 4 digits plus 7 blanks: keeps the text column
        # aligned on a short last line
        lappend lines [format "%07x: %-39s %s" $addr [join $groups " "] $text]
    }
    puts [join $lines "\n"]
}

CategoryInManyProgrammingLanguages