Use UTF-8 for all communication with clang-format

Summary: Instead of picking the buffer file coding system, always use utf-8-unix for communicating with clang-format.  This is fine because clang-format never actually reads the file to be formatted, only standard input.  This is a bit simpler (process coding system is now a constant) and potentially faster, as utf-8-unix is Emacs's internal coding system.  Also add an end-to-end test that actually invokes clang-format.

Reviewers: klimek

Reviewed By: klimek

Differential Revision: https://reviews.llvm.org/D28904

llvm-svn: 292593
This commit is contained in:
Philipp Stephani
2017-01-20 09:37:50 +00:00
parent 3cdf650770
commit ce2f6b413f
2 changed files with 42 additions and 21 deletions

View File

@@ -31,8 +31,8 @@
(with-current-buffer stdout
(insert "<?xml version='1.0'?>
<replacements xml:space='preserve' incomplete_format='false'>
<replacement offset='7' length='0'> </replacement>
<replacement offset='14' length='0'> </replacement>
<replacement offset='4' length='0'> </replacement>
<replacement offset='10' length='0'> </replacement>
</replacements>
"))
0)))))
@@ -58,15 +58,14 @@
(should (equal args
'("-output-replacements-xml" "-assume-filename" "foo.cpp"
"-style" "file"
;; Length of the UTF-8 byte-order mark.
"-offset" "3"
;; Beginning of buffer, no byte-order mark.
"-offset" "0"
;; We have two lines with 2×2 bytes for the umlauts,
;; 2 bytes for the line ending, and 3 bytes for the
;; 1 byte for the line ending, and 3 bytes for the
;; other ASCII characters each.
"-length" "18"
;; Length of a single line (without line ending) plus
;; BOM.
"-cursor" "10")))))))
"-length" "16"
;; Length of a single line (without line ending).
"-cursor" "7")))))))
(ert-deftest clang-format-buffer--process-encoding ()
"Tests that text is sent to the clang-format process in the
@@ -105,6 +104,23 @@ right encoding."
(clang-format-buffer))
(should (equal (buffer-string) "ä\n"))
(should (eobp)))
(should (equal call-process-inputs '("ef bb bf c3 a4 0d 0a ")))))
(should (equal call-process-inputs '("c3 a4 0a ")))))
(ert-deftest clang-format-buffer--end-to-end ()
  "End-to-end test for `clang-format-buffer'.
Actually calls the clang-format binary, so the test is skipped
unless `clang-format-executable' names an executable file.

Formats a two-line buffer containing non-ASCII characters and
checks both the resulting text and that point stays at the end of
the first line."
  (skip-unless (file-executable-p clang-format-executable))
  (with-temp-buffer
    ;; Bind coding systems that differ from utf-8-unix on purpose: the
    ;; assertions below only hold if the byte offsets exchanged with
    ;; clang-format are computed independently of these settings.
    (let ((buffer-file-name "foo.cpp")
          (buffer-file-coding-system 'utf-8-with-signature-dos)
          ;; `default-process-coding-system' is a cons cell of the form
          ;; (DECODING . ENCODING); a bare symbol is not a valid value.
          (default-process-coding-system '(latin-1-unix . latin-1-unix)))
      (insert "ä =ö;\nü= ß;\n")
      ;; Put point at the end of the first line before formatting.
      (goto-char (point-min))
      (end-of-line)
      (clang-format-buffer))
    (should (equal (buffer-string) "ä = ö;\nü = ß;\n"))
    ;; Point must still be at the end of the first line, i.e. the cursor
    ;; position was mapped back correctly after the replacements.
    (should (eolp))
    (should (equal (buffer-substring (point) (point-max))
                   "\nü = ß;\n"))))
;;; clang-format-test.el ends here

View File

@@ -95,9 +95,10 @@ of the buffer."
(defun clang-format--replace (offset length &optional text)
"Replace the region defined by OFFSET and LENGTH with TEXT.
OFFSET and LENGTH are measured in bytes, not characters. OFFSET
is a zero-based file offset."
(let ((start (clang-format--filepos-to-bufferpos offset 'exact))
(end (clang-format--filepos-to-bufferpos (+ offset length) 'exact)))
is a zero-based file offset, assuming utf-8-unix coding."
(let ((start (clang-format--filepos-to-bufferpos offset 'exact 'utf-8-unix))
(end (clang-format--filepos-to-bufferpos (+ offset length) 'exact
'utf-8-unix)))
(goto-char start)
(delete-region start end)
(when text
@@ -130,15 +131,18 @@ is no active region. If no style is given uses `clang-format-style'."
(unless style
(setq style clang-format-style))
(let ((file-start (clang-format--bufferpos-to-filepos start 'approximate))
(file-end (clang-format--bufferpos-to-filepos end 'approximate))
(cursor (clang-format--bufferpos-to-filepos (point) 'exact))
(let ((file-start (clang-format--bufferpos-to-filepos start 'approximate
'utf-8-unix))
(file-end (clang-format--bufferpos-to-filepos end 'approximate
'utf-8-unix))
(cursor (clang-format--bufferpos-to-filepos (point) 'exact 'utf-8-unix))
(temp-buffer (generate-new-buffer " *clang-format-temp*"))
(temp-file (make-temp-file "clang-format"))
(default-process-coding-system
;; Output is XML, which is always UTF-8. Input encoding should match
;; the file encoding, otherwise the offsets calculated above are off.
(cons 'utf-8-unix buffer-file-coding-system)))
;; Output is XML, which is always UTF-8. Input encoding should match
;; the encoding used to convert between buffer and file positions,
;; otherwise the offsets calculated above are off. For simplicity, we
;; always use utf-8-unix and ignore the buffer coding system.
(default-process-coding-system '(utf-8-unix . utf-8-unix)))
(unwind-protect
(let ((status (call-process-region
nil nil clang-format-executable
@@ -168,7 +172,8 @@ is no active region. If no style is given uses `clang-format-style'."
(dolist (rpl replacements)
(apply #'clang-format--replace rpl)))
(when cursor
(goto-char (clang-format--filepos-to-bufferpos cursor 'exact)))
(goto-char (clang-format--filepos-to-bufferpos cursor 'exact
'utf-8-unix)))
(if incomplete-format
(message "(clang-format: incomplete (syntax errors)%s)" stderr)
(message "(clang-format: success%s)" stderr))))