==> makeup_c-0.1.1/.formatter.exs <==
# Used by "mix format"
[
  inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]

==> makeup_c-0.1.1/.gitignore <==
# The directory Mix will write compiled artifacts to.
/_build/

# If you run "mix test --cover", coverage assets end up here.
/cover/

# The directory Mix downloads your dependencies sources to.
/deps/

# Where third-party dependencies like ExDoc output generated docs.
/doc/

# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch

# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez

# Ignore package tarball (built via "mix hex.build").
makeup_c-*.tar

.DS_Store

==> makeup_c-0.1.1/.iex.exs <==
import_if_available Makeup.Lexers.CLexer.Testing

==> makeup_c-0.1.1/.travis.yml <==
sudo: false
language: elixir
elixir: '1.10'
otp_release: '22.0'
stages:
  - check formatted
  - test
jobs:
  include:
    - stage: check formatted
      script: mix format --check-formatted
    - stage: test
      elixir: '1.4'
      otp_release: '19.3'
    - elixir: '1.9'
      otp_release: '20.3'
    - elixir: '1.9'
      otp_release: '21.3'
    - elixir: '1.10'
      otp_release: '22.0'

==> makeup_c-0.1.1/README.md <==
# MakeupC

A [Makeup](https://github.com/tmbb/makeup/) lexer for the C language.

## Installation

Add `makeup_c` to your list of dependencies in `mix.exs`:

```elixir
def deps do
  [
    {:makeup_c, ">= 0.0.0"}
  ]
end
```

The lexer is automatically registered in Makeup under the language name "c" and the file extensions ".c" and ".h" (see the usage sketch below).

## Status

This lexer was built to highlight C code on a blog. It works for my purposes, so I'm happy. It is fairly naive: it doesn't take any of the C language's type information into account. It also knows the C++ keywords, but makes no attempt to handle templates or other elaborate type definitions. I needed it just for C, and that is what it does. If anybody wants to take a crack at making it more type aware, or at providing better support for C++, please have a go at it.
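## Usage

Once your application has started, Makeup can find the lexer through its registry. Here is a minimal sketch — `Makeup.Registry.fetch_lexer_by_name/1` and the `:lexer`/`:lexer_options` options are Makeup's own API, so double-check them against the Makeup version you are using:

```elixir
# Look up the lexer registered under the name "c"...
{:ok, {lexer, opts}} = Makeup.Registry.fetch_lexer_by_name("c")

# ...and render a C snippet as highlighted HTML.
html = Makeup.highlight("int a = 0;", lexer: lexer, lexer_options: opts)
```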
==> makeup_c-0.1.1/lib/c_lexer/application.ex <==
defmodule Makeup.Lexers.CLexer.Application do
  @moduledoc false
  use Application

  alias Makeup.Registry
  alias Makeup.Lexers.CLexer

  def start(_type, _args) do
    Registry.register_lexer(CLexer,
      options: [],
      names: ["c"],
      extensions: ["c", "h"]
    )

    Supervisor.start_link([], strategy: :one_for_one)
  end
end

==> makeup_c-0.1.1/lib/c_lexer/helper.ex <==
defmodule Makeup.Lexers.CLexer.Helper do
  @moduledoc false
  import NimbleParsec
  alias Makeup.Lexer.Combinators

  def with_optional_separator(combinator, separator) when is_binary(separator) do
    combinator |> repeat(string(separator) |> concat(combinator))
  end
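  # For example, `with_optional_separator(digits, "_")` — as used by the C
  # lexer's integer combinator below — accepts both `100000` and `100_000`.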
  # Allows escaping of the first character of a right delimiter.
  # This is used in sigils that don't support interpolation or character escapes
  # but must support escaping of the right delimiter.
  def escape_delim(rdelim) do
    rdelim_first_char = String.slice(rdelim, 0..0)
    string("\\" <> rdelim_first_char)
  end

  def sigil(ldelim, rdelim, ranges, middle, ttype, attrs \\ %{}) do
    left = string("~") |> utf8_string(ranges, 1) |> string(ldelim)
    right = string(rdelim)

    choices = middle ++ [utf8_char([])]

    left
    |> repeat(lookahead_not(right) |> choice(choices))
    |> concat(right)
    |> optional(utf8_string([?a..?z, ?A..?Z], min: 1))
    |> post_traverse({Combinators, :collect_raw_chars_and_binaries, [ttype, attrs]})
  end

  def escaped(literal) when is_binary(literal) do
    string("\\" <> literal)
  end

  def keyword_matcher(kind, fun_name, words) do
    heads =
      for {ttype, words} <- words do
        for word <- words do
          # Match on the token's text (the keyword itself) and re-tag it
          # with the appropriate token type.
          case kind do
            :defp ->
              quote do
                defp unquote(fun_name)([{:name, attrs, unquote(word)} | tokens]) do
                  [{unquote(ttype), attrs, unquote(word)} | unquote(fun_name)(tokens)]
                end
              end

            :def ->
              quote do
                def unquote(fun_name)([{:name, attrs, unquote(word)} | tokens]) do
                  [{unquote(ttype), attrs, unquote(word)} | unquote(fun_name)(tokens)]
                end
              end
          end
        end
      end

    quote do
      (unquote_splicing(heads))
    end
  end
end

==> makeup_c-0.1.1/lib/c_lexer/testing.ex <==
defmodule Makeup.Lexers.CLexer.Testing do
  @moduledoc false
  # The tests need to be checked manually!!!
  # (remove this line when they've been checked)

  alias Makeup.Lexers.CLexer
  alias Makeup.Lexer.Postprocess

  @sample_a """
  //---------------------------------------------------------
  kos_status_t kos_msg_queue_notification( IN kos_cap_t notification_cap ) {
    kos_status_t status;

    if ( notification_cap ) {
      // setting the notification
      seL4_SetCap( 0, kos_cap_cptr(notification_cap) );
      seL4_MessageInfo_t msg = seL4_Call(
        KOS_APP_SLOT_MESSAGING_EP,
        seL4_MessageInfo_new(KOS_MSG_QUEUE_SET_NOTIFICATION, 0, 1, 0)
      );
      status = seL4_MessageInfo_get_label( msg );
    } else {
      // clearing the notification
      seL4_MessageInfo_t msg = seL4_Call(
        KOS_APP_SLOT_MESSAGING_EP,
        seL4_MessageInfo_new(KOS_MSG_QUEUE_CLEAR_NOTIFICATION, 0, 0, 0)
      );
      status = seL4_MessageInfo_get_label( msg );
    }

    return status;
  }
  """

  @sample_b """
  static inline int test_fn( IN kos_cap_t notification_cap ) {
    int arr[3] = {0};
    int a = 0;
    a--;
    return a;
  }
  """

  @sample_c """
  /*multi comment*/
  #ifdef BOOGER
  return NULL;
  #endif
  """

  def lex_a(), do: @sample_a |> lex()
  def lex_b(), do: @sample_b |> lex()
  def lex_c(), do: @sample_c |> lex()

  # This function has two purposes:
  # 1. Ensure deterministic lexer output (no random prefix)
  # 2. Convert the token values into binaries so that the output
  #    is more obvious on visual inspection
  #    (iolists are hard for a human to parse)
  def lex(text) do
    text
    |> CLexer.lex(group_prefix: "group")
    |> Postprocess.token_values_to_binaries()
    |> Enum.map(fn {ttype, meta, value} -> {ttype, Map.delete(meta, :language), value} end)
  end
end

==> makeup_c-0.1.1/lib/makeup_c.ex <==
defmodule Makeup.Lexers.CLexer do
  import NimbleParsec
  import Makeup.Lexer.Combinators
  import Makeup.Lexer.Groups
  import Makeup.Lexers.CLexer.Helper

  @behaviour Makeup.Lexer

  ###################################################################
  # Step #1: tokenize the input (into a list of tokens)
  ###################################################################

  # We will often compose combinators into larger combinators.
  # Sometimes the smaller combinator is useful on its own as a token, and sometimes it isn't.
  # We'll adopt the following "convention":
  #
  # 1. A combinator that ends with `_name` returns a string
  # 2. Other combinators will *usually* return a token
  #
  # Why this convention? Tokens can't be composed further, while raw strings can.
  # This way, we immediately know which of the combinators we can compose.
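  #
  # For example, `operator_name` below returns a raw string and can still be
  # composed (e.g. fed to `token/2`), while `operator` is already a token and
  # can only be listed as-is in `root_element_combinator`.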
  # TODO: check we're following this convention

  whitespace = ascii_string([?\r, ?\s, ?\n, ?\f], min: 1) |> token(:whitespace)

  any_char = utf8_char([]) |> token(:error)

  # Numbers
  digits = ascii_string([?0..?9], min: 1)
  bin_digits = ascii_string([?0..?1], min: 1)
  hex_digits = ascii_string([?0..?9, ?a..?f, ?A..?F], min: 1)
  oct_digits = ascii_string([?0..?7], min: 1)

  # Digits in an integer may be separated by underscores
  number_bin_part = with_optional_separator(bin_digits, "_")
  number_oct_part = with_optional_separator(oct_digits, "_")
  number_hex_part = with_optional_separator(hex_digits, "_")
  integer = with_optional_separator(digits, "_")

  # Tokens for the lexer
  number_bin = string("0b") |> concat(number_bin_part) |> token(:number_bin)

  # C octal literals take a leading zero (e.g. `0755`), not Elixir's `0o` prefix.
  number_oct = string("0") |> concat(number_oct_part) |> token(:number_oct)

  number_hex = string("0x") |> concat(number_hex_part) |> token(:number_hex)

  # Base 10
  number_integer = token(integer, :number_integer)

  # Floating point numbers
  float_scientific_notation_part =
    ascii_string([?e, ?E], 1)
    |> optional(string("-"))
    |> concat(integer)

  number_float =
    integer
    |> string(".")
    |> concat(integer)
    |> optional(float_scientific_notation_part)
    |> token(:number_float)
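  # (Examples the number combinators accept: `0b1010`, `0755`, `0xDEADBEEF`,
  #  `1_000`, `3.5`, `2.5e-3`. The underscore digit separator is inherited
  #  from the Elixir lexer template — C itself has no digit separators, but
  #  accepting them is harmless.)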
  # C identifiers: a lowercase letter or underscore, followed by letters,
  # digits and underscores. Uppercase-initial identifiers are matched
  # separately by `define` below and highlighted as constants.
  # (The trailing `?`/`!` allowed by the Elixir template this lexer was
  # adapted from has been dropped: it made `a!=b` lex as the name `a!`.)
  variable_name =
    ascii_string([?a..?z, ?_], 1)
    |> optional(ascii_string([?a..?z, ?_, ?0..?9, ?A..?Z], min: 1))

  # Can also be a function name
  variable = variable_name |> lexeme |> token(:name)

  define_name =
    ascii_string([?A..?Z], 1)
    |> optional(ascii_string([?a..?z, ?_, ?0..?9, ?A..?Z], min: 1))

  define = token(define_name, :name_constant)

  operator_name =
    word_from_list(~W(
      -> + - * / % ++ -- ~ ^ & && | || = += -= *= /= &= |= %= ^=
      << >> <<= >>= > < >= <= == != ! ? :
    ))

  operator = token(operator_name, :operator)

  directive =
    string("#")
    |> concat(variable_name)
    |> token(:keyword_pseudo)

  punctuation =
    word_from_list(
      ["\\\\", ":", ";", ",", "."],
      :punctuation
    )

  delimiters_punctuation =
    word_from_list(
      ~W( ( \) [ ] { }),
      :punctuation
    )

  comment = many_surrounded_by(parsec(:root_element), "/*", "*/")

  delimiter_pairs = [
    delimiters_punctuation,
    comment
  ]

  normal_atom_name =
    utf8_string([?A..?Z, ?a..?z, ?_], 1)
    |> optional(utf8_string([?A..?Z, ?a..?z, ?_, ?0..?9, ?@], min: 1))

  # normal_atom =
  #   string(":")
  #   |> choice([operator_name, normal_atom_name])
  #   |> token(:string_symbol)

  unicode_char_in_string =
    string("\\u")
    |> ascii_string([?0..?9, ?a..?f, ?A..?F], 4)
    |> token(:string_escape)

  escaped_char =
    string("\\")
    |> utf8_string([], 1)
    |> token(:string_escape)

  combinators_inside_string = [
    unicode_char_in_string,
    escaped_char
  ]

  # C character literals are single-quoted, e.g. 'a' or '\n'.
  # (The Elixir-style `?a` character syntax from the template this lexer was
  # based on would also swallow the `?` of ternary expressions, so
  # single-quoted literals are used instead.)
  char_literal = string_like("'", "'", combinators_inside_string, :string_char)

  string_keyword =
    choice([
      string_like("\"", "\"", combinators_inside_string, :string_symbol),
      string_like("'", "'", combinators_inside_string, :string_symbol)
    ])
    |> concat(token(string(":"), :punctuation))

  normal_keyword =
    choice([operator_name, normal_atom_name])
    |> token(:string_symbol)
    |> concat(token(string(":"), :punctuation))

  keyword =
    choice([
      normal_keyword,
      string_keyword
    ])
    |> concat(whitespace)

  double_quoted_string_interpol = string_like("\"", "\"", combinators_inside_string, :string)

  line = repeat(lookahead_not(ascii_char([?\n])) |> utf8_string([], 1))

  inline_comment =
    string("//")
    |> concat(line)
    |> token(:comment_single)

  multiline_comment = string_like("/*", "*/", combinators_inside_string, :comment_multiline)

  root_element_combinator =
    choice(
      [
        whitespace,
        # Comments
        multiline_comment,
        inline_comment,
        # Preprocessor directives
        directive,
        # `name:` syntax inherited from the Elixir lexer template; in C this
        # mostly highlights labels such as `default:`
        # (must come before variables and strings)
        keyword,
        # Strings
        double_quoted_string_interpol,
        # Character literals
        char_literal
      ] ++
        delimiter_pairs ++
        [
          # Operators
          operator,
          # Numbers
          number_bin,
          number_oct,
          number_hex,
          # Floats must come before integers
          number_float,
          number_integer,
          # Names
          variable,
          define,
          punctuation,
          # If we can't parse any of the above, we highlight the next character as an error
          # and proceed from there.
          # A lexer should always consume any string given as input.
          any_char
        ]
    )

  # By default, don't inline the lexers.
  # Inlining them increases performance by ~20%
  # at the cost of doubling the compilation times...
  @inline false

  @doc false
  def __as_c_language__({ttype, meta, value}) do
    {ttype, Map.put(meta, :language, :c), value}
  end

  # Semi-public API: these two functions can be used by someone who wants to
  # embed this C lexer into another lexer, but other than that they are not
  # meant to be used by end users.
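  # (Sketch of such an embedding — assuming a NimbleParsec version that
  #  supports remote combinator references — a host lexer could call
  #  `parsec({Makeup.Lexers.CLexer, :root_element})` inside its own
  #  combinators to delegate a span of C code to this lexer.)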
  # @impl Makeup.Lexer
  defparsec(
    :root_element,
    root_element_combinator |> map({__MODULE__, :__as_c_language__, []}),
    inline: @inline
  )

  # @impl Makeup.Lexer
  defparsec(
    :root,
    repeat(parsec(:root_element)),
    inline: @inline
  )

  ###################################################################
  # Step #2: postprocess the list of tokens
  ###################################################################

  @keyword ~W[
    alignas alignof asm atomic_cancel atomic_commit atomic_noexcept auto
    break case catch class co_await co_return co_yield compl concept const
    const_cast constexpr continue decltype default delete do dynamic_cast
    else enum explicit export extern for friend goto if import inline module
    mutable namespace new noexcept nullptr operator private protected public
    register reinterpret_cast requires return sizeof static static_assert
    static_cast struct switch synchronized template this thread_local throw
    try typedef typeid typename union using virtual volatile while
  ]

  @keyword_type ~W[
    bool byte int long unsigned double char short signed float wchar_t
    char16_t char32_t int8_t uint8_t int16_t uint16_t int32_t uint32_t
    int64_t uint64_t
  ]

  @keyword_constant ~W[ NULL true false void ]

  @operator_word ~W[and and_eq bitand bitor not not_eq or or_eq xor xor_eq]

  @name_builtin_pseudo ~W[__FUNCTION__ __FILE__ __LINE__]

  # The `postprocess/1` function will require a major redesign if we ever
  # support custom keywords supplied by the user.
  defp postprocess_helper([]), do: []

  # Match function names: a name immediately followed by an opening paren.
  defp postprocess_helper([
         {:name, attrs, text},
         {:punctuation, %{language: :c}, "("} | tokens
       ]) do
    [
      {:name_function, attrs, text},
      {:punctuation, %{language: :c}, "("} | postprocess_helper(tokens)
    ]
  end

  defp postprocess_helper([{:name, attrs, text} | tokens]) when text in @keyword,
    do: [{:keyword, attrs, text} | postprocess_helper(tokens)]

  defp postprocess_helper([{:name, attrs, text} | tokens]) when text in @keyword_type,
    do: [{:keyword_type, attrs, text} | postprocess_helper(tokens)]

  defp postprocess_helper([{:name, attrs, text} | tokens]) when text in @keyword_constant,
    do: [{:keyword_constant, attrs, text} | postprocess_helper(tokens)]

  defp postprocess_helper([{:name, attrs, text} | tokens]) when text in @operator_word,
    do: [{:operator_word, attrs, text} | postprocess_helper(tokens)]

  defp postprocess_helper([{:name, attrs, text} | tokens]) when text in @name_builtin_pseudo,
    do: [{:name_builtin_pseudo, attrs, text} | postprocess_helper(tokens)]

  # Unused variables
  defp postprocess_helper([{:name, attrs, "_" <> _name = text} | tokens]),
    do: [{:comment, attrs, text} | postprocess_helper(tokens)]
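  # (So an underscore-prefixed name such as `_unused` is re-emitted as a
  #  `:comment` token, which most styles render dimmed — a convention
  #  carried over from the Elixir lexer this one appears to be based on.)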
  # Otherwise, don't do anything with the current token and go to the next token.
  defp postprocess_helper([token | tokens]), do: [token | postprocess_helper(tokens)]

  # Public API
  @impl Makeup.Lexer
  def postprocess(tokens, _opts \\ []), do: postprocess_helper(tokens)

  ###################################################################
  # Step #3: highlight matching delimiters
  ###################################################################

  @impl Makeup.Lexer
  defgroupmatcher(:match_groups,
    parentheses: [
      open: [[{:punctuation, %{language: :c}, "("}]],
      close: [[{:punctuation, %{language: :c}, ")"}]]
    ],
    array: [
      open: [[{:punctuation, %{language: :c}, "["}]],
      close: [[{:punctuation, %{language: :c}, "]"}]]
    ],
    brackets: [
      open: [[{:punctuation, %{language: :c}, "{"}]],
      close: [[{:punctuation, %{language: :c}, "}"}]]
    ]
  )

  defp remove_initial_newline([{ttype, meta, text} | tokens]) do
    case to_string(text) do
      "\n" -> tokens
      "\n" <> rest -> [{ttype, meta, rest} | tokens]
    end
  end

  # Finally, the public API for the lexer
  @impl Makeup.Lexer
  def lex(text, opts \\ []) do
    group_prefix = Keyword.get(opts, :group_prefix, random_prefix(10))
    {:ok, tokens, "", _, _, _} = root("\n" <> text)

    tokens
    |> remove_initial_newline()
    |> postprocess([])
    |> match_groups(group_prefix)
  end
end

==> makeup_c-0.1.1/mix.exs <==
defmodule MakeupC.MixProject do
  use Mix.Project

  @version "0.1.1"
  @url "https://github.com/elixir-makeup/makeup_c"

  def project do
    [
      app: :makeup_c,
      version: @version,
      elixir: "~> 1.4",
      start_permanent: Mix.env() == :prod,
      deps: deps(),
      # Package
      package: package(),
      description: description(),
      # aliases: aliases(),
      docs: docs()
    ]
  end

  defp description do
    """
    C lexer for the Makeup syntax highlighter.
    """
  end

  defp package do
    [
      name: :makeup_c,
      licenses: ["BSD"],
      maintainers: ["Boyd Multerer"],
      links: %{"GitHub" => @url}
    ]
  end

  # Run "mix help compile.app" to learn about applications.
  def application do
    [
      extra_applications: [],
      mod: {Makeup.Lexers.CLexer.Application, []}
    ]
  end

  # Run "mix help deps" to learn about dependencies.
  defp deps do
    [
      {:makeup, "~> 1.0"},
      {:ex_doc, ">= 0.0.0", only: [:dev, :docs]}
    ]
  end

  def docs do
    [
      extras: ["README.md"],
      source_ref: "v#{@version}",
      main: "Makeup.Lexers.CLexer"
    ]
  end
end

==> makeup_c-0.1.1/mix.lock <==
%{
  "benchee": {:hex, :benchee, "0.99.0", "0efbfc31045ad2f75a48673bd1befa8a6a5855e93b8c3117aed7d7da8de65b71", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "672d8e9436471b7d5b77ca5be3ad69d065553e7ed8c5db29bb3d662378104618"},
  "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"},
  "earmark_parser": {:hex, :earmark_parser, "1.4.10", "6603d7a603b9c18d3d20db69921527f82ef09990885ed7525003c7fe7dc86c56", [:mix], [], "hexpm", "8e2d5370b732385db2c9b22215c3f59c84ac7dda7ed7e544d7c459496ae519c0"},
  "ex_doc": {:hex, :ex_doc, "0.22.6", "0fb1e09a3e8b69af0ae94c8b4e4df36995d8c88d5ec7dbd35617929144b62c00", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "1e0aceda15faf71f1b0983165e6e7313be628a460e22a031e32913b98edbd638"},
  "makeup": {:hex, :makeup, "1.0.3", "e339e2f766d12e7260e6672dd4047405963c5ec99661abdc432e6ec67d29ef95", [:mix], [{:nimble_parsec, "~> 0.5", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "2e9b4996d11832947731f7608fed7ad2f9443011b3b479ae288011265cdd3dad"},
  "makeup_elixir": {:hex, :makeup_elixir, "0.14.1", "4f0e96847c63c17841d42c08107405a005a2680eb9c7ccadfd757bd31dabccfb", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "f2438b1a80eaec9ede832b5c41cd4f373b38fd7aa33e3b22d9db79e640cbde11"},
  "nimble_parsec": {:hex, :nimble_parsec, "0.6.0", "32111b3bf39137144abd7ba1cce0914533b2d16ef35e8abc5ec8be6122944263", [:mix], [], "hexpm", "27eac315a94909d4dc68bc07a4a83e06c8379237c5ea528a9acff4ca1c873c52"},
}

==> makeup_c-0.1.1/test/makeup_c_test.exs <==
defmodule MakeupCTest do
  use ExUnit.Case
  doctest Makeup.Lexers.CLexer

  test "minimal lex test" do
    assert Makeup.Lexers.CLexer.lex("int a = 0;") == [
             {:keyword_type, %{language: :c}, "int"},
             {:whitespace, %{language: :c}, " "},
             {:name, %{language: :c}, "a"},
             {:whitespace, %{language: :c}, " "},
             {:operator, %{language: :c}, "="},
             {:whitespace, %{language: :c}, " "},
             {:number_integer, %{language: :c}, "0"},
             {:punctuation, %{language: :c}, ";"}
           ]
  end
end

==> makeup_c-0.1.1/test/test_helper.exs <==
ExUnit.start()