language-python-0.4.1/0000755000000000000000000000000012441515301013000 5ustar0000000000000000language-python-0.4.1/language-python.cabal0000644000000000000000000000455712441515301017101 0ustar0000000000000000name: language-python version: 0.4.1 cabal-version: >= 1.6 synopsis: Parsing and pretty printing of Python code. description: language-python is a Haskell library for lexical analysis, parsing and pretty printing Python code. It supports versions 2.x and 3.x of Python. category: Language license: BSD3 license-file: LICENSE copyright: (c) 2008-2014 Bernard James Pope author: Bernard James Pope (Bernie Pope) maintainer: florbitous@gmail.com homepage: http://github.com/bjpop/language-python build-depends: base build-type: Simple stability: experimental extra-source-files: src/Language/Python/Version3/Parser/Parser.y src/Language/Python/Version3/Parser/Lexer.x ghc_6_12-13/Language/Python/Common/AST.hs ghc_normal/Language/Python/Common/AST.hs source-repository head type: git location: https://github.com/bjpop/language-python Library -- work around the memory usage bug in ghc 6.12.x if impl(ghc < 6.12) || impl(ghc >= 6.14) hs-source-dirs: src ghc_normal else hs-source-dirs: src ghc_6_12-13 build-depends: base == 4.*, containers == 0.5.*, pretty == 1.1.*, array >= 0.4 && < 0.6, transformers >= 0.3 && < 0.5, monads-tf == 0.1.* build-tools: happy, alex exposed-modules: Language.Python.Common Language.Python.Common.ParseError Language.Python.Common.SrcLocation Language.Python.Common.Pretty Language.Python.Common.Token Language.Python.Common.ParserMonad Language.Python.Common.PrettyToken Language.Python.Common.AST Language.Python.Common.PrettyAST Language.Python.Version3 Language.Python.Version3.Parser Language.Python.Version3.Lexer Language.Python.Version2 Language.Python.Version2.Parser Language.Python.Version2.Lexer Language.Python.Common.PrettyParseError Language.Python.Common.StringEscape other-modules: Language.Python.Common.ParserUtils Language.Python.Common.LexerUtils Language.Python.Version3.Parser.Parser Language.Python.Version3.Parser.Lexer Language.Python.Version2.Parser.Parser Language.Python.Version2.Parser.Lexer language-python-0.4.1/LICENSE0000644000000000000000000000276512441515301014017 0ustar0000000000000000Copyright (c) 2009-2010 Bernard James Pope (also known as Bernie Pope). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the author nor the names of his contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. language-python-0.4.1/Setup.lhs0000644000000000000000000000011412441515301014604 0ustar0000000000000000#!/usr/bin/env runhaskell > import Distribution.Simple > main = defaultMain language-python-0.4.1/ghc_6_12-13/0000755000000000000000000000000012441515301014511 5ustar0000000000000000language-python-0.4.1/ghc_6_12-13/Language/0000755000000000000000000000000012441515301016234 5ustar0000000000000000language-python-0.4.1/ghc_6_12-13/Language/Python/0000755000000000000000000000000012441515301017515 5ustar0000000000000000language-python-0.4.1/ghc_6_12-13/Language/Python/Common/0000755000000000000000000000000012441515301020745 5ustar0000000000000000language-python-0.4.1/ghc_6_12-13/Language/Python/Common/AST.hs0000644000000000000000000006273612441515301021746 0ustar0000000000000000{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, CPP, DeriveDataTypeable #-} {-# OPTIONS_GHC -fomit-interface-pragmas #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Syntax.AST -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Representation of the Python abstract syntax tree (AST). The representation is -- a superset of versions 2.x and 3.x of Python. In many cases they are -- identical. The documentation in this module indicates where they are -- different. -- -- All the data types have a (polymorphic) parameter which allows the AST to -- be annotated by an arbitrary type (for example source locations). Specialised -- instances of the types are provided for source spans. For example @Module a@ is -- the type of modules, and @ModuleSpan@ is the type of modules annoted with source -- span information. -- -- Note: there are cases where the AST is more liberal than the formal grammar -- of the language. Therefore some care must be taken when constructing -- Python programs using the raw AST. ----------------------------------------------------------------------------- module Language.Python.Common.AST ( -- * Annotation projection Annotated (..) -- * Modules , Module (..), ModuleSpan -- * Identifiers and dotted names , Ident (..), IdentSpan , DottedName, DottedNameSpan -- * Statements, suites, parameters, decorators and assignment operators , Statement (..), StatementSpan , Suite, SuiteSpan , Parameter (..), ParameterSpan , ParamTuple (..), ParamTupleSpan , Decorator (..), DecoratorSpan , AssignOp (..), AssignOpSpan -- * Expressions, operators, arguments and slices , Expr (..), ExprSpan , Op (..), OpSpan , Argument (..), ArgumentSpan , Slice (..), SliceSpan -- * Imports , ImportItem (..), ImportItemSpan , FromItem (..), FromItemSpan , FromItems (..), FromItemsSpan , ImportRelative (..), ImportRelativeSpan -- * Exceptions , Handler (..), HandlerSpan , ExceptClause (..), ExceptClauseSpan , RaiseExpr (..), RaiseExprSpan -- * Comprehensions , Comprehension (..), ComprehensionSpan , CompFor (..), CompForSpan , CompIf (..), CompIfSpan , CompIter (..), CompIterSpan ) where import Language.Python.Common.SrcLocation ( Span (getSpan), SrcSpan (..) ) import Data.Data -------------------------------------------------------------------------------- -- | Convenient access to annotations in annotated types. class Annotated t where -- | Given an annotated type, project out its annotation value. annot :: t annot -> annot -- | Identifier. data Ident annot = Ident { ident_string :: !String, ident_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type IdentSpan = Ident SrcSpan instance Span IdentSpan where getSpan = annot instance Annotated Ident where annot = ident_annot -- | A module (Python source file). -- -- * Version 2.6 -- -- * Version 3.1 -- newtype Module annot = Module [Statement annot] -- ^ A module is just a sequence of top-level statements. deriving (Eq,Ord,Show,Typeable,Data) type ModuleSpan = Module SrcSpan -- | A block of statements. A suite is a group of statements controlled by a clause, -- for example, the body of a loop. -- -- * Version 2.6 -- -- * Version 3.1 -- type Suite annot = [Statement annot] type SuiteSpan = Suite SrcSpan -- | A compound name constructed with the dot operator. type DottedName annot = [Ident annot] type DottedNameSpan = DottedName SrcSpan -- | An entity imported using the \'import\' keyword. -- -- * Version 2.6 -- -- * Version 3.1 -- data ImportItem annot = ImportItem { import_item_name :: DottedName annot -- ^ The name of module to import. , import_as_name :: Maybe (Ident annot) -- ^ An optional name to refer to the entity (the \'as\' name). , import_item_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ImportItemSpan = ImportItem SrcSpan instance Span ImportItemSpan where getSpan = annot instance Annotated ImportItem where annot = import_item_annot -- | An entity imported using the \'from ... import\' construct. -- -- * Version 2.6 -- -- * Version 3.1 -- data FromItem annot = FromItem { from_item_name :: Ident annot -- ^ The name of the entity imported. , from_as_name :: Maybe (Ident annot) -- ^ An optional name to refer to the entity (the \'as\' name). , from_item_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type FromItemSpan = FromItem SrcSpan instance Span FromItemSpan where getSpan = annot instance Annotated FromItem where annot = from_item_annot -- | Items imported using the \'from ... import\' construct. data FromItems annot = ImportEverything { from_items_annot :: annot } -- ^ Import everything exported from the module. | FromItems { from_items_items :: [FromItem annot], from_items_annot :: annot } -- ^ Import a specific list of items from the module. deriving (Eq,Ord,Show,Typeable,Data) type FromItemsSpan = FromItems SrcSpan instance Span FromItemsSpan where getSpan = annot instance Annotated FromItems where annot = from_items_annot -- | A reference to the module to import from using the \'from ... import\' construct. data ImportRelative annot = ImportRelative { import_relative_dots :: Int , import_relative_module :: Maybe (DottedName annot) , import_relative_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ImportRelativeSpan = ImportRelative SrcSpan instance Span ImportRelativeSpan where getSpan = annot instance Annotated ImportRelative where annot = import_relative_annot -- | Statements. -- -- * Simple statements: -- -- * Version 2.6 -- -- * Version 3.1 -- -- * Compound statements: -- -- * Version 2.6 -- -- * Version 3.1 -- data Statement annot -- | Import statement. = Import { import_items :: [ImportItem annot] -- ^ Items to import. , stmt_annot :: annot } -- | From ... import statement. | FromImport { from_module :: ImportRelative annot -- ^ Module to import from. , from_items :: FromItems annot -- ^ Items to import. , stmt_annot :: annot } -- | While loop. | While { while_cond :: Expr annot -- ^ Loop condition. , while_body :: Suite annot -- ^ Loop body. , while_else :: Suite annot -- ^ Else clause. , stmt_annot :: annot } -- | For loop. | For { for_targets :: [Expr annot] -- ^ Loop variables. , for_generator :: Expr annot -- ^ Loop generator. , for_body :: Suite annot -- ^ Loop body , for_else :: Suite annot -- ^ Else clause. , stmt_annot :: annot } -- | Function definition. | Fun { fun_name :: Ident annot -- ^ Function name. , fun_args :: [Parameter annot] -- ^ Function parameter list. , fun_result_annotation :: Maybe (Expr annot) -- ^ Optional result annotation. , fun_body :: Suite annot -- ^ Function body. , stmt_annot :: annot } -- | Class definition. | Class { class_name :: Ident annot -- ^ Class name. , class_args :: [Argument annot] -- ^ Class argument list. In version 2.x this is only ArgExprs. , class_body :: Suite annot -- ^ Class body. , stmt_annot :: annot } -- | Conditional statement (if-elif-else). | Conditional { cond_guards :: [(Expr annot, Suite annot)] -- ^ Sequence of if-elif conditional clauses. , cond_else :: Suite annot -- ^ Possibly empty unconditional else clause. , stmt_annot :: annot } -- | Assignment statement. | Assign { assign_to :: [Expr annot] -- ^ Entity to assign to. , assign_expr :: Expr annot -- ^ Expression to evaluate. , stmt_annot :: annot } -- | Augmented assignment statement. | AugmentedAssign { aug_assign_to :: Expr annot -- ^ Entity to assign to. , aug_assign_op :: AssignOp annot -- ^ Assignment operator (for example \'+=\'). , aug_assign_expr :: Expr annot -- ^ Expression to evaluate. , stmt_annot :: annot } -- | Decorated definition of a function or class. | Decorated { decorated_decorators :: [Decorator annot] -- ^ Decorators. , decorated_def :: Statement annot -- ^ Function or class definition to be decorated. , stmt_annot :: annot } -- | Return statement (may only occur syntactically nested in a function definition). | Return { return_expr :: Maybe (Expr annot) -- ^ Optional expression to evaluate and return to caller. , stmt_annot :: annot } -- | Try statement (exception handling). | Try { try_body :: Suite annot -- ^ Try clause. , try_excepts :: [Handler annot] -- ^ Exception handlers. , try_else :: Suite annot -- ^ Possibly empty else clause, executed if and when control flows off the end of the try clause. , try_finally :: Suite annot -- ^ Possibly empty finally clause. , stmt_annot :: annot } -- | Raise statement (exception throwing). | Raise { raise_expr :: RaiseExpr annot , stmt_annot :: annot } -- | With statement (context management). | With { with_context :: [(Expr annot, Maybe (Expr annot))] -- ^ Context expression(s) (yields a context manager). , with_body :: Suite annot -- ^ Suite to be managed. , stmt_annot :: annot } -- | Pass statement (null operation). | Pass { stmt_annot :: annot } -- | Break statement (may only occur syntactically nested in a for or while loop, but not nested in a function or class definition within that loop). | Break { stmt_annot :: annot } -- | Continue statement (may only occur syntactically nested in a for or while loop, but not nested in a function or class definition or finally clause within that loop). | Continue { stmt_annot :: annot } -- | Del statement (delete). | Delete { del_exprs :: [Expr annot] -- ^ Items to delete. , stmt_annot :: annot } -- | Expression statement. | StmtExpr { stmt_expr :: Expr annot, stmt_annot :: annot } -- | Global declaration. | Global { global_vars :: [Ident annot] -- ^ Variables declared global in the current block. , stmt_annot :: annot } -- | Nonlocal declaration. /Version 3.x only/. | NonLocal { nonLocal_vars :: [Ident annot] -- ^ Variables declared nonlocal in the current block (their binding comes from bound the nearest enclosing scope). , stmt_annot :: annot } -- | Assertion. | Assert { assert_exprs :: [Expr annot] -- ^ Expressions being asserted. , stmt_annot :: annot } -- | Print statement. /Version 2 only/. | Print { print_chevron :: Bool -- ^ Optional chevron (>>) , print_exprs :: [Expr annot] -- ^ Arguments to print , print_trailing_comma :: Bool -- ^ Does it end in a comma? , stmt_annot :: annot } -- | Exec statement. /Version 2 only/. | Exec { exec_expr :: Expr annot -- ^ Expression to exec. , exec_globals_locals :: Maybe (Expr annot, Maybe (Expr annot)) -- ^ Global and local environments to evaluate the expression within. , stmt_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type StatementSpan = Statement SrcSpan instance Span StatementSpan where getSpan = annot instance Annotated Statement where annot = stmt_annot -- | The argument for a @raise@ statement. data RaiseExpr annot = RaiseV3 (Maybe (Expr annot, Maybe (Expr annot))) -- ^ Optional expression to evaluate, and optional \'from\' clause. /Version 3 only/. | RaiseV2 (Maybe (Expr annot, (Maybe (Expr annot, Maybe (Expr annot))))) -- ^ /Version 2 only/. deriving (Eq,Ord,Show,Typeable,Data) type RaiseExprSpan = RaiseExpr SrcSpan -- | Decorator. data Decorator annot = Decorator { decorator_name :: DottedName annot -- ^ Decorator name. , decorator_args :: [Argument annot] -- ^ Decorator arguments. , decorator_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type DecoratorSpan = Decorator SrcSpan instance Span DecoratorSpan where getSpan = annot instance Annotated Decorator where annot = decorator_annot -- | Formal parameter of function definitions and lambda expressions. -- -- * Version 2.6: -- -- * -- -- * -- -- * Version 3.1: -- -- * -- -- * -- data Parameter annot -- | Ordinary named parameter. = Param { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_default :: Maybe (Expr annot) -- ^ Optional default value. , param_annot :: annot } -- | Excess positional parameter (single asterisk before its name in the concrete syntax). | VarArgsPos { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_annot :: annot } -- | Excess keyword parameter (double asterisk before its name in the concrete syntax). | VarArgsKeyword { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_annot :: annot } -- | Marker for the end of positional parameters (not a parameter itself). | EndPositional { param_annot :: annot } -- | Tuple unpack. /Version 2 only/. | UnPackTuple { param_unpack_tuple :: ParamTuple annot -- ^ The tuple to unpack. , param_default :: Maybe (Expr annot) -- ^ Optional default value. , param_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ParameterSpan = Parameter SrcSpan instance Span ParameterSpan where getSpan = annot instance Annotated Parameter where annot = param_annot -- | Tuple unpack parameter. /Version 2 only/. data ParamTuple annot = ParamTupleName { param_tuple_name :: Ident annot, param_tuple_annot :: annot } -- ^ A variable name. | ParamTuple { param_tuple :: [ParamTuple annot], param_tuple_annot :: annot } -- ^ A (possibly nested) tuple parameter. deriving (Eq,Ord,Show,Typeable,Data) type ParamTupleSpan = ParamTuple SrcSpan instance Span ParamTupleSpan where getSpan = annot instance Annotated ParamTuple where annot = param_tuple_annot -- | Arguments to function calls, class declarations and decorators. data Argument annot -- | Ordinary argument expression. = ArgExpr { arg_expr :: Expr annot, arg_annot :: annot } -- | Excess positional argument. | ArgVarArgsPos { arg_expr :: Expr annot, arg_annot :: annot } -- | Excess keyword argument. | ArgVarArgsKeyword { arg_expr :: Expr annot, arg_annot :: annot } -- | Keyword argument. | ArgKeyword { arg_keyword :: Ident annot -- ^ Keyword name. , arg_expr :: Expr annot -- ^ Argument expression. , arg_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ArgumentSpan = Argument SrcSpan instance Span ArgumentSpan where getSpan = annot instance Annotated Argument where annot = arg_annot -- | Exception handler. data Handler annot = Handler { handler_clause :: ExceptClause annot , handler_suite :: Suite annot , handler_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type HandlerSpan = Handler SrcSpan instance Span HandlerSpan where getSpan = annot instance Annotated Handler where annot = handler_annot -- | Exception clause. data ExceptClause annot = ExceptClause -- NB: difference with version 3 (has NAME as target, but looks like bug in grammar) { except_clause :: Maybe (Expr annot, Maybe (Expr annot)) , except_clause_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ExceptClauseSpan = ExceptClause SrcSpan instance Span ExceptClauseSpan where getSpan = annot instance Annotated ExceptClause where annot = except_clause_annot -- | Comprehension. In version 3.x this can be used for lists, sets, dictionaries and generators. data Comprehension e annot = Comprehension { comprehension_expr :: e , comprehension_for :: CompFor annot , comprehension_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ComprehensionSpan e = Comprehension e SrcSpan instance Span (ComprehensionSpan e) where getSpan = annot instance Annotated (Comprehension e) where annot = comprehension_annot -- | Comprehension \'for\' component. data CompFor annot = CompFor { comp_for_exprs :: [Expr annot] , comp_in_expr :: Expr annot , comp_for_iter :: Maybe (CompIter annot) , comp_for_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompForSpan = CompFor SrcSpan instance Span CompForSpan where getSpan = annot instance Annotated CompFor where annot = comp_for_annot -- | Comprehension guard. data CompIf annot = CompIf { comp_if :: Expr annot , comp_if_iter :: Maybe (CompIter annot) , comp_if_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompIfSpan = CompIf SrcSpan instance Span CompIfSpan where getSpan = annot instance Annotated CompIf where annot = comp_if_annot -- | Comprehension iterator (either a \'for\' or an \'if\'). data CompIter annot = IterFor { comp_iter_for :: CompFor annot, comp_iter_annot :: annot } | IterIf { comp_iter_if :: CompIf annot, comp_iter_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompIterSpan = CompIter SrcSpan instance Span CompIterSpan where getSpan = annot instance Annotated CompIter where annot = comp_iter_annot -- | Expressions. -- -- * Version 2.6 . -- -- * Version 3.1 . -- data Expr annot -- | Variable. = Var { var_ident :: Ident annot, expr_annot :: annot } -- | Literal integer. | Int { int_value :: Integer, expr_literal :: String, expr_annot :: annot } -- | Long literal integer. /Version 2 only/. | LongInt { int_value :: Integer, expr_literal :: String, expr_annot :: annot } -- | Literal floating point number. | Float { float_value :: Double, expr_literal :: String, expr_annot :: annot } -- | Literal imaginary number. | Imaginary { imaginary_value :: Double, expr_literal :: String, expr_annot :: annot } -- | Literal boolean. | Bool { bool_value :: Bool, expr_annot :: annot } -- | Literal \'None\' value. | None { expr_annot :: annot } -- | Ellipsis \'...\'. | Ellipsis { expr_annot :: annot } -- | Literal byte string. | ByteStrings { byte_string_strings :: [String], expr_annot :: annot } -- | Literal strings (to be concatentated together). | Strings { strings_strings :: [String], expr_annot :: annot } -- | Unicode literal strings (to be concatentated together). Version 2 only. | UnicodeStrings { unicodestrings_strings :: [String], expr_annot :: annot } -- | Function call. | Call { call_fun :: Expr annot -- ^ Expression yielding a callable object (such as a function). , call_args :: [Argument annot] -- ^ Call arguments. , expr_annot :: annot } -- | Subscription, for example \'x [y]\'. | Subscript { subscriptee :: Expr annot, subscript_expr :: Expr annot, expr_annot :: annot } -- | Slicing, for example \'w [x:y:z]\'. | SlicedExpr { slicee :: Expr annot, slices :: [Slice annot], expr_annot :: annot } -- | Conditional expresison. | CondExpr { ce_true_branch :: Expr annot -- ^ Expression to evaluate if condition is True. , ce_condition :: Expr annot -- ^ Boolean condition. , ce_false_branch :: Expr annot -- ^ Expression to evaluate if condition is False. , expr_annot :: annot } -- | Binary operator application. | BinaryOp { operator :: Op annot, left_op_arg :: Expr annot, right_op_arg :: Expr annot, expr_annot :: annot } -- | Unary operator application. | UnaryOp { operator :: Op annot, op_arg :: Expr annot, expr_annot :: annot } -- | Anonymous function definition (lambda). | Lambda { lambda_args :: [Parameter annot], lambda_body :: Expr annot, expr_annot :: annot } -- | Tuple. Can be empty. | Tuple { tuple_exprs :: [Expr annot], expr_annot :: annot } -- | Generator yield. | Yield { yield_expr :: Maybe (Expr annot) -- ^ Optional expression to yield. , expr_annot :: annot } -- | Generator. | Generator { gen_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | List comprehension. | ListComp { list_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | List. | List { list_exprs :: [Expr annot], expr_annot :: annot } -- | Dictionary. | Dictionary { dict_mappings :: [(Expr annot, Expr annot)], expr_annot :: annot } -- | Dictionary comprehension. /Version 3 only/. | DictComp { dict_comprehension :: Comprehension (Expr annot, Expr annot) annot, expr_annot :: annot } -- | Set. | Set { set_exprs :: [Expr annot], expr_annot :: annot } -- | Set comprehension. /Version 3 only/. | SetComp { set_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | Starred expression. /Version 3 only/. | Starred { starred_expr :: Expr annot, expr_annot :: annot } -- | Parenthesised expression. | Paren { paren_expr :: Expr annot, expr_annot :: annot } -- | String conversion (backquoted expression). Version 2 only. | StringConversion { backquoted_expr :: Expr annot, expr_anot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ExprSpan = Expr SrcSpan instance Span ExprSpan where getSpan = annot instance Annotated Expr where annot = expr_annot -- | Slice compenent. data Slice annot = SliceProper { slice_lower :: Maybe (Expr annot) , slice_upper :: Maybe (Expr annot) , slice_stride :: Maybe (Maybe (Expr annot)) , slice_annot :: annot } | SliceExpr { slice_expr :: Expr annot , slice_annot :: annot } | SliceEllipsis { slice_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type SliceSpan = Slice SrcSpan instance Span SliceSpan where getSpan = annot instance Annotated Slice where annot = slice_annot -- | Operators. data Op annot = And { op_annot :: annot } -- ^ \'and\' | Or { op_annot :: annot } -- ^ \'or\' | Not { op_annot :: annot } -- ^ \'not\' | Exponent { op_annot :: annot } -- ^ \'**\' | LessThan { op_annot :: annot } -- ^ \'<\' | GreaterThan { op_annot :: annot } -- ^ \'>\' | Equality { op_annot :: annot } -- ^ \'==\' | GreaterThanEquals { op_annot :: annot } -- ^ \'>=\' | LessThanEquals { op_annot :: annot } -- ^ \'<=\' | NotEquals { op_annot :: annot } -- ^ \'!=\' | NotEqualsV2 { op_annot :: annot } -- ^ \'<>\'. Version 2 only. | In { op_annot :: annot } -- ^ \'in\' | Is { op_annot :: annot } -- ^ \'is\' | IsNot { op_annot :: annot } -- ^ \'is not\' | NotIn { op_annot :: annot } -- ^ \'not in\' | BinaryOr { op_annot :: annot } -- ^ \'|\' | Xor { op_annot :: annot } -- ^ \'^\' | BinaryAnd { op_annot :: annot } -- ^ \'&\' | ShiftLeft { op_annot :: annot } -- ^ \'<<\' | ShiftRight { op_annot :: annot } -- ^ \'>>\' | Multiply { op_annot :: annot } -- ^ \'*\' | Plus { op_annot :: annot } -- ^ \'+\' | Minus { op_annot :: annot } -- ^ \'-\' | Divide { op_annot :: annot } -- ^ \'\/\' | FloorDivide { op_annot :: annot } -- ^ \'\/\/\' | Invert { op_annot :: annot } -- ^ \'~\' (bitwise inversion of its integer argument) | Modulo { op_annot :: annot } -- ^ \'%\' | Dot { op_annot :: annot } -- ^ \'.\' deriving (Eq,Ord,Show,Typeable,Data) type OpSpan = Op SrcSpan instance Span OpSpan where getSpan = annot instance Annotated Op where annot = op_annot -- | Augmented assignment operators. data AssignOp annot = PlusAssign { assignOp_annot :: annot } -- ^ \'+=\' | MinusAssign { assignOp_annot :: annot } -- ^ \'-=\' | MultAssign { assignOp_annot :: annot } -- ^ \'*=\' | DivAssign { assignOp_annot :: annot } -- ^ \'\/=\' | ModAssign { assignOp_annot :: annot } -- ^ \'%=\' | PowAssign { assignOp_annot :: annot } -- ^ \'*=\' | BinAndAssign { assignOp_annot :: annot } -- ^ \'&=\' | BinOrAssign { assignOp_annot :: annot } -- ^ \'|=\' | BinXorAssign { assignOp_annot :: annot } -- ^ \'^=\' | LeftShiftAssign { assignOp_annot :: annot } -- ^ \'<<=\' | RightShiftAssign { assignOp_annot :: annot } -- ^ \'>>=\' | FloorDivAssign { assignOp_annot :: annot } -- ^ \'\/\/=\' deriving (Eq,Ord,Show,Typeable,Data) type AssignOpSpan = AssignOp SrcSpan instance Span AssignOpSpan where getSpan = annot instance Annotated AssignOp where annot = assignOp_annot language-python-0.4.1/ghc_normal/0000755000000000000000000000000012441515301015111 5ustar0000000000000000language-python-0.4.1/ghc_normal/Language/0000755000000000000000000000000012441515301016634 5ustar0000000000000000language-python-0.4.1/ghc_normal/Language/Python/0000755000000000000000000000000012441515301020115 5ustar0000000000000000language-python-0.4.1/ghc_normal/Language/Python/Common/0000755000000000000000000000000012441515301021345 5ustar0000000000000000language-python-0.4.1/ghc_normal/Language/Python/Common/AST.hs0000644000000000000000000006266112441515301022343 0ustar0000000000000000{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, CPP, DeriveDataTypeable #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Syntax.AST -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Representation of the Python abstract syntax tree (AST). The representation is -- a superset of versions 2.x and 3.x of Python. In many cases they are -- identical. The documentation in this module indicates where they are -- different. -- -- All the data types have a (polymorphic) parameter which allows the AST to -- be annotated by an arbitrary type (for example source locations). Specialised -- instances of the types are provided for source spans. For example @Module a@ is -- the type of modules, and @ModuleSpan@ is the type of modules annoted with source -- span information. -- -- Note: there are cases where the AST is more liberal than the formal grammar -- of the language. Therefore some care must be taken when constructing -- Python programs using the raw AST. ----------------------------------------------------------------------------- module Language.Python.Common.AST ( -- * Annotation projection Annotated (..) -- * Modules , Module (..), ModuleSpan -- * Identifiers and dotted names , Ident (..), IdentSpan , DottedName, DottedNameSpan -- * Statements, suites, parameters, decorators and assignment operators , Statement (..), StatementSpan , Suite, SuiteSpan , Parameter (..), ParameterSpan , ParamTuple (..), ParamTupleSpan , Decorator (..), DecoratorSpan , AssignOp (..), AssignOpSpan -- * Expressions, operators, arguments and slices , Expr (..), ExprSpan , Op (..), OpSpan , Argument (..), ArgumentSpan , Slice (..), SliceSpan -- * Imports , ImportItem (..), ImportItemSpan , FromItem (..), FromItemSpan , FromItems (..), FromItemsSpan , ImportRelative (..), ImportRelativeSpan -- * Exceptions , Handler (..), HandlerSpan , ExceptClause (..), ExceptClauseSpan , RaiseExpr (..), RaiseExprSpan -- * Comprehensions , Comprehension (..), ComprehensionSpan , CompFor (..), CompForSpan , CompIf (..), CompIfSpan , CompIter (..), CompIterSpan ) where import Language.Python.Common.SrcLocation ( Span (getSpan), SrcSpan (..) ) import Data.Data -------------------------------------------------------------------------------- -- | Convenient access to annotations in annotated types. class Annotated t where -- | Given an annotated type, project out its annotation value. annot :: t annot -> annot -- | Identifier. data Ident annot = Ident { ident_string :: !String, ident_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type IdentSpan = Ident SrcSpan instance Span IdentSpan where getSpan = annot instance Annotated Ident where annot = ident_annot -- | A module (Python source file). -- -- * Version 2.6 -- -- * Version 3.1 -- newtype Module annot = Module [Statement annot] -- ^ A module is just a sequence of top-level statements. deriving (Eq,Ord,Show,Typeable,Data) type ModuleSpan = Module SrcSpan -- | A block of statements. A suite is a group of statements controlled by a clause, -- for example, the body of a loop. -- -- * Version 2.6 -- -- * Version 3.1 -- type Suite annot = [Statement annot] type SuiteSpan = Suite SrcSpan -- | A compound name constructed with the dot operator. type DottedName annot = [Ident annot] type DottedNameSpan = DottedName SrcSpan -- | An entity imported using the \'import\' keyword. -- -- * Version 2.6 -- -- * Version 3.1 -- data ImportItem annot = ImportItem { import_item_name :: DottedName annot -- ^ The name of module to import. , import_as_name :: Maybe (Ident annot) -- ^ An optional name to refer to the entity (the \'as\' name). , import_item_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ImportItemSpan = ImportItem SrcSpan instance Span ImportItemSpan where getSpan = annot instance Annotated ImportItem where annot = import_item_annot -- | An entity imported using the \'from ... import\' construct. -- -- * Version 2.6 -- -- * Version 3.1 -- data FromItem annot = FromItem { from_item_name :: Ident annot -- ^ The name of the entity imported. , from_as_name :: Maybe (Ident annot) -- ^ An optional name to refer to the entity (the \'as\' name). , from_item_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type FromItemSpan = FromItem SrcSpan instance Span FromItemSpan where getSpan = annot instance Annotated FromItem where annot = from_item_annot -- | Items imported using the \'from ... import\' construct. data FromItems annot = ImportEverything { from_items_annot :: annot } -- ^ Import everything exported from the module. | FromItems { from_items_items :: [FromItem annot], from_items_annot :: annot } -- ^ Import a specific list of items from the module. deriving (Eq,Ord,Show,Typeable,Data) type FromItemsSpan = FromItems SrcSpan instance Span FromItemsSpan where getSpan = annot instance Annotated FromItems where annot = from_items_annot -- | A reference to the module to import from using the \'from ... import\' construct. data ImportRelative annot = ImportRelative { import_relative_dots :: Int , import_relative_module :: Maybe (DottedName annot) , import_relative_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ImportRelativeSpan = ImportRelative SrcSpan instance Span ImportRelativeSpan where getSpan = annot instance Annotated ImportRelative where annot = import_relative_annot -- | Statements. -- -- * Simple statements: -- -- * Version 2.6 -- -- * Version 3.1 -- -- * Compound statements: -- -- * Version 2.6 -- -- * Version 3.1 -- data Statement annot -- | Import statement. = Import { import_items :: [ImportItem annot] -- ^ Items to import. , stmt_annot :: annot } -- | From ... import statement. | FromImport { from_module :: ImportRelative annot -- ^ Module to import from. , from_items :: FromItems annot -- ^ Items to import. , stmt_annot :: annot } -- | While loop. | While { while_cond :: Expr annot -- ^ Loop condition. , while_body :: Suite annot -- ^ Loop body. , while_else :: Suite annot -- ^ Else clause. , stmt_annot :: annot } -- | For loop. | For { for_targets :: [Expr annot] -- ^ Loop variables. , for_generator :: Expr annot -- ^ Loop generator. , for_body :: Suite annot -- ^ Loop body , for_else :: Suite annot -- ^ Else clause. , stmt_annot :: annot } -- | Function definition. | Fun { fun_name :: Ident annot -- ^ Function name. , fun_args :: [Parameter annot] -- ^ Function parameter list. , fun_result_annotation :: Maybe (Expr annot) -- ^ Optional result annotation. , fun_body :: Suite annot -- ^ Function body. , stmt_annot :: annot } -- | Class definition. | Class { class_name :: Ident annot -- ^ Class name. , class_args :: [Argument annot] -- ^ Class argument list. In version 2.x this is only ArgExprs. , class_body :: Suite annot -- ^ Class body. , stmt_annot :: annot } -- | Conditional statement (if-elif-else). | Conditional { cond_guards :: [(Expr annot, Suite annot)] -- ^ Sequence of if-elif conditional clauses. , cond_else :: Suite annot -- ^ Possibly empty unconditional else clause. , stmt_annot :: annot } -- | Assignment statement. | Assign { assign_to :: [Expr annot] -- ^ Entity to assign to. , assign_expr :: Expr annot -- ^ Expression to evaluate. , stmt_annot :: annot } -- | Augmented assignment statement. | AugmentedAssign { aug_assign_to :: Expr annot -- ^ Entity to assign to. , aug_assign_op :: AssignOp annot -- ^ Assignment operator (for example \'+=\'). , aug_assign_expr :: Expr annot -- ^ Expression to evaluate. , stmt_annot :: annot } -- | Decorated definition of a function or class. | Decorated { decorated_decorators :: [Decorator annot] -- ^ Decorators. , decorated_def :: Statement annot -- ^ Function or class definition to be decorated. , stmt_annot :: annot } -- | Return statement (may only occur syntactically nested in a function definition). | Return { return_expr :: Maybe (Expr annot) -- ^ Optional expression to evaluate and return to caller. , stmt_annot :: annot } -- | Try statement (exception handling). | Try { try_body :: Suite annot -- ^ Try clause. , try_excepts :: [Handler annot] -- ^ Exception handlers. , try_else :: Suite annot -- ^ Possibly empty else clause, executed if and when control flows off the end of the try clause. , try_finally :: Suite annot -- ^ Possibly empty finally clause. , stmt_annot :: annot } -- | Raise statement (exception throwing). | Raise { raise_expr :: RaiseExpr annot , stmt_annot :: annot } -- | With statement (context management). | With { with_context :: [(Expr annot, Maybe (Expr annot))] -- ^ Context expression(s) (yields a context manager). , with_body :: Suite annot -- ^ Suite to be managed. , stmt_annot :: annot } -- | Pass statement (null operation). | Pass { stmt_annot :: annot } -- | Break statement (may only occur syntactically nested in a for or while loop, but not nested in a function or class definition within that loop). | Break { stmt_annot :: annot } -- | Continue statement (may only occur syntactically nested in a for or while loop, but not nested in a function or class definition or finally clause within that loop). | Continue { stmt_annot :: annot } -- | Del statement (delete). | Delete { del_exprs :: [Expr annot] -- ^ Items to delete. , stmt_annot :: annot } -- | Expression statement. | StmtExpr { stmt_expr :: Expr annot, stmt_annot :: annot } -- | Global declaration. | Global { global_vars :: [Ident annot] -- ^ Variables declared global in the current block. , stmt_annot :: annot } -- | Nonlocal declaration. /Version 3.x only/. | NonLocal { nonLocal_vars :: [Ident annot] -- ^ Variables declared nonlocal in the current block (their binding comes from bound the nearest enclosing scope). , stmt_annot :: annot } -- | Assertion. | Assert { assert_exprs :: [Expr annot] -- ^ Expressions being asserted. , stmt_annot :: annot } -- | Print statement. /Version 2 only/. | Print { print_chevron :: Bool -- ^ Optional chevron (>>) , print_exprs :: [Expr annot] -- ^ Arguments to print , print_trailing_comma :: Bool -- ^ Does it end in a comma? , stmt_annot :: annot } -- | Exec statement. /Version 2 only/. | Exec { exec_expr :: Expr annot -- ^ Expression to exec. , exec_globals_locals :: Maybe (Expr annot, Maybe (Expr annot)) -- ^ Global and local environments to evaluate the expression within. , stmt_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type StatementSpan = Statement SrcSpan instance Span StatementSpan where getSpan = annot instance Annotated Statement where annot = stmt_annot -- | The argument for a @raise@ statement. data RaiseExpr annot = RaiseV3 (Maybe (Expr annot, Maybe (Expr annot))) -- ^ Optional expression to evaluate, and optional \'from\' clause. /Version 3 only/. | RaiseV2 (Maybe (Expr annot, (Maybe (Expr annot, Maybe (Expr annot))))) -- ^ /Version 2 only/. deriving (Eq,Ord,Show,Typeable,Data) type RaiseExprSpan = RaiseExpr SrcSpan -- | Decorator. data Decorator annot = Decorator { decorator_name :: DottedName annot -- ^ Decorator name. , decorator_args :: [Argument annot] -- ^ Decorator arguments. , decorator_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type DecoratorSpan = Decorator SrcSpan instance Span DecoratorSpan where getSpan = annot instance Annotated Decorator where annot = decorator_annot -- | Formal parameter of function definitions and lambda expressions. -- -- * Version 2.6: -- -- * -- -- * -- -- * Version 3.1: -- -- * -- -- * -- data Parameter annot -- | Ordinary named parameter. = Param { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_default :: Maybe (Expr annot) -- ^ Optional default value. , param_annot :: annot } -- | Excess positional parameter (single asterisk before its name in the concrete syntax). | VarArgsPos { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_annot :: annot } -- | Excess keyword parameter (double asterisk before its name in the concrete syntax). | VarArgsKeyword { param_name :: Ident annot -- ^ Parameter name. , param_py_annotation :: Maybe (Expr annot) -- ^ Optional annotation. , param_annot :: annot } -- | Marker for the end of positional parameters (not a parameter itself). | EndPositional { param_annot :: annot } -- | Tuple unpack. /Version 2 only/. | UnPackTuple { param_unpack_tuple :: ParamTuple annot -- ^ The tuple to unpack. , param_default :: Maybe (Expr annot) -- ^ Optional default value. , param_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ParameterSpan = Parameter SrcSpan instance Span ParameterSpan where getSpan = annot instance Annotated Parameter where annot = param_annot -- | Tuple unpack parameter. /Version 2 only/. data ParamTuple annot = ParamTupleName { param_tuple_name :: Ident annot, param_tuple_annot :: annot } -- ^ A variable name. | ParamTuple { param_tuple :: [ParamTuple annot], param_tuple_annot :: annot } -- ^ A (possibly nested) tuple parameter. deriving (Eq,Ord,Show,Typeable,Data) type ParamTupleSpan = ParamTuple SrcSpan instance Span ParamTupleSpan where getSpan = annot instance Annotated ParamTuple where annot = param_tuple_annot -- | Arguments to function calls, class declarations and decorators. data Argument annot -- | Ordinary argument expression. = ArgExpr { arg_expr :: Expr annot, arg_annot :: annot } -- | Excess positional argument. | ArgVarArgsPos { arg_expr :: Expr annot, arg_annot :: annot } -- | Excess keyword argument. | ArgVarArgsKeyword { arg_expr :: Expr annot, arg_annot :: annot } -- | Keyword argument. | ArgKeyword { arg_keyword :: Ident annot -- ^ Keyword name. , arg_expr :: Expr annot -- ^ Argument expression. , arg_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ArgumentSpan = Argument SrcSpan instance Span ArgumentSpan where getSpan = annot instance Annotated Argument where annot = arg_annot -- | Exception handler. data Handler annot = Handler { handler_clause :: ExceptClause annot , handler_suite :: Suite annot , handler_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type HandlerSpan = Handler SrcSpan instance Span HandlerSpan where getSpan = annot instance Annotated Handler where annot = handler_annot -- | Exception clause. data ExceptClause annot = ExceptClause -- NB: difference with version 3 (has NAME as target, but looks like bug in grammar) { except_clause :: Maybe (Expr annot, Maybe (Expr annot)) , except_clause_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ExceptClauseSpan = ExceptClause SrcSpan instance Span ExceptClauseSpan where getSpan = annot instance Annotated ExceptClause where annot = except_clause_annot -- | Comprehension. In version 3.x this can be used for lists, sets, dictionaries and generators. data Comprehension e annot = Comprehension { comprehension_expr :: e , comprehension_for :: CompFor annot , comprehension_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ComprehensionSpan e = Comprehension e SrcSpan instance Span (ComprehensionSpan e) where getSpan = annot instance Annotated (Comprehension e) where annot = comprehension_annot -- | Comprehension \'for\' component. data CompFor annot = CompFor { comp_for_exprs :: [Expr annot] , comp_in_expr :: Expr annot , comp_for_iter :: Maybe (CompIter annot) , comp_for_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompForSpan = CompFor SrcSpan instance Span CompForSpan where getSpan = annot instance Annotated CompFor where annot = comp_for_annot -- | Comprehension guard. data CompIf annot = CompIf { comp_if :: Expr annot , comp_if_iter :: Maybe (CompIter annot) , comp_if_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompIfSpan = CompIf SrcSpan instance Span CompIfSpan where getSpan = annot instance Annotated CompIf where annot = comp_if_annot -- | Comprehension iterator (either a \'for\' or an \'if\'). data CompIter annot = IterFor { comp_iter_for :: CompFor annot, comp_iter_annot :: annot } | IterIf { comp_iter_if :: CompIf annot, comp_iter_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type CompIterSpan = CompIter SrcSpan instance Span CompIterSpan where getSpan = annot instance Annotated CompIter where annot = comp_iter_annot -- | Expressions. -- -- * Version 2.6 . -- -- * Version 3.1 . -- data Expr annot -- | Variable. = Var { var_ident :: Ident annot, expr_annot :: annot } -- | Literal integer. | Int { int_value :: Integer, expr_literal :: String, expr_annot :: annot } -- | Long literal integer. /Version 2 only/. | LongInt { int_value :: Integer, expr_literal :: String, expr_annot :: annot } -- | Literal floating point number. | Float { float_value :: Double, expr_literal :: String, expr_annot :: annot } -- | Literal imaginary number. | Imaginary { imaginary_value :: Double, expr_literal :: String, expr_annot :: annot } -- | Literal boolean. | Bool { bool_value :: Bool, expr_annot :: annot } -- | Literal \'None\' value. | None { expr_annot :: annot } -- | Ellipsis \'...\'. | Ellipsis { expr_annot :: annot } -- | Literal byte string. | ByteStrings { byte_string_strings :: [String], expr_annot :: annot } -- | Literal strings (to be concatentated together). | Strings { strings_strings :: [String], expr_annot :: annot } -- | Unicode literal strings (to be concatentated together). Version 2 only. | UnicodeStrings { unicodestrings_strings :: [String], expr_annot :: annot } -- | Function call. | Call { call_fun :: Expr annot -- ^ Expression yielding a callable object (such as a function). , call_args :: [Argument annot] -- ^ Call arguments. , expr_annot :: annot } -- | Subscription, for example \'x [y]\'. | Subscript { subscriptee :: Expr annot, subscript_expr :: Expr annot, expr_annot :: annot } -- | Slicing, for example \'w [x:y:z]\'. | SlicedExpr { slicee :: Expr annot, slices :: [Slice annot], expr_annot :: annot } -- | Conditional expresison. | CondExpr { ce_true_branch :: Expr annot -- ^ Expression to evaluate if condition is True. , ce_condition :: Expr annot -- ^ Boolean condition. , ce_false_branch :: Expr annot -- ^ Expression to evaluate if condition is False. , expr_annot :: annot } -- | Binary operator application. | BinaryOp { operator :: Op annot, left_op_arg :: Expr annot, right_op_arg :: Expr annot, expr_annot :: annot } -- | Unary operator application. | UnaryOp { operator :: Op annot, op_arg :: Expr annot, expr_annot :: annot } -- | Anonymous function definition (lambda). | Lambda { lambda_args :: [Parameter annot], lambda_body :: Expr annot, expr_annot :: annot } -- | Tuple. Can be empty. | Tuple { tuple_exprs :: [Expr annot], expr_annot :: annot } -- | Generator yield. | Yield { yield_expr :: Maybe (Expr annot) -- ^ Optional expression to yield. , expr_annot :: annot } -- | Generator. | Generator { gen_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | List comprehension. | ListComp { list_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | List. | List { list_exprs :: [Expr annot], expr_annot :: annot } -- | Dictionary. | Dictionary { dict_mappings :: [(Expr annot, Expr annot)], expr_annot :: annot } -- | Dictionary comprehension. /Version 3 only/. | DictComp { dict_comprehension :: Comprehension (Expr annot, Expr annot) annot, expr_annot :: annot } -- | Set. | Set { set_exprs :: [Expr annot], expr_annot :: annot } -- | Set comprehension. /Version 3 only/. | SetComp { set_comprehension :: Comprehension (Expr annot) annot, expr_annot :: annot } -- | Starred expression. /Version 3 only/. | Starred { starred_expr :: Expr annot, expr_annot :: annot } -- | Parenthesised expression. | Paren { paren_expr :: Expr annot, expr_annot :: annot } -- | String conversion (backquoted expression). Version 2 only. | StringConversion { backquoted_expr :: Expr annot, expr_anot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type ExprSpan = Expr SrcSpan instance Span ExprSpan where getSpan = annot instance Annotated Expr where annot = expr_annot -- | Slice compenent. data Slice annot = SliceProper { slice_lower :: Maybe (Expr annot) , slice_upper :: Maybe (Expr annot) , slice_stride :: Maybe (Maybe (Expr annot)) , slice_annot :: annot } | SliceExpr { slice_expr :: Expr annot , slice_annot :: annot } | SliceEllipsis { slice_annot :: annot } deriving (Eq,Ord,Show,Typeable,Data) type SliceSpan = Slice SrcSpan instance Span SliceSpan where getSpan = annot instance Annotated Slice where annot = slice_annot -- | Operators. data Op annot = And { op_annot :: annot } -- ^ \'and\' | Or { op_annot :: annot } -- ^ \'or\' | Not { op_annot :: annot } -- ^ \'not\' | Exponent { op_annot :: annot } -- ^ \'**\' | LessThan { op_annot :: annot } -- ^ \'<\' | GreaterThan { op_annot :: annot } -- ^ \'>\' | Equality { op_annot :: annot } -- ^ \'==\' | GreaterThanEquals { op_annot :: annot } -- ^ \'>=\' | LessThanEquals { op_annot :: annot } -- ^ \'<=\' | NotEquals { op_annot :: annot } -- ^ \'!=\' | NotEqualsV2 { op_annot :: annot } -- ^ \'<>\'. Version 2 only. | In { op_annot :: annot } -- ^ \'in\' | Is { op_annot :: annot } -- ^ \'is\' | IsNot { op_annot :: annot } -- ^ \'is not\' | NotIn { op_annot :: annot } -- ^ \'not in\' | BinaryOr { op_annot :: annot } -- ^ \'|\' | Xor { op_annot :: annot } -- ^ \'^\' | BinaryAnd { op_annot :: annot } -- ^ \'&\' | ShiftLeft { op_annot :: annot } -- ^ \'<<\' | ShiftRight { op_annot :: annot } -- ^ \'>>\' | Multiply { op_annot :: annot } -- ^ \'*\' | Plus { op_annot :: annot } -- ^ \'+\' | Minus { op_annot :: annot } -- ^ \'-\' | Divide { op_annot :: annot } -- ^ \'\/\' | FloorDivide { op_annot :: annot } -- ^ \'\/\/\' | Invert { op_annot :: annot } -- ^ \'~\' (bitwise inversion of its integer argument) | Modulo { op_annot :: annot } -- ^ \'%\' | Dot { op_annot :: annot } -- ^ \'.\' deriving (Eq,Ord,Show,Typeable,Data) type OpSpan = Op SrcSpan instance Span OpSpan where getSpan = annot instance Annotated Op where annot = op_annot -- | Augmented assignment operators. data AssignOp annot = PlusAssign { assignOp_annot :: annot } -- ^ \'+=\' | MinusAssign { assignOp_annot :: annot } -- ^ \'-=\' | MultAssign { assignOp_annot :: annot } -- ^ \'*=\' | DivAssign { assignOp_annot :: annot } -- ^ \'\/=\' | ModAssign { assignOp_annot :: annot } -- ^ \'%=\' | PowAssign { assignOp_annot :: annot } -- ^ \'*=\' | BinAndAssign { assignOp_annot :: annot } -- ^ \'&=\' | BinOrAssign { assignOp_annot :: annot } -- ^ \'|=\' | BinXorAssign { assignOp_annot :: annot } -- ^ \'^=\' | LeftShiftAssign { assignOp_annot :: annot } -- ^ \'<<=\' | RightShiftAssign { assignOp_annot :: annot } -- ^ \'>>=\' | FloorDivAssign { assignOp_annot :: annot } -- ^ \'\/\/=\' deriving (Eq,Ord,Show,Typeable,Data) type AssignOpSpan = AssignOp SrcSpan instance Span AssignOpSpan where getSpan = annot instance Annotated AssignOp where annot = assignOp_annot language-python-0.4.1/src/0000755000000000000000000000000012441515301013567 5ustar0000000000000000language-python-0.4.1/src/Language/0000755000000000000000000000000012441515301015312 5ustar0000000000000000language-python-0.4.1/src/Language/Python/0000755000000000000000000000000012441515301016573 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Common.hs0000644000000000000000000000302312441515301020355 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Convenient re-export of common code, which -- works with both version 2.x and 3.x of Python. ----------------------------------------------------------------------------- module Language.Python.Common ( -- * Pretty printing infrastructure module Language.Python.Common.Pretty, -- * Lexical tokens module Language.Python.Common.Token, -- * Abstract Syntax Tree module Language.Python.Common.AST, -- * Source locations module Language.Python.Common.SrcLocation, -- * Pretty printing the Abstract Syntax Tree to concrete Python syntax module Language.Python.Common.PrettyAST, -- this export is for Haddock. -- * Pretty printing tokens module Language.Python.Common.PrettyToken, -- this export is for Haddock -- * Parse errors module Language.Python.Common.ParseError, -- * Pretty printing parse errors module Language.Python.Common.PrettyParseError -- this export is for Haddock ) where import Language.Python.Common.Pretty import Language.Python.Common.Token import Language.Python.Common.AST import Language.Python.Common.PrettyAST import Language.Python.Common.PrettyToken import Language.Python.Common.SrcLocation import Language.Python.Common.PrettyParseError import Language.Python.Common.ParseError language-python-0.4.1/src/Language/Python/Version2.hs0000644000000000000000000000205012441515301020633 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2 -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- A convenient re-export of the parser and lexer for version 2.x of Python. -- -- See: -- -- * for an overview of the language. -- -- * for the full grammar. -- -- * for a description of -- the various Python top-levels, which correspond to the parsers provided here. ----------------------------------------------------------------------------- module Language.Python.Version2 ( -- * The parser module Language.Python.Version2.Parser, -- * The lexer module Language.Python.Version2.Lexer ) where import Language.Python.Version2.Parser import Language.Python.Version2.Lexer language-python-0.4.1/src/Language/Python/Version3.hs0000644000000000000000000000205012441515301020634 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version3 -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- A convenient re-export of the parser and lexer for version 3.x of Python. -- -- See: -- -- * for an overview of the language. -- -- * for the full grammar. -- -- * for a description of -- the various Python top-levels, which correspond to the parsers provided here. ----------------------------------------------------------------------------- module Language.Python.Version3 ( -- * The parser module Language.Python.Version3.Parser, -- * The lexer module Language.Python.Version3.Lexer ) where import Language.Python.Version3.Parser import Language.Python.Version3.Lexer language-python-0.4.1/src/Language/Python/Common/0000755000000000000000000000000012441515301020023 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Common/LexerUtils.hs0000644000000000000000000001472212441515301022465 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.LexerUtils -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Various utilities to support the Python lexer. ----------------------------------------------------------------------------- module Language.Python.Common.LexerUtils where import Control.Monad (liftM) import Control.Monad.Error.Class (throwError) import Data.List (foldl') import Data.Map as Map hiding (null, map, foldl') import Data.Word (Word8) import Data.Char (ord) import Numeric (readHex, readOct) import Language.Python.Common.Token as Token import Language.Python.Common.ParserMonad hiding (location) import Language.Python.Common.SrcLocation -- Beginning of. BOF = beginning of file, BOL = beginning of line data BO = BOF | BOL -- Functions for building tokens type StartCode = Int type Action = SrcSpan -> Int -> String -> P Token lineJoin :: Action lineJoin span _len _str = return $ LineJoinToken $ spanStartPoint span endOfLine :: P Token -> Action endOfLine lexToken span _len _str = do setLastEOL $ spanStartPoint span lexToken bolEndOfLine :: P Token -> Int -> Action bolEndOfLine lexToken bol span len inp = do pushStartCode bol endOfLine lexToken span len inp dedentation :: P Token -> Action dedentation lexToken span _len _str = do topIndent <- getIndent -- case compare (endCol span) topIndent of case compare (startCol span) topIndent of EQ -> do popStartCode lexToken LT -> do popIndent return dedentToken GT -> spanError span "indentation error" indentation :: P Token -> Int -> BO -> Action -- Check if we are at the EOF. If yes, we may need to generate a newline, -- in case we came here from BOL (but not BOF). indentation lexToken _dedentCode bo _loc _len [] = do popStartCode case bo of BOF -> lexToken BOL -> newlineToken indentation lexToken dedentCode bo span _len _str = do popStartCode parenDepth <- getParenStackDepth if parenDepth > 0 then lexToken else do topIndent <- getIndent -- case compare (endCol span) topIndent of case compare (startCol span) topIndent of EQ -> case bo of BOF -> lexToken BOL -> newlineToken LT -> do pushStartCode dedentCode newlineToken -- GT -> do pushIndent (endCol span) GT -> do pushIndent (startCol span) return indentToken where indentToken = IndentToken span symbolToken :: (SrcSpan -> Token) -> Action symbolToken mkToken location _ _ = return (mkToken location) token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action token mkToken read location len str = return $ mkToken location literal (read literal) where literal = take len str -- special tokens for the end of file and end of line endOfFileToken :: Token endOfFileToken = EOFToken SpanEmpty dedentToken = DedentToken SpanEmpty newlineToken :: P Token newlineToken = do loc <- getLastEOL return $ NewlineToken loc -- Test if we are at the end of the line or file atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, inputAfterToken) = null inputAfterToken || nextChar == '\n' || nextChar == '\r' where nextChar = head inputAfterToken notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool notEOF _user _inputBeforeToken _tokenLength (_loc, inputAfterToken) = not (null inputAfterToken) readBinary :: String -> Integer readBinary = toBinary . drop 2 where toBinary = foldl' acc 0 acc b '0' = 2 * b acc b '1' = 2 * b + 1 readFloat :: String -> Double readFloat str@('.':cs) = read ('0':readFloatRest str) readFloat str = read (readFloatRest str) readFloatRest :: String -> String readFloatRest [] = [] readFloatRest ['.'] = ".0" readFloatRest (c:cs) = c : readFloatRest cs mkString :: (SrcSpan -> String -> Token) -> Action mkString toToken loc len str = do return $ toToken loc (take len str) stringToken :: SrcSpan -> String -> Token stringToken = StringToken rawStringToken :: SrcSpan -> String -> Token rawStringToken = StringToken byteStringToken :: SrcSpan -> String -> Token byteStringToken = ByteStringToken unicodeStringToken :: SrcSpan -> String -> Token unicodeStringToken = UnicodeStringToken rawByteStringToken :: SrcSpan -> String -> Token rawByteStringToken = ByteStringToken openParen :: (SrcSpan -> Token) -> Action openParen mkToken loc _len _str = do let token = mkToken loc pushParen token return token closeParen :: (SrcSpan -> Token) -> Action closeParen mkToken loc _len _str = do let token = mkToken loc topParen <- getParen case topParen of Nothing -> spanError loc err1 Just open -> if matchParen open token then popParen >> return token else spanError loc err2 where -- XXX fix these error messages err1 = "Lexical error ! unmatched closing paren" err2 = "Lexical error ! unmatched closing paren" matchParen :: Token -> Token -> Bool matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = True matchParen (LeftBraceToken {}) (RightBraceToken {}) = True matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = True matchParen _ _ = False -- ----------------------------------------------------------------------------- -- Functionality required by Alex type AlexInput = (SrcLocation, String) alexInputPrevChar :: AlexInput -> Char alexInputPrevChar _ = error "alexInputPrevChar not used" alexGetChar :: AlexInput -> Maybe (Char, AlexInput) alexGetChar (loc, input) | null input = Nothing | otherwise = Just (nextChar, (nextLoc, rest)) where nextChar = head input rest = tail input nextLoc = moveChar nextChar loc mapFst :: (a -> b) -> (a, c) -> (b, c) mapFst f (a, c) = (f a, c) alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) alexGetByte = fmap (mapFst (fromIntegral . ord)) . alexGetChar moveChar :: Char -> SrcLocation -> SrcLocation moveChar '\n' = incLine 1 moveChar '\t' = incTab moveChar '\r' = id moveChar _ = incColumn 1 lexicalError :: P a lexicalError = do location <- getLocation c <- liftM head getInput throwError $ UnexpectedChar c location readOctNoO :: String -> Integer readOctNoO (zero:rest) = read (zero:'O':rest) language-python-0.4.1/src/Language/Python/Common/ParseError.hs0000644000000000000000000000217012441515301022443 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.ParseError -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Error values for the lexer and parser. ----------------------------------------------------------------------------- module Language.Python.Common.ParseError ( ParseError (..) ) where import Language.Python.Common.Pretty import Language.Python.Common.SrcLocation (SrcLocation) import Language.Python.Common.Token (Token) import Control.Monad.Error.Class data ParseError = UnexpectedToken Token -- ^ An error from the parser. Token found where it should not be. Note: tokens contain their own source span. | UnexpectedChar Char SrcLocation -- ^ An error from the lexer. Character found where it should not be. | StrError String -- ^ A generic error containing a string message. No source location. deriving (Eq, Ord, Show) instance Error ParseError where noMsg = StrError "" strMsg = StrError language-python-0.4.1/src/Language/Python/Common/ParserMonad.hs0000644000000000000000000001352212441515301022575 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.ParserMonad -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Monad support for Python parser and lexer. ----------------------------------------------------------------------------- module Language.Python.Common.ParserMonad ( P , execParser , execParserKeepComments , runParser , thenP , returnP , setLocation , getLocation , getInput , setInput , getLastToken , setLastToken , setLastEOL , getLastEOL , ParseError (..) , ParseState (..) , initialState , pushStartCode , popStartCode , getStartCode , getIndent , pushIndent , popIndent , getIndentStackDepth , getParen , pushParen , popParen , getParenStackDepth , addComment , getComments , spanError ) where import Language.Python.Common.SrcLocation (SrcLocation (..), SrcSpan (..), Span (..)) import Language.Python.Common.Token (Token (..)) import Language.Python.Common.ParseError (ParseError (..)) import Control.Applicative ((<$>)) import Control.Monad.State.Class import Control.Monad.State.Strict as State import Control.Monad.Error as Error import Control.Monad.Error.Class import Control.Monad.Identity as Identity import Control.Monad.Trans as Trans import Language.Python.Common.Pretty internalError :: String -> P a internalError = throwError . StrError spanError :: Span a => a -> String -> P b spanError x str = throwError $ StrError $ unwords [prettyText $ getSpan x, str] data ParseState = ParseState { location :: !SrcLocation -- position at current input location , input :: !String -- the current input , previousToken :: !Token -- the previous token , startCodeStack :: [Int] -- a stack of start codes for the state of the lexer , indentStack :: [Int] -- a stack of source column positions of indentation levels , parenStack :: [Token] -- a stack of parens and brackets for indentation handling , lastEOL :: !SrcSpan -- location of the most recent end-of-line encountered , comments :: [Token] -- accumulated comments } deriving Show initToken :: Token initToken = NewlineToken SpanEmpty initialState :: SrcLocation -> String -> [Int] -> ParseState initialState initLoc inp scStack = ParseState { location = initLoc , input = inp , previousToken = initToken , startCodeStack = scStack , indentStack = [1] , parenStack = [] , lastEOL = SpanEmpty , comments = [] } type P a = StateT ParseState (Either ParseError) a execParser :: P a -> ParseState -> Either ParseError a execParser = evalStateT execParserKeepComments :: P a -> ParseState -> Either ParseError (a, [Token]) execParserKeepComments parser state = evalStateT (parser >>= \x -> getComments >>= \c -> return (x, c)) state runParser :: P a -> ParseState -> Either ParseError (a, ParseState) runParser = runStateT {-# INLINE returnP #-} returnP :: a -> P a returnP = return {-# INLINE thenP #-} thenP :: P a -> (a -> P b) -> P b thenP = (>>=) {- failP :: SrcSpan -> [String] -> P a failP span strs = throwError (prettyText span ++ ": " ++ unwords strs) -} setLastEOL :: SrcSpan -> P () setLastEOL span = modify $ \s -> s { lastEOL = span } getLastEOL :: P SrcSpan getLastEOL = gets lastEOL setLocation :: SrcLocation -> P () setLocation loc = modify $ \s -> s { location = loc } getLocation :: P SrcLocation getLocation = gets location getInput :: P String getInput = gets input setInput :: String -> P () setInput inp = modify $ \s -> s { input = inp } getLastToken :: P Token getLastToken = gets previousToken setLastToken :: Token -> P () setLastToken tok = modify $ \s -> s { previousToken = tok } pushStartCode :: Int -> P () pushStartCode code = do oldStack <- gets startCodeStack modify $ \s -> s { startCodeStack = code : oldStack } popStartCode :: P () popStartCode = do oldStack <- gets startCodeStack case oldStack of [] -> internalError "fatal error in lexer: attempt to pop empty start code stack" _:rest -> modify $ \s -> s { startCodeStack = rest } getStartCode :: P Int getStartCode = do oldStack <- gets startCodeStack case oldStack of [] -> internalError "fatal error in lexer: start code stack empty on getStartCode" code:_ -> return code pushIndent :: Int -> P () pushIndent indent = do oldStack <- gets indentStack modify $ \s -> s { indentStack = indent : oldStack } popIndent :: P () popIndent = do oldStack <- gets indentStack case oldStack of [] -> internalError "fatal error in lexer: attempt to pop empty indentation stack" _:rest -> modify $ \s -> s { indentStack = rest } getIndent :: P Int getIndent = do oldStack <- gets indentStack case oldStack of [] -> internalError "fatal error in lexer: indent stack empty on getIndent" indent:_ -> return indent getIndentStackDepth :: P Int getIndentStackDepth = gets (length . indentStack) pushParen :: Token -> P () pushParen symbol = do oldStack <- gets parenStack modify $ \s -> s { parenStack = symbol : oldStack } popParen :: P () popParen = do oldStack <- gets parenStack case oldStack of [] -> internalError "fatal error in lexer: attempt to pop empty paren stack" _:rest -> modify $ \s -> s { parenStack = rest } getParen :: P (Maybe Token) getParen = do oldStack <- gets parenStack case oldStack of [] -> return Nothing symbol:_ -> return $ Just symbol getParenStackDepth :: P Int getParenStackDepth = gets (length . parenStack) addComment :: Token -> P () addComment c = do oldComments <- gets comments modify $ \s -> s { comments = c : oldComments } getComments :: P [Token] getComments = reverse <$> gets comments language-python-0.4.1/src/Language/Python/Common/ParserUtils.hs0000644000000000000000000002761412441515301022646 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.ParserUtils -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Various utilities to support the Python parser. ----------------------------------------------------------------------------- module Language.Python.Common.ParserUtils where import Data.List (foldl') import Data.Maybe (isJust) import Control.Monad.Error.Class (throwError) import Language.Python.Common.AST as AST import Language.Python.Common.Token as Token import Language.Python.Common.ParserMonad hiding (location) import Language.Python.Common.SrcLocation makeConditionalExpr :: ExprSpan -> Maybe (ExprSpan, ExprSpan) -> ExprSpan makeConditionalExpr e Nothing = e makeConditionalExpr e opt@(Just (cond, false_branch)) = CondExpr e cond false_branch (spanning e opt) makeBinOp :: ExprSpan -> [(OpSpan, ExprSpan)] -> ExprSpan makeBinOp e es = foldl' mkOp e es where mkOp e1 (op, e2) = BinaryOp op e1 e2 (spanning e1 e2) parseError :: Token -> P a parseError = throwError . UnexpectedToken data Trailer = TrailerCall { trailer_call_args :: [ArgumentSpan], trailer_span :: SrcSpan } | TrailerSubscript { trailer_subs :: [Subscript], trailer_span :: SrcSpan } | TrailerDot { trailer_dot_ident :: IdentSpan, dot_span :: SrcSpan, trailer_span :: SrcSpan } instance Span Trailer where getSpan = trailer_span data Subscript = SubscriptExpr { subscription :: ExprSpan, subscript_span :: SrcSpan } | SubscriptSlice { subscript_slice_span1 :: Maybe ExprSpan , subscript_slice_span2 :: Maybe ExprSpan , subscript_slice_span3 :: Maybe (Maybe ExprSpan) , subscript_span :: SrcSpan } | SubscriptSliceEllipsis { subscript_span :: SrcSpan } instance Span Subscript where getSpan = subscript_span isProperSlice :: Subscript -> Bool isProperSlice (SubscriptSlice {}) = True isProperSlice other = False subscriptToSlice :: Subscript -> SliceSpan subscriptToSlice (SubscriptSlice lower upper stride span) = SliceProper lower upper stride span subscriptToSlice (SubscriptExpr e span) = SliceExpr e span subscriptToSlice (SubscriptSliceEllipsis span) = SliceEllipsis span subscriptToExpr :: Subscript -> ExprSpan subscriptToExpr (SubscriptExpr { subscription = s }) = s subscriptToExpr other = error "subscriptToExpr applied to non subscript" subscriptsToExpr :: [Subscript] -> ExprSpan subscriptsToExpr subs | length subs > 1 = Tuple (map subscriptToExpr subs) (getSpan subs) | length subs == 1 = subscriptToExpr $ head subs | otherwise = error "subscriptsToExpr: empty subscript list" {- = TrailerCall { trailer_call_args :: [ArgumentSpan], trailer_span :: SrcSpan } | TrailerSubscript { trailer_subs :: [Subscript], trailer_span :: SrcSpan } | TrailerDot { trailer_dot_ident :: IdentSpan, dot_span :: SrcSpan, trailer_span :: SrcSpan } -} addTrailer :: ExprSpan -> [Trailer] -> ExprSpan addTrailer = foldl' trail where trail :: ExprSpan -> Trailer -> ExprSpan -- XXX fix the span trail e trail@(TrailerCall { trailer_call_args = args }) = Call e args (spanning e trail) trail e trail@(TrailerSubscript { trailer_subs = subs }) | any isProperSlice subs = SlicedExpr e (map subscriptToSlice subs) (spanning e trail) | otherwise = Subscript e (subscriptsToExpr subs) (spanning e trail) trail e trail@(TrailerDot { trailer_dot_ident = ident, dot_span = ds }) = BinaryOp (AST.Dot ds) e (Var ident (getSpan ident)) (spanning e trail) makeTupleOrExpr :: [ExprSpan] -> Maybe Token -> ExprSpan makeTupleOrExpr [e] Nothing = e makeTupleOrExpr es@(_:_) (Just t) = Tuple es (spanning es t) makeTupleOrExpr es@(_:_) Nothing = Tuple es (getSpan es) makeAssignmentOrExpr :: ExprSpan -> Either [ExprSpan] (AssignOpSpan, ExprSpan) -> StatementSpan makeAssignmentOrExpr e (Left es) = makeNormalAssignment e es where makeNormalAssignment :: ExprSpan -> [ExprSpan] -> StatementSpan makeNormalAssignment e [] = StmtExpr e (getSpan e) makeNormalAssignment e es = AST.Assign (e : front) (head back) (spanning e es) where (front, back) = splitAt (len - 1) es len = length es makeAssignmentOrExpr e1 (Right (op, e2)) = makeAugAssignment e1 op e2 where makeAugAssignment :: ExprSpan -> AssignOpSpan -> ExprSpan -> StatementSpan makeAugAssignment e1 op e2 = AST.AugmentedAssign e1 op e2 (spanning e1 e2) makeTry :: Token -> SuiteSpan -> ([HandlerSpan], [StatementSpan], [StatementSpan]) -> StatementSpan makeTry t1 body (handlers, elses, finally) = AST.Try body handlers elses finally (spanning (spanning (spanning (spanning t1 body) handlers) elses) finally) makeParam :: (IdentSpan, Maybe ExprSpan) -> Maybe ExprSpan -> ParameterSpan makeParam (name, annot) defaultVal = Param name annot defaultVal paramSpan where paramSpan = spanning (spanning name annot) defaultVal makeStarParam :: Token -> Maybe (IdentSpan, Maybe ExprSpan) -> ParameterSpan makeStarParam t1 Nothing = EndPositional (getSpan t1) makeStarParam t1 (Just (name, annot)) = VarArgsPos name annot (spanning t1 annot) makeStarStarParam :: Token -> (IdentSpan, Maybe ExprSpan) -> ParameterSpan makeStarStarParam t1 (name, annot) = VarArgsKeyword name annot (spanning (spanning t1 name) annot) -- version 2 only makeTupleParam :: ParamTupleSpan -> Maybe ExprSpan -> ParameterSpan -- just a name makeTupleParam p@(ParamTupleName {}) optDefault = Param (param_tuple_name p) Nothing optDefault (spanning p optDefault) -- a parenthesised tuple. NOTE: we do not distinguish between (foo) and (foo,) makeTupleParam p@(ParamTuple { param_tuple_annot = span }) optDefault = UnPackTuple p optDefault span makeComprehension :: ExprSpan -> CompForSpan -> ComprehensionSpan ExprSpan makeComprehension e for = Comprehension e for (spanning e for) makeListForm :: SrcSpan -> Either ExprSpan (ComprehensionSpan ExprSpan) -> ExprSpan makeListForm span (Left tuple@(Tuple {})) = List (tuple_exprs tuple) span makeListForm span (Left other) = List [other] span makeListForm span (Right comprehension) = ListComp comprehension span makeSet :: ExprSpan -> Either CompForSpan [ExprSpan] -> SrcSpan -> ExprSpan makeSet e (Left compFor) = SetComp (Comprehension e compFor (spanning e compFor)) makeSet e (Right es) = Set (e:es) makeDictionary :: (ExprSpan, ExprSpan) -> Either CompForSpan [(ExprSpan,ExprSpan)] -> SrcSpan -> ExprSpan makeDictionary e (Left compFor) = DictComp (Comprehension e compFor (spanning e compFor)) makeDictionary e (Right es) = Dictionary (e:es) fromEither :: Either a a -> a fromEither (Left x) = x fromEither (Right x) = x makeDecorator :: Token -> DottedNameSpan -> [ArgumentSpan] -> DecoratorSpan makeDecorator t1 name [] = Decorator name [] (spanning t1 name) makeDecorator t1 name args = Decorator name args (spanning t1 args) -- parser guarantees that the first list is non-empty makeDecorated :: [DecoratorSpan] -> StatementSpan -> StatementSpan makeDecorated ds@(d:_) def = Decorated ds def (spanning d def) -- suite can't be empty so it is safe to take span over it makeFun :: Token -> IdentSpan -> [ParameterSpan] -> Maybe ExprSpan -> SuiteSpan -> StatementSpan makeFun t1 name params annot body = Fun name params annot body $ spanning t1 body makeReturn :: Token -> Maybe ExprSpan -> StatementSpan makeReturn t1 Nothing = AST.Return Nothing (getSpan t1) makeReturn t1 expr@(Just e) = AST.Return expr (spanning t1 e) makeParenOrGenerator :: Either ExprSpan (ComprehensionSpan ExprSpan) -> SrcSpan -> ExprSpan makeParenOrGenerator (Left e) span = Paren e span makeParenOrGenerator (Right comp) span = Generator comp span makePrint :: Bool -> Maybe ([ExprSpan], Maybe Token) -> SrcSpan -> StatementSpan makePrint chevron Nothing span = AST.Print chevron [] False span makePrint chevron (Just (args, last_comma)) span = AST.Print chevron args (isJust last_comma) span {- makeRelative :: Int -> ImportRelativeSpan -> SrcSpan -> ImportRelativeSpan makeRelative dots importRelative span = importRelative { import_relative_dots = dots + oldDots, import_relative_annot = span } where oldDots = import_relative_dots importRelative -} makeRelative :: [Either Token DottedNameSpan] -> ImportRelativeSpan makeRelative items = ImportRelative ndots maybeName (getSpan items) where (ndots, maybeName) = countDots 0 items -- parser ensures that the dotted name will be at the end -- of the list if it is there at all countDots :: Int -> [Either Token DottedNameSpan] -> (Int, Maybe DottedNameSpan) countDots count [] = (count, Nothing) countDots count (Right name:_) = (count, Just name) countDots count (Left token:rest) = countDots (count + dots token) rest dots (DotToken {}) = 1 dots (EllipsisToken {}) = 3 {- See: http://www.python.org/doc/3.0/reference/expressions.html#calls arglist: (argument ',')* (argument [','] |'*' test (',' argument)* [',' '**' test] |'**' test) (state 1) Positional arguments come first. (state 2) Then keyword arguments. (state 3) Then the single star form. (state 4) Then more keyword arguments (but no positional arguments). (state 5) Then the double star form. XXX fixme: we need to include SrcLocations for the errors. -} checkArguments :: [ArgumentSpan] -> P [ArgumentSpan] checkArguments args = do check 1 args return args where check :: Int -> [ArgumentSpan] -> P () check state [] = return () check 5 (arg:_) = spanError arg "an **argument must not be followed by any other arguments" check state (arg:rest) = do case arg of ArgExpr {} | state == 1 -> check state rest | state == 2 -> spanError arg "a positional argument must not follow a keyword argument" | otherwise -> spanError arg "a positional argument must not follow a *argument" ArgKeyword {} | state `elem` [1,2] -> check 2 rest | state `elem` [3,4] -> check 4 rest ArgVarArgsPos {} | state `elem` [1,2] -> check 3 rest | state `elem` [3,4] -> spanError arg "there must not be two *arguments in an argument list" ArgVarArgsKeyword {} -> check 5 rest {- See: http://docs.python.org/3.1/reference/compound_stmts.html#grammar-token-parameter_list parameter_list ::= (defparameter ",")* ( "*" [parameter] ("," defparameter)* [, "**" parameter] | "**" parameter | defparameter [","] ) (state 1) Parameters/unpack tuples first. (state 2) Then the single star (on its own or with parameter) (state 3) Then more parameters. (state 4) Then the double star form. XXX fixme, add support for version 2 unpack tuple. -} checkParameters :: [ParameterSpan] -> P [ParameterSpan] checkParameters params = do check 1 params return params where check :: Int -> [ParameterSpan] -> P () check state [] = return () check 4 (param:_) = spanError param "a **parameter must not be followed by any other parameters" check state (param:rest) = do case param of -- Param and UnPackTuple are treated the same. UnPackTuple {} | state `elem` [1,3] -> check state rest | state == 2 -> check 3 rest Param {} | state `elem` [1,3] -> check state rest | state == 2 -> check 3 rest EndPositional {} | state == 1 -> check 2 rest | otherwise -> spanError param "there must not be two *parameters in a parameter list" VarArgsPos {} | state == 1 -> check 2 rest | otherwise -> spanError param "there must not be two *parameters in a parameter list" VarArgsKeyword {} -> check 4 rest {- spanError :: Span a => a -> String -> P () spanError x str = throwError $ StrError $ unwords [prettyText $ getSpan x, str] -} language-python-0.4.1/src/Language/Python/Common/Pretty.hs0000644000000000000000000000361512441515301021653 0ustar0000000000000000{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.Pretty -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Convenience class for pretty printing combinators. ----------------------------------------------------------------------------- module Language.Python.Common.Pretty (module TextPP, module Language.Python.Common.Pretty) where import Text.PrettyPrint as TextPP -------------------------------------------------------------------------------- -- | All types which can be transformed into a 'Doc'. class Pretty a where pretty :: a -> Doc -- | Transform values into strings. prettyText :: Pretty a => a -> String prettyText = render . pretty -- | Print just the prefix of something prettyPrefix :: Pretty a => Int -> a -> Doc prettyPrefix maxLen x | length fullText <= maxLen = pretty fullText | otherwise = pretty (take maxLen fullText) <+> text "..." where fullText = prettyText x instance Pretty String where pretty s = text s -- | Conditionally wrap parentheses around an item. parensIf :: Pretty a => (a -> Bool) -> a -> Doc parensIf test x = if test x then parens $ pretty x else pretty x perhaps :: Pretty a => Maybe a -> Doc -> Doc perhaps Nothing doc = empty perhaps (Just {}) doc = doc -- | A list of things separated by commas. commaList :: Pretty a => [a] -> Doc commaList = hsep . punctuate comma . map pretty instance Pretty Int where pretty = int instance Pretty Integer where pretty = integer instance Pretty Double where pretty = double instance Pretty Bool where pretty True = text "True" pretty False = text "False" instance Pretty a => Pretty (Maybe a) where pretty Nothing = empty pretty (Just x) = pretty x language-python-0.4.1/src/Language/Python/Common/PrettyAST.hs0000644000000000000000000003245112441515301022223 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Syntax.PrettyAST -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Pretty printing of the Python abstract syntax (version 2.x and 3.x). ----------------------------------------------------------------------------- module Language.Python.Common.PrettyAST () where import Language.Python.Common.Pretty import Language.Python.Common.AST -------------------------------------------------------------------------------- dot :: Doc dot = char '.' indent :: Doc -> Doc indent doc = nest 4 doc -- XXX is there a better way to do this? blankLine :: Doc blankLine = text [] prettyString :: String -> Doc -- XXX should handle the escaping properly -- prettyString str = text (show str) prettyString str = text str instance Pretty (Module a) where pretty (Module stmts) = vcat $ map pretty stmts instance Pretty (Ident a) where pretty name@(Ident {}) = text $ ident_string name prettyDottedName :: DottedName a -> Doc prettyDottedName [] = empty prettyDottedName [name] = pretty name prettyDottedName (name:rest@(_:_)) = pretty name <> dot <> prettyDottedName rest instance Pretty (ImportItem a) where pretty (ImportItem {import_item_name = name, import_as_name = asName}) = prettyDottedName name <+> (maybe empty (\n -> text "as" <+> pretty n) asName) instance Pretty (FromItem a) where pretty (FromItem { from_item_name = name, from_as_name = asName }) = pretty name <+> (maybe empty (\n -> text "as" <+> pretty n) asName) instance Pretty (FromItems a) where pretty ImportEverything {} = char '*' pretty (FromItems { from_items_items = [item] }) = pretty item pretty (FromItems { from_items_items = items }) = parens (commaList items) instance Pretty (ImportRelative a) where pretty (ImportRelative { import_relative_dots = dots, import_relative_module = mod }) = case mod of Nothing -> dotDoc Just name -> dotDoc <> prettyDottedName name where dotDoc = text (replicate dots '.') prettySuite :: [Statement a] -> Doc prettySuite stmts = vcat $ map pretty stmts optionalKeywordSuite :: String -> [Statement a] -> Doc optionalKeywordSuite _ [] = empty optionalKeywordSuite keyword stmts = text keyword <> colon $+$ indent (prettySuite stmts) prettyParenList :: Pretty a => [a] -> Doc prettyParenList = parens . commaList prettyOptionalList :: Pretty a => [a] -> Doc prettyOptionalList [] = empty prettyOptionalList list = parens $ commaList list prettyGuards :: [(Expr a, Suite a)] -> Doc prettyGuards [] = empty prettyGuards ((cond,body):guards) = text "elif" <+> pretty cond <> colon $+$ indent (prettySuite body) $+$ prettyGuards guards instance Pretty (Statement a) where -- pretty :: Statement -> Doc pretty (Import { import_items = items}) = text "import" <+> commaList items pretty stmt@(FromImport {}) = text "from" <+> pretty (from_module stmt) <+> text "import" <+> pretty (from_items stmt) pretty stmt@(While {}) = text "while" <+> pretty (while_cond stmt) <> colon $+$ indent (prettySuite (while_body stmt)) $+$ optionalKeywordSuite "else" (while_else stmt) pretty stmt@(For {}) = text "for" <+> commaList (for_targets stmt) <+> text "in" <+> pretty (for_generator stmt) <> colon $+$ indent (prettySuite (for_body stmt)) $+$ optionalKeywordSuite "else" (for_else stmt) pretty stmt@(Fun {}) = text "def" <+> pretty (fun_name stmt) <> parens (commaList (fun_args stmt)) <+> perhaps (fun_result_annotation stmt) (text "->") <+> pretty (fun_result_annotation stmt) <> colon $+$ indent (prettySuite (fun_body stmt)) pretty stmt@(Class {}) = text "class" <+> pretty (class_name stmt) <> prettyOptionalList (class_args stmt) <> colon $+$ indent (prettySuite (class_body stmt)) pretty stmt@(Conditional { cond_guards = guards, cond_else = optionalElse }) = case guards of (cond,body):xs -> text "if" <+> pretty cond <> colon $+$ indent (prettySuite body) $+$ prettyGuards xs $+$ optionalKeywordSuite "else" optionalElse -- XXX is the assign_to always a singleton? pretty (Assign { assign_to = pattern, assign_expr = e }) = commaList pattern <+> equals <+> pretty e pretty (AugmentedAssign { aug_assign_to = to_expr, aug_assign_op = op, aug_assign_expr = e}) = pretty to_expr <+> pretty op <+> pretty e pretty (Decorated { decorated_decorators = decs, decorated_def = stmt}) = vcat (map pretty decs) $+$ pretty stmt pretty (Return { return_expr = e }) = text "return" <+> pretty e pretty (Try { try_body = body, try_excepts = handlers, try_else = optionalElse, try_finally = finally}) = text "try" <> colon $+$ indent (prettySuite body) $+$ prettyHandlers handlers $+$ optionalKeywordSuite "else" optionalElse $+$ optionalKeywordSuite "finally" finally pretty (Raise { raise_expr = e }) = text "raise" <+> pretty e pretty (With { with_context = context, with_body = body }) = text "with" <+> hcat (punctuate comma (map prettyWithContext context)) <+> colon $+$ indent (prettySuite body) pretty Pass {} = text "pass" pretty Break {} = text "break" pretty Continue {} = text "continue" pretty (Delete { del_exprs = es }) = text "del" <+> commaList es pretty (StmtExpr { stmt_expr = e }) = pretty e pretty (Global { global_vars = idents }) = text "global" <+> commaList idents pretty (NonLocal { nonLocal_vars = idents }) = text "nonlocal" <+> commaList idents pretty (Assert { assert_exprs = es }) = text "assert" <+> commaList es pretty (Print { print_chevron = have_chevron, print_exprs = es, print_trailing_comma = trail_comma }) = text "print" <> (if have_chevron then text " >>" else empty) <+> hcat (punctuate comma (map pretty es)) <> if trail_comma then comma else empty pretty (Exec { exec_expr = e, exec_globals_locals = gls }) = text "exec" <+> pretty e <+> maybe empty (\ (globals, next) -> text "in" <+> pretty globals <+> maybe empty (\locals -> comma <+> pretty locals) next) gls prettyWithContext :: (Expr a, Maybe (Expr a)) -> Doc prettyWithContext (e, Nothing) = pretty e prettyWithContext (e, Just as) = pretty e <+> text "as" <+> pretty as prettyHandlers :: [Handler a] -> Doc prettyHandlers = foldr (\next rec -> pretty next $+$ rec) empty instance Pretty (Handler a) where pretty (Handler { handler_clause = exceptClause, handler_suite = suite }) = pretty exceptClause <> colon $+$ indent (prettySuite suite) instance Pretty (ExceptClause a) where pretty (ExceptClause { except_clause = Nothing }) = text "except" pretty (ExceptClause { except_clause = Just (e, target)}) = text "except" <+> pretty e <+> maybe empty (\t -> text "as" <+> pretty t) target instance Pretty (RaiseExpr a) where pretty (RaiseV3 e) = maybe empty (\ (x, fromE) -> pretty x <+> (maybe empty (\f -> text "from" <+> pretty f) fromE)) e pretty (RaiseV2 exp) = maybe empty (\ (e1, next1) -> pretty e1 <> maybe empty (\ (e2, next2) -> comma <+> pretty e2 <> maybe empty (\ e3 -> comma <+> pretty e3) next2) next1) exp instance Pretty (Decorator a) where pretty (Decorator { decorator_name = name, decorator_args = args }) = char '@' <> prettyDottedName name <+> prettyOptionalList args instance Pretty (Parameter a) where pretty (Param { param_name = ident, param_py_annotation = annot, param_default = def }) = pretty ident <> (maybe empty (\e -> colon <> pretty e <> space) annot) <> maybe empty (\e -> equals <> pretty e) def pretty (VarArgsPos { param_name = ident, param_py_annotation = annot}) = char '*' <> pretty ident <> (maybe empty (\e -> colon <> pretty e) annot) pretty (VarArgsKeyword { param_name = ident, param_py_annotation = annot }) = text "**" <> pretty ident <> (maybe empty (\e -> colon <> pretty e) annot) pretty EndPositional {} = char '*' pretty (UnPackTuple { param_unpack_tuple = tuple, param_default = def }) = pretty tuple <> maybe empty (\e -> equals <> pretty e) def instance Pretty (ParamTuple a) where pretty (ParamTupleName { param_tuple_name = name }) = pretty name pretty (ParamTuple { param_tuple = tuple }) = prettyParenList tuple instance Pretty (Argument a) where pretty (ArgExpr { arg_expr = e }) = pretty e pretty (ArgVarArgsPos { arg_expr = e}) = char '*' <> pretty e pretty (ArgVarArgsKeyword { arg_expr = e }) = text "**" <> pretty e pretty (ArgKeyword { arg_keyword = ident, arg_expr = e }) = pretty ident <> equals <> pretty e instance Pretty t => Pretty (Comprehension t a) where pretty (Comprehension { comprehension_expr = e, comprehension_for = for }) = pretty e <+> pretty for instance Pretty (CompFor a) where pretty (CompFor { comp_for_exprs = es, comp_in_expr = e, comp_for_iter = iter }) = text "for" <+> commaList es <+> text "in" <+> pretty e <+> pretty iter instance Pretty (CompIf a) where pretty (CompIf { comp_if = e, comp_if_iter = iter }) = text "if" <+> pretty e <+> pretty iter instance Pretty (CompIter a) where pretty (IterFor { comp_iter_for = compFor }) = pretty compFor pretty (IterIf { comp_iter_if = compIf }) = pretty compIf instance Pretty (Expr a) where pretty (Var { var_ident = i }) = pretty i pretty (Int { expr_literal = str }) = text str pretty (LongInt { expr_literal = str }) = text str pretty (Float { expr_literal = str }) = text str pretty (Imaginary { expr_literal = str }) = text str pretty (Bool { bool_value = b}) = pretty b pretty None {} = text "None" pretty Ellipsis {} = text "..." pretty (ByteStrings { byte_string_strings = bs }) = hcat (map pretty bs) pretty (Strings { strings_strings = ss }) = hcat (map prettyString ss) pretty (UnicodeStrings { unicodestrings_strings = ss }) = hcat (map prettyString ss) pretty (Call { call_fun = f, call_args = args }) = pretty f <> prettyParenList args pretty (Subscript { subscriptee = e, subscript_expr = sub }) = pretty e <> brackets (pretty sub) pretty (SlicedExpr { slicee = e, slices = ss }) = pretty e <> brackets (commaList ss) pretty (CondExpr { ce_true_branch = trueBranch, ce_condition = cond, ce_false_branch = falseBranch }) = pretty trueBranch <+> text "if" <+> pretty cond <+> text "else" <+> pretty falseBranch pretty (BinaryOp { operator = op, left_op_arg = left, right_op_arg = right }) = pretty left <> (if isDot op then dot else space <> pretty op <> space) <> pretty right where isDot (Dot {}) = True isDot _other = False pretty (UnaryOp { operator = op, op_arg = e }) = pretty op <+> pretty e pretty (Lambda { lambda_args = args, lambda_body = body }) = text "lambda" <+> commaList args <> colon <+> pretty body pretty (Tuple { tuple_exprs = es }) = case es of [] -> text "()" [e] -> pretty e <> comma _ -> commaList es pretty (Yield { yield_expr = e }) = text "yield" <+> pretty e pretty (List { list_exprs = es }) = brackets (commaList es) pretty (Dictionary { dict_mappings = mappings }) = braces (hsep (punctuate comma $ map (\ (e1,e2) -> pretty e1 <> colon <> pretty e2) mappings)) pretty (Set { set_exprs = es }) = braces $ commaList es pretty (ListComp { list_comprehension = lc }) = brackets $ pretty lc pretty (Generator { gen_comprehension = gc }) = parens $ pretty gc pretty (Paren { paren_expr = e }) = parens $ pretty e instance Pretty (Slice a) where pretty (SliceProper { slice_lower = lower, slice_upper = upper, slice_stride = stride }) = pretty lower <> colon <> pretty upper <> (maybe empty (\s -> colon <> pretty s) stride) pretty (SliceExpr { slice_expr = e }) = pretty e instance Pretty (Op a) where pretty (And {}) = text "and" pretty (Or {}) = text "or" pretty (Not {}) = text "not" pretty (Exponent {}) = text "**" pretty (LessThan {}) = text "<" pretty (GreaterThan {}) = text ">" pretty (Equality {}) = text "==" pretty (GreaterThanEquals {}) = text ">=" pretty (LessThanEquals {}) = text "<=" pretty (NotEquals {}) = text "!=" pretty (NotEqualsV2 {}) = text "<>" pretty (In {}) = text "in" pretty (Is {}) = text "is" pretty (IsNot {}) = text "is not" pretty (NotIn {}) = text "not in" pretty (BinaryOr {}) = text "|" pretty (Xor {}) = text "^" pretty (BinaryAnd {}) = text "&" pretty (ShiftLeft {}) = text "<<" pretty (ShiftRight {}) = text ">>" pretty (Multiply {}) = text "*" pretty (Plus {}) = text "+" pretty (Minus {}) = text "-" pretty (Divide {}) = text "/" pretty (FloorDivide {}) = text "//" pretty (Invert {}) = text "~" pretty (Modulo {}) = text "%" pretty (Dot {}) = dot instance Pretty (AssignOp a) where pretty (PlusAssign {}) = text "+=" pretty (MinusAssign {}) = text "-=" pretty (MultAssign {}) = text "*=" pretty (DivAssign {}) = text "/=" pretty (ModAssign {}) = text "%=" pretty (PowAssign {}) = text "**=" pretty (BinAndAssign {}) = text "&=" pretty (BinOrAssign {}) = text "|=" pretty (BinXorAssign {}) = text "^=" pretty (LeftShiftAssign {}) = text "<<=" pretty (RightShiftAssign {}) = text ">>=" pretty (FloorDivAssign {}) = text "//=" language-python-0.4.1/src/Language/Python/Common/PrettyParseError.hs0000644000000000000000000000161512441515301023656 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.PrettyParseError -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Pretty printing of parse errors. ----------------------------------------------------------------------------- module Language.Python.Common.PrettyParseError where import Language.Python.Common.Pretty import Language.Python.Common.ParseError (ParseError (..)) import Language.Python.Common.SrcLocation import Language.Python.Common.PrettyToken instance Pretty ParseError where pretty (UnexpectedToken t) = pretty (getSpan t) <+> text "unexpected token:" <+> pretty t pretty (UnexpectedChar c loc) = pretty loc <+> text "unexpected characer:" <+> char c pretty (StrError str) = text str language-python-0.4.1/src/Language/Python/Common/PrettyToken.hs0000644000000000000000000001166612441515301022661 0ustar0000000000000000----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.PrettyToken -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Pretty printing of tokens. Note the output is intended for displaying in -- messages to the user, and may not be valid Python syntax. For instance -- the pretty printing is useful for displaying parser error messages, but -- not useful for producing concrete Python source. ----------------------------------------------------------------------------- module Language.Python.Common.PrettyToken where import Language.Python.Common.Token import Language.Python.Common.Pretty instance Pretty Token where pretty tok = case tok of IndentToken {} -> text "indentation" DedentToken {} -> text "dedentation" NewlineToken {} -> text "end of line" CommentToken { token_literal = str } -> text "comment:" <+> prettyPrefix 10 str IdentifierToken { token_literal = str } -> text "identifier:" <+> text str StringToken { token_literal = str } -> text "string:" <+> prettyPrefix 10 str ByteStringToken { token_literal = str } -> text "byte string:" <+> prettyPrefix 10 str IntegerToken { token_literal = str } -> text "integer:" <+> text str LongIntegerToken { token_literal = str } -> text "long integer:" <+> text str FloatToken { token_literal = str } -> text "floating point number:" <+> text str ImaginaryToken { token_literal = str } -> text "imaginary number:" <+> text str DefToken {} -> text "def" WhileToken {} -> text "while" IfToken {} -> text "if" TrueToken {} -> text "True" FalseToken {} -> text "False" ReturnToken {} -> text "return" TryToken {} -> text "try" ExceptToken {} -> text "except" RaiseToken {} -> text "raise" InToken {} -> text "in" IsToken {} -> text "is" LambdaToken {} -> text "lambda" ClassToken {} -> text "class" FinallyToken {} -> text "finally" NoneToken {} -> text "None" ForToken {} -> text "for" FromToken {} -> text "from" GlobalToken {} -> text "global" WithToken {} -> text "with" AsToken {} -> text "as" ElifToken {} -> text "elif" YieldToken {} -> text "yield" AssertToken {} -> text "assert" ImportToken {} -> text "import" PassToken {} -> text "pass" BreakToken {} -> text "break" ContinueToken {} -> text "continue" DeleteToken {} -> text "delete" ElseToken {} -> text "else" NotToken {} -> text "not" AndToken {} -> text "and" OrToken {} -> text "or" NonLocalToken {} -> text "nonlocal" PrintToken {} -> text "print" ExecToken {} -> text "exec" AtToken {} -> text "at" LeftRoundBracketToken {} -> text "(" RightRoundBracketToken {} -> text ")" LeftSquareBracketToken {} -> text "[" RightSquareBracketToken {} -> text "]" LeftBraceToken {} -> text "{" RightBraceToken {} -> text "}" DotToken {} -> text "." CommaToken {} -> text "," SemiColonToken {} -> text ";" ColonToken {} -> text ":" EllipsisToken {} -> text "..." RightArrowToken {} -> text "->" AssignToken {} -> text "=" PlusAssignToken {} -> text "+=" MinusAssignToken {} -> text "-=" MultAssignToken {} -> text "*=" DivAssignToken {} -> text "/=" ModAssignToken {} -> text "%=" PowAssignToken {} -> text "**=" BinAndAssignToken {} -> text "&=" BinOrAssignToken {} -> text "|=" BinXorAssignToken {} -> text "^=" LeftShiftAssignToken {} -> text "<<=" RightShiftAssignToken {} -> text ">>=" FloorDivAssignToken {} -> text "//=" BackQuoteToken {} -> text "` (back quote)" PlusToken {} -> text "+" MinusToken {} -> text "-" MultToken {} -> text "*" DivToken {} -> text "/" GreaterThanToken {} -> text ">" LessThanToken {} -> text "<" EqualityToken {} -> text "==" GreaterThanEqualsToken {} -> text ">=" LessThanEqualsToken {} -> text "<=" ExponentToken {} -> text "**" BinaryOrToken {} -> text "|" XorToken {} -> text "^" BinaryAndToken {} -> text "&" ShiftLeftToken {} -> text "<<" ShiftRightToken {} -> text ">>" ModuloToken {} -> text "%" FloorDivToken {} -> text "//" TildeToken {} -> text "~" NotEqualsToken {} -> text "!=" NotEqualsV2Token {} -> text "<>" EOFToken {} -> text "end of input" LineJoinToken {} -> text "line join" language-python-0.4.1/src/Language/Python/Common/SrcLocation.hs0000644000000000000000000001724212441515301022605 0ustar0000000000000000{-# LANGUAGE CPP, DeriveDataTypeable #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.SrcLocation -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Source location information for the Python lexer and parser. This module -- provides single-point locations and spans, and conversions between them. ----------------------------------------------------------------------------- module Language.Python.Common.SrcLocation ( -- * Construction SrcLocation (..), SrcSpan (..), Span (..), spanning, mkSrcSpan, combineSrcSpans, initialSrcLocation, spanStartPoint, -- * Modification incColumn, decColumn, incLine, incTab, endCol, -- * Projection of components of a span endRow, startCol, startRow ) where import Language.Python.Common.Pretty import Data.Data -- | A location for a syntactic entity from the source code. -- The location is specified by its filename, and starting row -- and column. data SrcLocation = Sloc { sloc_filename :: !String , sloc_row :: {-# UNPACK #-} !Int , sloc_column :: {-# UNPACK #-} !Int } | NoLocation deriving (Eq,Ord,Show,Typeable,Data) instance Pretty SrcLocation where pretty = pretty . getSpan -- | Types which have a span. class Span a where getSpan :: a -> SrcSpan getSpan x = SpanEmpty -- | Create a new span which encloses two spanned things. spanning :: (Span a, Span b) => a -> b -> SrcSpan spanning x y = combineSrcSpans (getSpan x) (getSpan y) instance Span a => Span [a] where getSpan [] = SpanEmpty getSpan [x] = getSpan x getSpan list@(x:xs) = combineSrcSpans (getSpan x) (getSpan (last list)) instance Span a => Span (Maybe a) where getSpan Nothing = SpanEmpty getSpan (Just x) = getSpan x instance (Span a, Span b) => Span (Either a b) where getSpan (Left x) = getSpan x getSpan (Right x) = getSpan x instance (Span a, Span b) => Span (a, b) where getSpan (x,y) = spanning x y instance Span SrcSpan where getSpan = id -- | Construct the initial source location for a file. initialSrcLocation :: String -> SrcLocation initialSrcLocation filename = Sloc { sloc_filename = filename , sloc_row = 1 , sloc_column = 1 } -- | Decrement the column of a location, only if they are on the same row. decColumn :: Int -> SrcLocation -> SrcLocation decColumn n loc | n < col = loc { sloc_column = col - n } | otherwise = loc where col = sloc_column loc -- | Increment the column of a location. incColumn :: Int -> SrcLocation -> SrcLocation incColumn n loc@(Sloc { sloc_column = col }) = loc { sloc_column = col + n } -- | Increment the column of a location by one tab stop. incTab :: SrcLocation -> SrcLocation incTab loc@(Sloc { sloc_column = col }) = loc { sloc_column = newCol } where newCol = col + 8 - (col - 1) `mod` 8 -- | Increment the line number (row) of a location by one. incLine :: Int -> SrcLocation -> SrcLocation incLine n loc@(Sloc { sloc_row = row }) = loc { sloc_column = 1, sloc_row = row + n } {- Inspired heavily by compiler/basicTypes/SrcLoc.lhs A SrcSpan delimits a portion of a text file. -} -- | Source location spanning a contiguous section of a file. data SrcSpan -- | A span which starts and ends on the same line. = SpanCoLinear { span_filename :: !String , span_row :: {-# UNPACK #-} !Int , span_start_column :: {-# UNPACK #-} !Int , span_end_column :: {-# UNPACK #-} !Int } -- | A span which starts and ends on different lines. | SpanMultiLine { span_filename :: !String , span_start_row :: {-# UNPACK #-} !Int , span_start_column :: {-# UNPACK #-} !Int , span_end_row :: {-# UNPACK #-} !Int , span_end_column :: {-# UNPACK #-} !Int } -- | A span which is actually just one point in the file. | SpanPoint { span_filename :: !String , span_row :: {-# UNPACK #-} !Int , span_column :: {-# UNPACK #-} !Int } -- | No span information. | SpanEmpty deriving (Eq,Ord,Show,Typeable,Data) instance Pretty SrcSpan where pretty span@(SpanCoLinear {}) = prettyMultiSpan span pretty span@(SpanMultiLine {}) = prettyMultiSpan span pretty span@(SpanPoint {}) = text (span_filename span) <> colon <+> parens (pretty (span_row span) <> comma <> pretty (span_column span)) pretty SpanEmpty = empty prettyMultiSpan :: SrcSpan -> Doc prettyMultiSpan span = text (span_filename span) <> colon <+> parens (pretty (startRow span) <> comma <> pretty (startCol span)) <> char '-' <> parens (pretty (endRow span) <> comma <> pretty (endCol span)) instance Span SrcLocation where getSpan loc@(Sloc {}) = SpanPoint { span_filename = sloc_filename loc , span_row = sloc_row loc , span_column = sloc_column loc } getSpan NoLocation = SpanEmpty -- | Make a point span from the start of a span spanStartPoint :: SrcSpan -> SrcSpan spanStartPoint SpanEmpty = SpanEmpty spanStartPoint span = SpanPoint { span_filename = span_filename span , span_row = startRow span , span_column = startCol span } -- | Make a span from two locations. Assumption: either the -- arguments are the same, or the left one preceeds the right one. mkSrcSpan :: SrcLocation -> SrcLocation -> SrcSpan mkSrcSpan NoLocation _ = SpanEmpty mkSrcSpan _ NoLocation = SpanEmpty mkSrcSpan loc1 loc2 | line1 == line2 = if col2 <= col1 then SpanPoint file line1 col1 else SpanCoLinear file line1 col1 col2 | otherwise = SpanMultiLine file line1 col1 line2 col2 where line1 = sloc_row loc1 line2 = sloc_row loc2 col1 = sloc_column loc1 col2 = sloc_column loc2 file = sloc_filename loc1 -- | Combines two 'SrcSpan' into one that spans at least all the characters -- within both spans. Assumes the "file" part is the same in both inputs combineSrcSpans :: SrcSpan -> SrcSpan -> SrcSpan combineSrcSpans SpanEmpty r = r -- this seems more useful combineSrcSpans l SpanEmpty = l combineSrcSpans start end = case row1 `compare` row2 of EQ -> case col1 `compare` col2 of EQ -> SpanPoint file row1 col1 LT -> SpanCoLinear file row1 col1 col2 GT -> SpanCoLinear file row1 col2 col1 LT -> SpanMultiLine file row1 col1 row2 col2 GT -> SpanMultiLine file row2 col2 row1 col1 where row1 = startRow start col1 = startCol start row2 = endRow end col2 = endCol end file = span_filename start -- | Get the row of the start of a span. startRow :: SrcSpan -> Int startRow (SpanCoLinear { span_row = row }) = row startRow (SpanMultiLine { span_start_row = row }) = row startRow (SpanPoint { span_row = row }) = row startRow SpanEmpty = error "startRow called on empty span" -- | Get the row of the end of a span. endRow :: SrcSpan -> Int endRow (SpanCoLinear { span_row = row }) = row endRow (SpanMultiLine { span_end_row = row }) = row endRow (SpanPoint { span_row = row }) = row endRow SpanEmpty = error "endRow called on empty span" -- | Get the column of the start of a span. startCol :: SrcSpan -> Int startCol (SpanCoLinear { span_start_column = col }) = col startCol (SpanMultiLine { span_start_column = col }) = col startCol (SpanPoint { span_column = col }) = col startCol SpanEmpty = error "startCol called on empty span" -- | Get the column of the end of a span. endCol :: SrcSpan -> Int endCol (SpanCoLinear { span_end_column = col }) = col endCol (SpanMultiLine { span_end_column = col }) = col endCol (SpanPoint { span_column = col }) = col endCol SpanEmpty = error "endCol called on empty span" language-python-0.4.1/src/Language/Python/Common/StringEscape.hs0000644000000000000000000001061412441515301022750 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.StringEscape -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Conversion to/from escaped characters in strings. Note: currently does not -- support escaped Unicode character names. -- -- See: -- -- * Version 2.6 -- -- * Version 3.1 ----------------------------------------------------------------------------- module Language.Python.Common.StringEscape ( -- * String conversion. unescapeString, unescapeRawString, -- * Digits allowed in octal and hex representation. octalDigits, hexDigits) where import Numeric (readHex, readOct) -- | Convert escaped sequences of characters into /real/ characters in a normal Python string. -- XXX does not handle escaped unicode literals unescapeString :: String -> String unescapeString ('\\':'\\':cs) = '\\' : unescapeString cs -- Backslash (\) unescapeString ('\\':'\'':cs) = '\'' : unescapeString cs -- Single quote (') unescapeString ('\\':'"':cs) = '"' : unescapeString cs -- Double quote (") unescapeString ('\\':'a':cs) = '\a' : unescapeString cs -- ASCII Bell (BEL) unescapeString ('\\':'b':cs) = '\b' : unescapeString cs -- ASCII Backspace (BS) unescapeString ('\\':'f':cs) = '\f' : unescapeString cs -- ASCII Formfeed (FF) unescapeString ('\\':'n':cs) = '\n' : unescapeString cs -- ASCII Linefeed (LF) unescapeString ('\\':'r':cs) = '\r' : unescapeString cs -- ASCII Carriage Return (CR) unescapeString ('\\':'t':cs) = '\t' : unescapeString cs -- ASCII Horizontal Tab (TAB) unescapeString ('\\':'v':cs) = '\v' : unescapeString cs -- ASCII Vertical Tab (VT) unescapeString ('\\':'\n':cs) = unescapeString cs -- line continuation unescapeString ('\\':rest@(o:_)) | o `elem` octalDigits = unescapeNumeric 3 octalDigits (fst . head . readOct) rest unescapeString ('\\':'x':rest@(h:_)) | h `elem` hexDigits = unescapeNumeric 2 hexDigits (fst . head . readHex) rest unescapeString (c:cs) = c : unescapeString cs unescapeString [] = [] {- -- | This function is a placeholder for unescaping characters in raw strings. -- The Python documentation explicitly says that -- "When an 'r' or 'R' prefix is present, a character following a backslash is included -- in the string without change, and all backslashes are left in the string." -- However it also says that When an 'r' or 'R' prefix is used in conjunction with -- a 'u' or 'U' prefix, then the \uXXXX and \UXXXXXXXX escape sequences are processed -- while all other backslashes are left in the string. Currently the function is the identity -- but it ought to process unicode escape sequences. -} -- XXX does not handle escaped unicode literals unescapeRawString :: String -> String unescapeRawString = id {- -- | Convert escaped sequences of characters into /real/ characters in a raw Python string. -- Note: despite their name, Python raw strings do allow a small set of character escapings, -- namely the single and double quote characters and the line continuation marker. unescapeRawString ('\\':'\'':cs) = '\'' : unescapeRawString cs -- Single quote (') unescapeRawString ('\\':'"':cs) = '"' : unescapeRawString cs -- Double quote (") unescapeRawString ('\\':'\n':cs) = unescapeRawString cs -- line continuation unescapeRawString (c:cs) = c : unescapeRawString cs unescapeRawString [] = [] -} {- This is a bit complicated because Python allows between 1 and 3 octal characters after the \, and 1 and 2 hex characters after a \x. -} unescapeNumeric :: Int -> String -> (String -> Int) -> String -> String unescapeNumeric n numericDigits readNumeric str = loop n [] str where loop _ acc [] = [numericToChar acc] loop 0 acc rest = numericToChar acc : unescapeString rest loop n acc (c:cs) | c `elem` numericDigits = loop (n-1) (c:acc) cs | otherwise = numericToChar acc : unescapeString (c:cs) numericToChar :: String -> Char numericToChar = toEnum . readNumeric . reverse octalDigits, hexDigits :: String -- | The set of valid octal digits in Python. octalDigits = "01234567" -- | The set of valid hex digits in Python. hexDigits = "0123456789abcdef" language-python-0.4.1/src/Language/Python/Common/Token.hs0000644000000000000000000004445612441515301021454 0ustar0000000000000000{-# LANGUAGE CPP, DeriveDataTypeable #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Common.Token -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Lexical tokens for the Python lexer. Contains the superset of tokens from -- version 2 and version 3 of Python (they are mostly the same). ----------------------------------------------------------------------------- module Language.Python.Common.Token ( -- * The tokens Token (..), -- * String conversion debugTokenString, tokenString, -- * Classification hasLiteral, TokenClass (..), classifyToken ) where import Language.Python.Common.Pretty import Language.Python.Common.SrcLocation (SrcSpan (..), SrcLocation (..), Span(getSpan)) import Data.Data -- | Lexical tokens. data Token -- Whitespace = IndentToken { token_span :: !SrcSpan } -- ^ Indentation: increase. | DedentToken { token_span :: !SrcSpan } -- ^ Indentation: decrease. | NewlineToken { token_span :: !SrcSpan } -- ^ Newline. | LineJoinToken { token_span :: !SrcSpan } -- ^ Line join (backslash at end of line). -- Comment | CommentToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Single line comment. -- Identifiers | IdentifierToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Identifier. -- Literals | StringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: string. | ByteStringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: byte string. | UnicodeStringToken { token_span :: !SrcSpan, token_literal :: !String } -- ^ Literal: unicode string, version 2 only. | IntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer } -- ^ Literal: integer. | LongIntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer } -- ^ Literal: long integer. /Version 2 only/. | FloatToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double } -- ^ Literal: floating point. | ImaginaryToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double } -- ^ Literal: imaginary number. -- Keywords | DefToken { token_span :: !SrcSpan } -- ^ Keyword: \'def\'. | WhileToken { token_span :: !SrcSpan } -- ^ Keyword: \'while\'. | IfToken { token_span :: !SrcSpan } -- ^ Keyword: \'if\'. | TrueToken { token_span :: !SrcSpan } -- ^ Keyword: \'True\'. | FalseToken { token_span :: !SrcSpan } -- ^ Keyword: \'False\'. | ReturnToken { token_span :: !SrcSpan } -- ^ Keyword: \'Return\'. | TryToken { token_span :: !SrcSpan } -- ^ Keyword: \'try\'. | ExceptToken { token_span :: !SrcSpan } -- ^ Keyword: \'except\'. | RaiseToken { token_span :: !SrcSpan } -- ^ Keyword: \'raise\'. | InToken { token_span :: !SrcSpan } -- ^ Keyword: \'in\'. | IsToken { token_span :: !SrcSpan } -- ^ Keyword: \'is\'. | LambdaToken { token_span :: !SrcSpan } -- ^ Keyword: \'lambda\'. | ClassToken { token_span :: !SrcSpan } -- ^ Keyword: \'class\'. | FinallyToken { token_span :: !SrcSpan } -- ^ Keyword: \'finally\'. | NoneToken { token_span :: !SrcSpan } -- ^ Keyword: \'None\'. | ForToken { token_span :: !SrcSpan } -- ^ Keyword: \'for\'. | FromToken { token_span :: !SrcSpan } -- ^ Keyword: \'from\'. | GlobalToken { token_span :: !SrcSpan } -- ^ Keyword: \'global\'. | WithToken { token_span :: !SrcSpan } -- ^ Keyword: \'with\'. | AsToken { token_span :: !SrcSpan } -- ^ Keyword: \'as\'. | ElifToken { token_span :: !SrcSpan } -- ^ Keyword: \'elif\'. | YieldToken { token_span :: !SrcSpan } -- ^ Keyword: \'yield\'. | AssertToken { token_span :: !SrcSpan } -- ^ Keyword: \'assert\'. | ImportToken { token_span :: !SrcSpan } -- ^ Keyword: \'import\'. | PassToken { token_span :: !SrcSpan } -- ^ Keyword: \'pass\'. | BreakToken { token_span :: !SrcSpan } -- ^ Keyword: \'break\'. | ContinueToken { token_span :: !SrcSpan } -- ^ Keyword: \'continue\'. | DeleteToken { token_span :: !SrcSpan } -- ^ Keyword: \'del\'. | ElseToken { token_span :: !SrcSpan } -- ^ Keyword: \'else\'. | NotToken { token_span :: !SrcSpan } -- ^ Keyword: \'not\'. | AndToken { token_span :: !SrcSpan } -- ^ Keyword: boolean conjunction \'and\'. | OrToken { token_span :: !SrcSpan } -- ^ Keyword: boolean disjunction \'or\'. -- Version 3.x only: | NonLocalToken { token_span :: !SrcSpan } -- ^ Keyword: \'nonlocal\' (Python 3.x only) -- Version 2.x only: | PrintToken { token_span :: !SrcSpan } -- ^ Keyword: \'print\'. (Python 2.x only) | ExecToken { token_span :: !SrcSpan } -- ^ Keyword: \'exec\'. (Python 2.x only) -- Delimiters | AtToken { token_span :: !SrcSpan } -- ^ Delimiter: at sign \'\@\'. | LeftRoundBracketToken { token_span :: !SrcSpan } -- ^ Delimiter: left round bracket \'(\'. | RightRoundBracketToken { token_span :: !SrcSpan } -- ^ Delimiter: right round bracket \')\'. | LeftSquareBracketToken { token_span :: !SrcSpan } -- ^ Delimiter: left square bracket \'[\'. | RightSquareBracketToken { token_span :: !SrcSpan } -- ^ Delimiter: right square bracket \']\'. | LeftBraceToken { token_span :: !SrcSpan } -- ^ Delimiter: left curly bracket \'{\'. | RightBraceToken { token_span :: !SrcSpan } -- ^ Delimiter: right curly bracket \'}\'. | DotToken { token_span :: !SrcSpan } -- ^ Delimiter: dot (full stop) \'.\'. | CommaToken { token_span :: !SrcSpan } -- ^ Delimiter: comma \',\'. | SemiColonToken { token_span :: !SrcSpan } -- ^ Delimiter: semicolon \';\'. | ColonToken { token_span :: !SrcSpan } -- ^ Delimiter: colon \':\'. | EllipsisToken { token_span :: !SrcSpan } -- ^ Delimiter: ellipses (three dots) \'...\'. | RightArrowToken { token_span :: !SrcSpan } -- ^ Delimiter: right facing arrow \'->\'. | AssignToken { token_span :: !SrcSpan } -- ^ Delimiter: assignment \'=\'. | PlusAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: plus assignment \'+=\'. | MinusAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: minus assignment \'-=\'. | MultAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: multiply assignment \'*=\' | DivAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: divide assignment \'/=\'. | ModAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: modulus assignment \'%=\'. | PowAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: power assignment \'**=\'. | BinAndAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: binary-and assignment \'&=\'. | BinOrAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: binary-or assignment \'|=\'. | BinXorAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: binary-xor assignment \'^=\'. | LeftShiftAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: binary-left-shift assignment \'<<=\'. | RightShiftAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: binary-right-shift assignment \'>>=\'. | FloorDivAssignToken { token_span :: !SrcSpan } -- ^ Delimiter: floor-divide assignment \'//=\'. | BackQuoteToken { token_span :: !SrcSpan } -- ^ Delimiter: back quote character \'`\'. -- Operators | PlusToken { token_span :: !SrcSpan } -- ^ Operator: plus \'+\'. | MinusToken { token_span :: !SrcSpan } -- ^ Operator: minus: \'-\'. | MultToken { token_span :: !SrcSpan } -- ^ Operator: multiply \'*\'. | DivToken { token_span :: !SrcSpan } -- ^ Operator: divide \'/\'. | GreaterThanToken { token_span :: !SrcSpan } -- ^ Operator: greater-than \'>\'. | LessThanToken { token_span :: !SrcSpan } -- ^ Operator: less-than \'<\'. | EqualityToken { token_span :: !SrcSpan } -- ^ Operator: equals \'==\'. | GreaterThanEqualsToken { token_span :: !SrcSpan } -- ^ Operator: greater-than-or-equals \'>=\'. | LessThanEqualsToken { token_span :: !SrcSpan } -- ^ Operator: less-than-or-equals \'<=\'. | ExponentToken { token_span :: !SrcSpan } -- ^ Operator: exponential \'**\'. | BinaryOrToken { token_span :: !SrcSpan } -- ^ Operator: binary-or \'|\'. | XorToken { token_span :: !SrcSpan } -- ^ Operator: binary-xor \'^\'. | BinaryAndToken { token_span :: !SrcSpan } -- ^ Operator: binary-and \'&\'. | ShiftLeftToken { token_span :: !SrcSpan } -- ^ Operator: binary-shift-left \'<<\'. | ShiftRightToken { token_span :: !SrcSpan } -- ^ Operator: binary-shift-right \'>>\'. | ModuloToken { token_span :: !SrcSpan } -- ^ Operator: modulus \'%\'. | FloorDivToken { token_span :: !SrcSpan } -- ^ Operator: floor-divide \'//\'. | TildeToken { token_span :: !SrcSpan } -- ^ Operator: tilde \'~\'. | NotEqualsToken { token_span :: !SrcSpan } -- ^ Operator: not-equals \'!=\'. | NotEqualsV2Token { token_span :: !SrcSpan } -- ^ Operator: not-equals \'<>\'. Version 2 only. -- Special cases | EOFToken { token_span :: !SrcSpan } -- ^ End of file deriving (Eq,Ord,Show,Typeable,Data) instance Span Token where getSpan = token_span -- | Produce a string from a token containing detailed information. Mainly intended for debugging. debugTokenString :: Token -> String debugTokenString token = render (text (show $ toConstr token) <+> pretty (token_span token) <+> if hasLiteral token then text (token_literal token) else empty) -- | Test if a token contains its literal source text. hasLiteral :: Token -> Bool hasLiteral token = case token of CommentToken {} -> True IdentifierToken {} -> True StringToken {} -> True ByteStringToken {} -> True IntegerToken {} -> True LongIntegerToken {} -> True FloatToken {} -> True ImaginaryToken {} -> True other -> False -- | Classification of tokens data TokenClass = Comment | Number | Identifier | Punctuation | Bracket | Layout | Keyword | String | Operator | Assignment deriving (Show, Eq, Ord) classifyToken :: Token -> TokenClass classifyToken token = case token of IndentToken {} -> Layout DedentToken {} -> Layout NewlineToken {} -> Layout CommentToken {} -> Comment IdentifierToken {} -> Identifier StringToken {} -> String ByteStringToken {} -> String IntegerToken {} -> Number LongIntegerToken {} -> Number FloatToken {} -> Number ImaginaryToken {} -> Number DefToken {} -> Keyword WhileToken {} -> Keyword IfToken {} -> Keyword TrueToken {} -> Keyword FalseToken {} -> Keyword ReturnToken {} -> Keyword TryToken {} -> Keyword ExceptToken {} -> Keyword RaiseToken {} -> Keyword InToken {} -> Keyword IsToken {} -> Keyword LambdaToken {} -> Keyword ClassToken {} -> Keyword FinallyToken {} -> Keyword NoneToken {} -> Keyword ForToken {} -> Keyword FromToken {} -> Keyword GlobalToken {} -> Keyword WithToken {} -> Keyword AsToken {} -> Keyword ElifToken {} -> Keyword YieldToken {} -> Keyword AssertToken {} -> Keyword ImportToken {} -> Keyword PassToken {} -> Keyword BreakToken {} -> Keyword ContinueToken {} -> Keyword DeleteToken {} -> Keyword ElseToken {} -> Keyword NotToken {} -> Keyword AndToken {} -> Keyword OrToken {} -> Keyword NonLocalToken {} -> Keyword PrintToken {} -> Keyword ExecToken {} -> Keyword AtToken {} -> Keyword LeftRoundBracketToken {} -> Bracket RightRoundBracketToken {} -> Bracket LeftSquareBracketToken {} -> Bracket RightSquareBracketToken {} -> Bracket LeftBraceToken {} -> Bracket RightBraceToken {} -> Bracket DotToken {} -> Operator CommaToken {} -> Punctuation SemiColonToken {} -> Punctuation ColonToken {} -> Punctuation EllipsisToken {} -> Keyword -- What kind of thing is an ellipsis? RightArrowToken {} -> Punctuation AssignToken {} -> Assignment PlusAssignToken {} -> Assignment MinusAssignToken {} -> Assignment MultAssignToken {} -> Assignment DivAssignToken {} -> Assignment ModAssignToken {} -> Assignment PowAssignToken {} -> Assignment BinAndAssignToken {} -> Assignment BinOrAssignToken {} -> Assignment BinXorAssignToken {} -> Assignment LeftShiftAssignToken {} -> Assignment RightShiftAssignToken {} -> Assignment FloorDivAssignToken {} -> Assignment BackQuoteToken {} -> Punctuation PlusToken {} -> Operator MinusToken {} -> Operator MultToken {} -> Operator DivToken {} -> Operator GreaterThanToken {} -> Operator LessThanToken {} -> Operator EqualityToken {} -> Operator GreaterThanEqualsToken {} -> Operator LessThanEqualsToken {} -> Operator ExponentToken {} -> Operator BinaryOrToken {} -> Operator XorToken {} -> Operator BinaryAndToken {} -> Operator ShiftLeftToken {} -> Operator ShiftRightToken {} -> Operator ModuloToken {} -> Operator FloorDivToken {} -> Operator TildeToken {} -> Operator NotEqualsToken {} -> Operator NotEqualsV2Token {} -> Operator LineJoinToken {} -> Layout EOFToken {} -> Layout -- maybe a spurious classification. -- | Produce a string from a token which is suitable for printing as Python concrete syntax. -- /Invisible/ tokens yield an empty string. tokenString :: Token -> String tokenString token = case token of IndentToken {} -> "" DedentToken {} -> "" NewlineToken {} -> "" CommentToken {} -> token_literal token IdentifierToken {} -> token_literal token StringToken {} -> token_literal token ByteStringToken {} -> token_literal token IntegerToken {} -> token_literal token LongIntegerToken {} -> token_literal token FloatToken {} -> token_literal token ImaginaryToken {} -> token_literal token DefToken {} -> "def" WhileToken {} -> "while" IfToken {} -> "if" TrueToken {} -> "True" FalseToken {} -> "False" ReturnToken {} -> "return" TryToken {} -> "try" ExceptToken {} -> "except" RaiseToken {} -> "raise" InToken {} -> "in" IsToken {} -> "is" LambdaToken {} -> "lambda" ClassToken {} -> "class" FinallyToken {} -> "finally" NoneToken {} -> "None" ForToken {} -> "for" FromToken {} -> "from" GlobalToken {} -> "global" WithToken {} -> "with" AsToken {} -> "as" ElifToken {} -> "elif" YieldToken {} -> "yield" AssertToken {} -> "assert" ImportToken {} -> "import" PassToken {} -> "pass" BreakToken {} -> "break" ContinueToken {} -> "continue" DeleteToken {} -> "delete" ElseToken {} -> "else" NotToken {} -> "not" AndToken {} -> "and" OrToken {} -> "or" NonLocalToken {} -> "nonlocal" PrintToken {} -> "print" ExecToken {} -> "exec" AtToken {} -> "at" LeftRoundBracketToken {} -> "(" RightRoundBracketToken {} -> ")" LeftSquareBracketToken {} -> "[" RightSquareBracketToken {} -> "]" LeftBraceToken {} -> "{" RightBraceToken {} -> "}" DotToken {} -> "." CommaToken {} -> "," SemiColonToken {} -> ";" ColonToken {} -> ":" EllipsisToken {} -> "..." RightArrowToken {} -> "->" AssignToken {} -> "=" PlusAssignToken {} -> "+=" MinusAssignToken {} -> "-=" MultAssignToken {} -> "*=" DivAssignToken {} -> "/=" ModAssignToken {} -> "%=" PowAssignToken {} -> "**=" BinAndAssignToken {} -> "&=" BinOrAssignToken {} -> "|=" BinXorAssignToken {} -> "^=" LeftShiftAssignToken {} -> "<<=" RightShiftAssignToken {} -> ">>=" FloorDivAssignToken {} -> "//=" BackQuoteToken {} -> "`" PlusToken {} -> "+" MinusToken {} -> "-" MultToken {} -> "*" DivToken {} -> "/" GreaterThanToken {} -> ">" LessThanToken {} -> "<" EqualityToken {} -> "==" GreaterThanEqualsToken {} -> ">=" LessThanEqualsToken {} -> "<=" ExponentToken {} -> "**" BinaryOrToken {} -> "|" XorToken {} -> "^" BinaryAndToken {} -> "&" ShiftLeftToken {} -> "<<" ShiftRightToken {} -> ">>" ModuloToken {} -> "%" FloorDivToken {} -> "//" TildeToken {} -> "~" NotEqualsToken {} -> "!=" NotEqualsV2Token {} -> "<>" LineJoinToken {} -> "\\" EOFToken {} -> "" language-python-0.4.1/src/Language/Python/Version2/0000755000000000000000000000000012441515301020302 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Version2/Lexer.hs0000644000000000000000000000430612441515301021720 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Lexer -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Lexical analysis for Python version 2.x programs. -- See: . ----------------------------------------------------------------------------- module Language.Python.Version2.Lexer ( -- * Lexical analysis lex, lexOneToken) where import Prelude hiding (lex) import Language.Python.Version2.Parser.Lexer (lexToken, initStartCodeStack) import Language.Python.Common.Token as Token import Language.Python.Common.SrcLocation (initialSrcLocation) import Language.Python.Common.ParserMonad (ParseState (input), P, runParser, execParser, ParseError, initialState) -- | Parse a string into a list of Python Tokens, or return an error. lex :: String -- ^ The input stream (python source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError [Token] -- ^ An error or a list of tokens. lex input srcName = execParser lexer state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack -- | Try to lex the first token in an input string. Return either a parse error -- or a pair containing the next token and the rest of the input after the token. lexOneToken :: String -- ^ The input stream (python source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (Token, String) -- ^ An error or the next token and the rest of the input after the token. lexOneToken source srcName = case runParser lexToken state of Left err -> Left err Right (tok, state) -> Right (tok, input state) where initLoc = initialSrcLocation srcName state = initialState initLoc source initStartCodeStack lexer :: P [Token] lexer = loop [] where loop toks = do tok <- lexToken case tok of EOFToken {} -> return (reverse toks) other -> loop (tok:toks) language-python-0.4.1/src/Language/Python/Version2/Parser.hs0000644000000000000000000000657412441515301022106 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Parser -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- A parser for Python version 2.x programs. Parsers are provided for -- modules, statements, and expressions. The parsers produce comment tokens -- in addition to the abstract syntax tree. -- -- See: -- -- * for an overview of the language. -- -- * for the full grammar. -- -- * for a description of -- the various Python top-levels, which correspond to the parsers provided here. ----------------------------------------------------------------------------- module Language.Python.Version2.Parser ( -- * Parsing modules parseModule, -- * Parsing statements parseStmt, -- * Parsing expressions parseExpr) where import Language.Python.Version2.Parser.Parser (parseFileInput, parseSingleInput, parseEval) import Language.Python.Version2.Parser.Lexer (initStartCodeStack) import Language.Python.Common.AST (ModuleSpan, StatementSpan, ExprSpan) import Language.Python.Common.Token (Token) import Language.Python.Common.SrcLocation (initialSrcLocation) import Language.Python.Common.ParserMonad (execParser, execParserKeepComments, ParseError, initialState) -- | Parse a whole Python source file. Return comments in addition to the parsed module. parseModule :: String -- ^ The input stream (python module source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (ModuleSpan, [Token]) -- ^ An error or the abstract syntax tree (AST) of the python module and comment tokens. parseModule input srcName = execParserKeepComments parseFileInput state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack -- | Parse one compound statement, or a sequence of simple statements. Generally used for interactive input, such as from the command line of an interpreter. Return comments in addition to the parsed statements. parseStmt :: String -- ^ The input stream (python statement source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError ([StatementSpan], [Token]) -- ^ An error or maybe the abstract syntax tree (AST) of zero or more python statements, plus comments. parseStmt input srcName = execParserKeepComments parseSingleInput state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack -- | Parse an expression. Generally used as input for the \'eval\' primitive. Return comments in addition to the parsed expression. parseExpr :: String -- ^ The input stream (python statement source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (ExprSpan, [Token]) -- ^ An error or maybe the abstract syntax tree (AST) of the python expression, plus comment tokens. parseExpr input srcName = execParserKeepComments parseEval state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack language-python-0.4.1/src/Language/Python/Version2/Parser/0000755000000000000000000000000012441515301021536 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Version2/Parser/Lexer.x0000644000000000000000000002723312441515301023015 0ustar0000000000000000{ ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Parser.Lexer -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Implementation of a lexer for Python version 2.x programs. Generated by -- alex. ----------------------------------------------------------------------------- module Language.Python.Version2.Parser.Lexer (initStartCodeStack, lexToken, endOfFileToken, lexCont) where import Language.Python.Common.Token as Token import Language.Python.Common.ParserMonad hiding (location) import Language.Python.Common.SrcLocation import Language.Python.Common.LexerUtils import qualified Data.Map as Map import Control.Monad (liftM) import Data.List (foldl') import Numeric (readHex, readOct) } -- character sets $lf = \n -- line feed $cr = \r -- carriage return $eol_char = [$lf $cr] -- any end of line character $not_eol_char = ~$eol_char -- anything but an end of line character $white_char = [\ \n\r\f\v\t] $white_no_nl = $white_char # $eol_char $ident_letter = [a-zA-Z_] $digit = 0-9 $non_zero_digit = 1-9 $oct_digit = 0-7 $hex_digit = [$digit a-fA-F] $bin_digit = 0-1 $short_str_char = [^ \n \r ' \" \\] $long_str_char = [. \n] # [' \"] $short_byte_str_char = \0-\127 # [\n \r ' \" \\] $long_byte_str_char = \0-\127 # [' \"] $not_single_quote = [. \n] # ' $not_double_quote = [. \n] # \" -- macro definitions @exponent = (e | E) (\+ | \-)? $digit+ @fraction = \. $digit+ @int_part = $digit+ @point_float = (@int_part? @fraction) | @int_part \. @exponent_float = (@int_part | @point_float) @exponent @float_number = @point_float | @exponent_float @eol_pattern = $lf | $cr $lf | $cr $lf @one_single_quote = ' $not_single_quote @two_single_quotes = '' $not_single_quote @one_double_quote = \" $not_double_quote @two_double_quotes = \"\" $not_double_quote @byte_str_prefix = b | B @raw_str_prefix = r | R @unicode_str_prefix = u | U @raw_byte_str_prefix = @byte_str_prefix @raw_str_prefix @backslash_pair = \\ (\\|'|\"|@eol_pattern|$short_str_char) @backslash_pair_bs = \\ (\\|'|\"|@eol_pattern|$short_byte_str_char) @short_str_item_single = $short_str_char|@backslash_pair|\" @short_str_item_double = $short_str_char|@backslash_pair|' @short_byte_str_item_single = $short_byte_str_char|@backslash_pair_bs|\" @short_byte_str_item_double = $short_byte_str_char|@backslash_pair_bs|' @long_str_item_single = $long_str_char|@backslash_pair|@one_single_quote|@two_single_quotes|\" @long_str_item_double = $long_str_char|@backslash_pair|@one_double_quote|@two_double_quotes|' @long_byte_str_item_single = $long_byte_str_char|@backslash_pair_bs|@one_single_quote|@two_single_quotes|\" @long_byte_str_item_double = $long_byte_str_char|@backslash_pair_bs|@one_double_quote|@two_double_quotes|' tokens :- -- these rules below could match inside a string literal, but they -- will not be applied because the rule for the literal will always -- match a longer sequence of characters. \# ($not_eol_char)* { token (\ span lit val -> CommentToken span lit) id } $white_no_nl+ ; -- skip whitespace -- \\ @eol_pattern { endOfLine lexToken } -- line join \\ @eol_pattern { lineJoin } -- line join <0> { @float_number { token FloatToken readFloat } (@float_number | @int_part) (j | J) { token ImaginaryToken (readFloat.init) } $non_zero_digit $digit* { token IntegerToken read } $non_zero_digit $digit* (l | L) { token LongIntegerToken (read.init) } 0+ { token IntegerToken read } 0+ (l | L) { token LongIntegerToken (read.init) } 0 (o | O) $oct_digit+ { token IntegerToken read } 0 (o | O) $oct_digit+ (l | L) { token LongIntegerToken (read.init) } 0 $oct_digit+ { token IntegerToken readOctNoO } 0 $oct_digit+ (l | L) { token LongIntegerToken (readOctNoO.init) } 0 (x | X) $hex_digit+ { token IntegerToken read } 0 (x | X) $hex_digit+ (l | L) { token LongIntegerToken (read.init) } 0 (b | B) $bin_digit+ { token IntegerToken readBinary } 0 (b | B) $bin_digit+ (l | L) { token LongIntegerToken (readBinary.init) } } -- String literals <0> { ' @short_str_item_single* ' { mkString stringToken } @raw_str_prefix ' @short_str_item_single* ' { mkString rawStringToken } @byte_str_prefix ' @short_byte_str_item_single* ' { mkString byteStringToken } @raw_byte_str_prefix ' @short_byte_str_item_single* ' { mkString rawByteStringToken } @unicode_str_prefix ' @short_str_item_single* ' { mkString unicodeStringToken } \" @short_str_item_double* \" { mkString stringToken } @raw_str_prefix \" @short_str_item_double* \" { mkString rawStringToken } @byte_str_prefix \" @short_byte_str_item_double* \" { mkString byteStringToken } @raw_byte_str_prefix \" @short_byte_str_item_double* \" { mkString rawByteStringToken } @unicode_str_prefix \" @short_str_item_double* \" { mkString unicodeStringToken } ''' @long_str_item_single* ''' { mkString stringToken } @raw_str_prefix ''' @long_str_item_single* ''' { mkString rawStringToken } @byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString byteStringToken } @raw_byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString rawByteStringToken } @unicode_str_prefix ''' @long_str_item_single* ''' { mkString unicodeStringToken } \"\"\" @long_str_item_double* \"\"\" { mkString stringToken } @raw_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString rawStringToken } @byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString byteStringToken } @raw_byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString rawByteStringToken } @unicode_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString unicodeStringToken } } -- NOTE: we pass lexToken into some functions as an argument. -- That allows us to define those functions in a separate module, -- which increases code reuse in the lexer (because that code can -- be shared between the lexer for versions 2 and 3 of Python. -- Unfortunately lexToken must be defined in this file because -- it refers to data types which are only included by Alex in -- the generated file (this seems like a limitation in Alex -- that should be improved). <0> { @eol_pattern { bolEndOfLine lexToken bol } } () { dedentation lexToken } -- beginning of line { -- @eol_pattern ; @eol_pattern { endOfLine lexToken } () { indentation lexToken dedent BOL } } -- beginning of file { -- @eol_pattern ; @eol_pattern { endOfLine lexToken } () { indentation lexToken dedent BOF } } <0> $ident_letter($ident_letter|$digit)* { \loc len str -> keywordOrIdent (take len str) loc } -- operators and separators -- <0> { "(" { openParen LeftRoundBracketToken } ")" { closeParen RightRoundBracketToken } "[" { openParen LeftSquareBracketToken } "]" { closeParen RightSquareBracketToken } "{" { openParen LeftBraceToken } "}" { closeParen RightBraceToken } "->" { symbolToken RightArrowToken } "." { symbolToken DotToken } "..." { symbolToken EllipsisToken } "~" { symbolToken TildeToken } "+" { symbolToken PlusToken } "-" { symbolToken MinusToken } "**" { symbolToken ExponentToken } "*" { symbolToken MultToken } "/" { symbolToken DivToken } "//" { symbolToken FloorDivToken } "%" { symbolToken ModuloToken } "<<" { symbolToken ShiftLeftToken } ">>" { symbolToken ShiftRightToken } "<" { symbolToken LessThanToken } "<=" { symbolToken LessThanEqualsToken } ">" { symbolToken GreaterThanToken } ">=" { symbolToken GreaterThanEqualsToken } "==" { symbolToken EqualityToken } "!=" { symbolToken NotEqualsToken } "<>" { symbolToken NotEqualsV2Token } -- only version 2 "^" { symbolToken XorToken } "|" { symbolToken BinaryOrToken } "&&" { symbolToken AndToken } "&" { symbolToken BinaryAndToken } "||" { symbolToken OrToken } ":" { symbolToken ColonToken } "=" { symbolToken AssignToken } "+=" { symbolToken PlusAssignToken } "-=" { symbolToken MinusAssignToken } "*=" { symbolToken MultAssignToken } "/=" { symbolToken DivAssignToken } "%=" { symbolToken ModAssignToken } "**=" { symbolToken PowAssignToken } "&=" { symbolToken BinAndAssignToken } "|=" { symbolToken BinOrAssignToken } "^=" { symbolToken BinXorAssignToken } "<<=" { symbolToken LeftShiftAssignToken } ">>=" { symbolToken RightShiftAssignToken } "//=" { symbolToken FloorDivAssignToken } "," { symbolToken CommaToken } "@" { symbolToken AtToken } \; { symbolToken SemiColonToken } "`" { symbolToken BackQuoteToken } } { -- The lexer starts off in the beginning of file state (bof) initStartCodeStack :: [Int] initStartCodeStack = [bof,0] lexToken :: P Token lexToken = do location <- getLocation input <- getInput startCode <- getStartCode case alexScan (location, input) startCode of AlexEOF -> do -- Ensure there is a newline token before the EOF previousToken <- getLastToken case previousToken of NewlineToken {} -> do -- Ensure that there is sufficient dedent -- tokens for the outstanding indentation -- levels depth <- getIndentStackDepth if depth <= 1 then return endOfFileToken else do popIndent return dedentToken other -> do let insertedNewlineToken = NewlineToken $ mkSrcSpan location location setLastToken insertedNewlineToken return insertedNewlineToken AlexError _ -> lexicalError AlexSkip (nextLocation, rest) len -> do setLocation nextLocation setInput rest lexToken AlexToken (nextLocation, rest) len action -> do setLocation nextLocation setInput rest token <- action (mkSrcSpan location $ decColumn 1 nextLocation) len input setLastToken token return token -- This is called by the Happy parser. lexCont :: (Token -> P a) -> P a lexCont cont = do lexLoop where -- lexLoop :: P a lexLoop = do tok <- lexToken case tok of CommentToken {} -> do addComment tok lexLoop LineJoinToken {} -> lexLoop _other -> cont tok -- a keyword or an identifier (the syntax overlaps) keywordOrIdent :: String -> SrcSpan -> P Token keywordOrIdent str location = return $ case Map.lookup str keywords of Just symbol -> symbol location Nothing -> IdentifierToken location str -- mapping from strings to keywords keywords :: Map.Map String (SrcSpan -> Token) keywords = Map.fromList keywordNames -- see: keywordNames :: [(String, SrcSpan -> Token)] keywordNames = [ ("and", AndToken), ("as", AsToken), ("assert", AssertToken), ("break", BreakToken) , ("class", ClassToken), ("continue", ContinueToken), ("def", DefToken), ("del", DeleteToken) , ("elif", ElifToken), ("else", ElseToken), ("except", ExceptToken), ("exec", ExecToken) , ("finally", FinallyToken), ("for", ForToken), ("from", FromToken), ("global", GlobalToken) , ("if", IfToken), ("import", ImportToken), ("in", InToken), ("is", IsToken) , ("lambda", LambdaToken), ("not", NotToken), ("or", OrToken), ("pass", PassToken) , ("print", PrintToken), ("raise", RaiseToken), ("return", ReturnToken), ("try", TryToken) , ("while", WhileToken), ("with", WithToken), ("yield", YieldToken) ] } language-python-0.4.1/src/Language/Python/Version2/Parser/Parser.y0000644000000000000000000007113312441515301023171 0ustar0000000000000000{ ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version2.Parser.Parser -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Implementation of the Python version 2.x parser. Generated by happy. ----------------------------------------------------------------------------- module Language.Python.Version2.Parser.Parser (parseFileInput, parseSingleInput, parseEval) where import Language.Python.Version2.Parser.Lexer import Language.Python.Common.Token as Token import Language.Python.Common.AST as AST import Language.Python.Common.ParserMonad import Language.Python.Common.SrcLocation import Language.Python.Common.ParserUtils import Data.Either (rights, either) import Data.Maybe (maybeToList) } %name parseFileInput file_input %name parseSingleInput single_input %name parseEval eval_input %tokentype { Token } %error { parseError } %monad { P } { thenP } { returnP } %lexer { lexCont } { EOFToken {} } %token '=' { AssignToken {} } '(' { LeftRoundBracketToken {} } ')' { RightRoundBracketToken {} } '[' { LeftSquareBracketToken {} } ']' { RightSquareBracketToken {} } '{' { LeftBraceToken {} } '}' { RightBraceToken {} } ',' { CommaToken {} } ';' { SemiColonToken {} } ':' { ColonToken {} } '+' { PlusToken {} } '-' { MinusToken {} } '*' { MultToken {} } '**' { ExponentToken {} } '/' { DivToken {} } '//' { FloorDivToken {} } '>' { GreaterThanToken {} } '<' { LessThanToken {} } '==' { EqualityToken {} } '>=' { GreaterThanEqualsToken {} } '<=' { LessThanEqualsToken {} } '|' { BinaryOrToken {} } '^' { XorToken {} } '&' { BinaryAndToken {} } '>>' { ShiftRightToken {} } '<<' { ShiftLeftToken {} } '%' { ModuloToken {} } '~' { TildeToken {} } '!=' { NotEqualsToken {} } '<>' { NotEqualsV2Token {} } '.' { DotToken {} } '`' { BackQuoteToken {} } '+=' { PlusAssignToken {} } '-=' { MinusAssignToken {} } '*=' { MultAssignToken {} } '/=' { DivAssignToken {} } '%=' { ModAssignToken {} } '**=' { PowAssignToken {} } '&=' { BinAndAssignToken {} } '|=' { BinOrAssignToken {} } '^=' { BinXorAssignToken {} } '<<=' { LeftShiftAssignToken {} } '>>=' { RightShiftAssignToken {} } '//=' { FloorDivAssignToken {} } '@' { AtToken {} } 'and' { AndToken {} } 'as' { AsToken {} } 'assert' { AssertToken {} } 'break' { BreakToken {} } 'bytestring' { ByteStringToken {} } 'class' { ClassToken {} } 'continue' { ContinueToken {} } 'dedent' { DedentToken {} } 'def' { DefToken {} } 'del' { DeleteToken {} } 'elif' { ElifToken {} } 'else' { ElseToken {} } 'except' { ExceptToken {} } 'exec' { ExecToken {} } 'finally' { FinallyToken {} } 'float' { FloatToken {} } 'for' { ForToken {} } 'from' { FromToken {} } 'global' { GlobalToken {} } 'ident' { IdentifierToken {} } 'if' { IfToken {} } 'imaginary' { ImaginaryToken {} } 'import' { ImportToken {} } 'indent' { IndentToken {} } 'in' { InToken {} } 'integer' { IntegerToken {} } 'long_integer' { LongIntegerToken {} } 'is' { IsToken {} } 'lambda' { LambdaToken {} } 'NEWLINE' { NewlineToken {} } 'not' { NotToken {} } 'or' { OrToken {} } 'pass' { PassToken {} } 'print' { PrintToken {} } 'raise' { RaiseToken {} } 'return' { ReturnToken {} } 'string' { StringToken {} } 'try' { TryToken {} } 'unicodestring' { UnicodeStringToken {} } 'while' { WhileToken {} } 'with' { WithToken {} } 'yield' { YieldToken {} } %% pair(p,q): p q { ($1, $2) } left(p,q): p q { $1 } right(p,q): p q { $2 } or(p,q) : p { $1 } | q { $1 } either(p,q) : p { Left $1 } | q { Right $1 } opt(p) : { Nothing } | p { Just $1 } rev_list1(p) : p { [$1] } | rev_list1(p) p { $2 : $1 } many1(p) : rev_list1(p) { reverse $1 } many0(p) : many1(p) { $1 } | { [] } sepOptEndBy(p,sep) : sepByRev(p,sep) ',' { reverse $1 } | sepByRev(p,sep) { reverse $1 } sepBy(p,sep): sepByRev(p,sep) { reverse $1 } sepByRev(p,sep) : p { [$1] } | sepByRev(p,sep) sep p { $3 : $1 } NAME :: { IdentSpan } NAME : 'ident' { Ident (token_literal $1) (getSpan $1) } {- Note: newline tokens in the grammar: It seems there are some dubious uses of NEWLINE in the grammar. This is corroborated by this posting: http://mail.python.org/pipermail/python-dev/2005-October/057014.html The general idea is that the lexer does not generate NEWLINE tokens for lines which contain only spaces or comments. However, the grammar sometimes suggests that such tokens may exist. -} -- single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE {- We don't support the newline at the end of a compound stmt because the lexer would not produce a newline there. It seems like a weirdness in the way the interactive input works. -} single_input :: { [StatementSpan] } single_input : 'NEWLINE' { [] } | simple_stmt { $1 } | compound_stmt {- No newline here! -} { [$1] } -- file_input: (NEWLINE | stmt)* ENDMARKER file_input :: { ModuleSpan } file_input : many0(either('NEWLINE',stmt)) {- No need to mention ENDMARKER -} { Module (concat (rights $1)) } -- eval_input: testlist NEWLINE* ENDMARKER eval_input :: { ExprSpan } eval_input : testlist many0('NEWLINE') {- No need to mention ENDMARKER -} { $1 } -- decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE opt_paren_arg_list :: { [ArgumentSpan] } opt_paren_arg_list: opt(paren_arg_list) { concat (maybeToList $1) } paren_arg_list :: { [ArgumentSpan] } paren_arg_list : '(' optional_arg_list ')' { $2 } decorator :: { DecoratorSpan } decorator : '@' dotted_name opt_paren_arg_list 'NEWLINE' { makeDecorator $1 $2 $3 } -- decorators: decorator+ decorators :: { [DecoratorSpan] } decorators : many1(decorator) { $1 } -- decorated: decorators (classdef | funcdef) decorated :: { StatementSpan } decorated : decorators or(classdef,funcdef) { makeDecorated $1 $2 } -- funcdef: 'def' NAME parameters ':' suite funcdef :: { StatementSpan } funcdef : 'def' NAME parameters ':' suite { makeFun $1 $2 $3 Nothing $5 } -- parameters: '(' [varargslist] ')' parameters :: { [ParameterSpan] } parameters : '(' opt(varargslist) ')' { concat (maybeToList $2) } {- varargslist: ((vfpdef ['=' test] ',')* ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) -} {- varargslist: ((fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']) -} {- There is some tedious similarity in these rules to the ones for TypedArgsList. varargslist is used for lambda functions, and they do not have parentheses around them (unlike function definitions). Therefore lambda parameters cannot have the optional annotations that normal functions can, because the annotations are introduced using a colon. This would cause ambibguity with the colon that marks the end of the lambda parameter list! -} varargslist :: { [ParameterSpan] } varargslist : sepOptEndBy(one_varargs_param,',') {% checkParameters $1 } one_varargs_param :: { ParameterSpan } one_varargs_param : '*' NAME { makeStarParam $1 (Just ($2, Nothing)) } | '**' NAME { makeStarStarParam $1 ($2, Nothing) } | fpdef optional_default { makeTupleParam $1 $2 } optional_default :: { Maybe ExprSpan } optional_default: opt(equals_test) { $1 } equals_test :: { ExprSpan } equals_test: '=' test { $2 } -- fpdef: NAME | '(' fplist ')' fpdef :: { ParamTupleSpan } fpdef : NAME { ParamTupleName $1 (getSpan $1) } | '(' fplist ')' { ParamTuple $2 (spanning $1 $3) } -- fplist: fpdef (',' fpdef)* [','] fplist :: { [ParamTupleSpan] } fplist: sepOptEndBy(fpdef,',') { $1 } -- stmt: simple_stmt | compound_stmt stmt :: { [StatementSpan] } stmt : simple_stmt { $1 } | compound_stmt { [$1] } -- simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE simple_stmt :: { [StatementSpan] } simple_stmt : small_stmts opt(';') 'NEWLINE' { reverse $1 } small_stmts :: { [StatementSpan] } small_stmts : small_stmt { [$1] } | small_stmts ';' small_stmt { $3 : $1 } {- small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt) -} small_stmt :: { StatementSpan } small_stmt : expr_stmt { $1 } | print_stmt { $1 } | del_stmt { $1 } | pass_stmt { $1 } | flow_stmt { $1 } | import_stmt { $1 } | global_stmt { $1 } | exec_stmt { $1 } | assert_stmt { $1 } -- expr_stmt: testlist (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist))*) expr_stmt :: { StatementSpan } expr_stmt : testlist either(many_assign, augassign_yield_or_test_list) { makeAssignmentOrExpr $1 $2 } many_assign :: { [ExprSpan] } many_assign : many0(right('=', yield_or_test_list)) { $1 } yield_or_test_list :: { ExprSpan } yield_or_test_list : or(yield_expr,testlist) { $1 } augassign_yield_or_test_list :: { (AssignOpSpan, ExprSpan) } augassign_yield_or_test_list : augassign yield_or_test_list { ($1, $2) } {- augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=') -} augassign :: { AssignOpSpan } augassign : '+=' { AST.PlusAssign (getSpan $1) } | '-=' { AST.MinusAssign (getSpan $1) } | '*=' { AST.MultAssign (getSpan $1) } | '/=' { AST.DivAssign (getSpan $1) } | '%=' { AST.ModAssign (getSpan $1) } | '**=' { AST.PowAssign (getSpan $1) } | '&=' { AST.BinAndAssign (getSpan $1) } | '|=' { AST.BinOrAssign (getSpan $1) } | '^=' { AST.BinXorAssign (getSpan $1) } | '<<=' { AST.LeftShiftAssign (getSpan $1) } | '>>=' { AST.RightShiftAssign (getSpan $1) } | '//=' { AST.FloorDivAssign (getSpan $1) } {- print_stmt: 'print' ( [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ] ) -} print_stmt :: { StatementSpan } print_stmt : 'print' '>>' print_exprs { makePrint True (Just $3) (spanning $1 $3) } | 'print' opt(print_exprs) { makePrint False $2 (spanning $1 $2) } print_exprs :: { ([ExprSpan], Maybe Token) } print_exprs : testlistrev opt_comma { (reverse $1, $2) } -- del_stmt: 'del' exprlist del_stmt :: { StatementSpan } del_stmt : 'del' exprlist { AST.Delete $2 (spanning $1 $2) } -- pass_stmt: 'pass' pass_stmt :: { StatementSpan } pass_stmt : 'pass' { AST.Pass (getSpan $1) } -- flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt flow_stmt :: { StatementSpan } flow_stmt : break_stmt { $1 } | continue_stmt { $1 } | return_stmt { $1 } | raise_stmt { $1 } | yield_stmt { $1 } -- break_stmt: 'break' break_stmt :: { StatementSpan } break_stmt : 'break' { AST.Break (getSpan $1) } -- continue_stmt: 'continue' continue_stmt :: { StatementSpan } continue_stmt : 'continue' { AST.Continue (getSpan $1) } -- return_stmt: 'return' [testlist] return_stmt :: { StatementSpan } return_stmt : 'return' optional_testlist { makeReturn $1 $2 } -- yield_stmt: yield_expr yield_stmt :: { StatementSpan } yield_stmt : yield_expr { StmtExpr $1 (getSpan $1) } -- raise_stmt: 'raise' [test ['from' test]] -- raise_stmt: 'raise' [test [',' test [',' test]]] raise_stmt :: { StatementSpan } raise_stmt : 'raise' opt(pair(test, opt(pair(right(',',test), opt(right(',', test)))))) { AST.Raise (RaiseV2 $2) (spanning $1 $2) } -- import_stmt: import_name | import_from import_stmt :: { StatementSpan } import_stmt: or(import_name, import_from) { $1 } -- import_name: 'import' dotted_as_names import_name :: { StatementSpan } import_name : 'import' dotted_as_names { AST.Import $2 (spanning $1 $2) } {- import_from: ('from' ('.'* dotted_name | '.'+) 'import' ('*' | '(' import_as_names ')' | import_as_names)) -} import_from :: { StatementSpan } import_from : 'from' import_module 'import' star_or_as_names { FromImport $2 $4 (spanning $1 $4) } import_module :: { ImportRelativeSpan } import_module: import_module_dots { makeRelative $1 } import_module_dots :: { [Either Token DottedNameSpan] } import_module_dots : '.' { [ Left $1 ] } | dotted_name { [ Right $1 ] } | '.' import_module_dots { Left $1 : $2 } star_or_as_names :: { FromItemsSpan } star_or_as_names : '*' { ImportEverything (getSpan $1) } | '(' import_as_names ')' { $2 } | import_as_names { $1 } -- import_as_name: NAME ['as' NAME] import_as_name :: { FromItemSpan } import_as_name : NAME optional_as_name { FromItem $1 $2 (spanning $1 $2) } -- dotted_as_name: dotted_name ['as' NAME] dotted_as_name :: { ImportItemSpan } dotted_as_name : dotted_name optional_as_name { ImportItem $1 $2 (spanning $1 $2) } -- import_as_names: import_as_name (',' import_as_name)* [','] import_as_names :: { FromItemsSpan } import_as_names : sepOptEndBy(import_as_name, ',') { FromItems $1 (getSpan $1) } -- dotted_as_names: dotted_as_name (',' dotted_as_name)* dotted_as_names :: { [ImportItemSpan] } dotted_as_names : sepBy(dotted_as_name,',') { $1 } -- dotted_name: NAME ('.' NAME)* dotted_name :: { DottedNameSpan } dotted_name : NAME many0(right('.', NAME)) { $1 : $2 } -- global_stmt: 'global' NAME (',' NAME)* global_stmt :: { StatementSpan } global_stmt : 'global' one_or_more_names { AST.Global $2 (spanning $1 $2) } one_or_more_names :: { [IdentSpan] } one_or_more_names: sepBy(NAME, ',') { $1 } -- exec_stmt: 'exec' expr ['in' test [',' test]] exec_stmt :: { StatementSpan } exec_stmt : 'exec' expr opt(right('in', pair(test, opt(right(',', test))))) { AST.Exec $2 $3 (spanning (spanning $1 $2) $3) } -- assert_stmt: 'assert' test [',' test] assert_stmt :: { StatementSpan } assert_stmt : 'assert' sepBy(test,',') { AST.Assert $2 (spanning $1 $2) } -- compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated compound_stmt :: { StatementSpan } compound_stmt : if_stmt { $1 } | while_stmt { $1 } | for_stmt { $1 } | try_stmt { $1 } | with_stmt { $1 } | funcdef { $1 } | classdef { $1 } | decorated { $1 } -- if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] if_stmt :: { StatementSpan } if_stmt : 'if' test ':' suite many0(elif) optional_else { Conditional (($2, $4):$5) $6 (spanning (spanning (spanning $1 $4) $5) $6) } elif :: { (ExprSpan, [StatementSpan]) } elif : 'elif' test ':' suite { ($2, $4) } optional_else :: { [StatementSpan] } optional_else : {- empty -} { [] } | 'else' ':' suite { $3 } -- while_stmt: 'while' test ':' suite ['else' ':' suite] while_stmt :: { StatementSpan } while_stmt : 'while' test ':' suite optional_else { AST.While $2 $4 $5 (spanning (spanning $1 $4) $5) } -- for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] for_stmt :: { StatementSpan } for_stmt : 'for' exprlist 'in' testlist ':' suite optional_else { AST.For $2 $4 $6 $7 (spanning (spanning $1 $6) $7) } {- try_stmt: ('try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite)) -} try_stmt :: { StatementSpan } try_stmt : 'try' ':' suite handlers { makeTry $1 $3 $4 } handlers :: { ([HandlerSpan], [StatementSpan], [StatementSpan]) } handlers : one_or_more_except_clauses optional_else optional_finally { ($1, $2, $3) } | 'finally' ':' suite { ([], [], $3) } optional_finally :: { [StatementSpan] } optional_finally : {- empty -} { [] } | 'finally' ':' suite { $3 } one_or_more_except_clauses :: { [HandlerSpan] } one_or_more_except_clauses : many1(handler) { $1 } handler :: { HandlerSpan } handler : except_clause ':' suite { Handler $1 $3 (spanning $1 $3) } -- with_stmt: 'with' with_item (',' with_item)* ':' suite with_stmt :: { StatementSpan } with_stmt : 'with' sepOptEndBy(with_item, ',') ':' suite { AST.With $2 $4 (spanning $1 $4) } -- with_item: test ['as' expr] with_item :: { (ExprSpan, Maybe ExprSpan) } with_item: pair(test,opt(right('as',expr))) { $1 } -- except_clause: 'except' [test [('as' | ',') test]] except_clause :: { ExceptClauseSpan } except_clause : 'except' opt(pair(test, opt(right(or('as',','), test)))) { ExceptClause $2 (spanning $1 $2) } optional_as_name :: { Maybe IdentSpan } optional_as_name: opt(right('as', NAME)) { $1 } -- suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT -- Note: we don't have a newline before indent b/c it is redundant suite :: { [StatementSpan] } suite : simple_stmt { $1 } | {- no newline here! -} 'indent' many1(stmt) 'dedent' { concat $2 } -- testlist_safe: old_test [(',' old_test)+ [',']] testlist_safe :: { ExprSpan } testlist : old_testlistrev opt_comma { makeTupleOrExpr (reverse $1) $2 } old_testlistrev :: { [ExprSpan] } old_testlistrev : old_test { [$1] } | old_testlistrev ',' old_test { $3 : $1 } -- old_test: or_test | old_lambdef old_test :: { ExprSpan } old_test: or(or_test,old_lambdef) { $1 } -- old_lambdef: 'lambda' [varargslist] ':' old_test old_lambdef :: { ExprSpan } old_lambdef: 'lambda' opt_varargslist ':' old_test { AST.Lambda $2 $4 (spanning $1 $4) } -- test: or_test ['if' or_test 'else' test] | lambdef test :: { ExprSpan } test : or_test opt(test_if_cond) { makeConditionalExpr $1 $2 } | lambdef { $1 } test_if_cond :: { (ExprSpan, ExprSpan) } test_if_cond: 'if' or_test 'else' test { ($2, $4) } -- lambdef: 'lambda' [varargslist] ':' test lambdef :: { ExprSpan } lambdef : 'lambda' opt_varargslist ':' test { AST.Lambda $2 $4 (spanning $1 $4) } opt_varargslist :: { [ParameterSpan] } opt_varargslist: opt(varargslist) { concat (maybeToList $1) } -- or_test: and_test ('or' and_test)* or_test :: { ExprSpan } or_test : and_test many0(pair(or_op,and_test)) { makeBinOp $1 $2 } or_op :: { OpSpan } or_op: 'or' { AST.Or (getSpan $1) } -- and_test: not_test ('and' not_test)* and_test :: { ExprSpan } and_test : not_test many0(pair(and_op, not_test)) { makeBinOp $1 $2 } and_op :: { OpSpan } and_op: 'and' { AST.And (getSpan $1) } -- not_test: 'not' not_test | comparison not_test :: { ExprSpan } not_test : 'not' not_test { UnaryOp (AST.Not (getSpan $1)) $2 (spanning $1 $2) } | comparison { $1 } -- comparison: expr (comp_op expr)* comparison :: { ExprSpan } comparison : expr many0(pair(comp_op, expr)) { makeBinOp $1 $2 } -- comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' comp_op :: { OpSpan } comp_op : '<' { AST.LessThan (getSpan $1) } | '>' { AST.GreaterThan (getSpan $1) } | '==' { AST.Equality (getSpan $1) } | '>=' { AST.GreaterThanEquals (getSpan $1) } | '<=' { AST.LessThanEquals (getSpan $1) } | '!=' { AST.NotEquals (getSpan $1) } | '<>' { AST.NotEqualsV2 (getSpan $1) } | 'in' { AST.In (getSpan $1) } | 'not' 'in' { AST.NotIn (spanning $1 $2) } | 'is' { AST.Is (getSpan $1) } | 'is' 'not' { AST.IsNot (spanning $1 $2) } -- expr: xor_expr ('|' xor_expr)* expr :: { ExprSpan } expr : xor_expr many0(pair(bar_op, xor_expr)) { makeBinOp $1 $2 } bar_op :: { OpSpan } bar_op: '|' { AST.BinaryOr (getSpan $1) } -- xor_expr: and_expr ('^' and_expr)* xor_expr :: { ExprSpan } xor_expr : and_expr many0(pair(hat_op, and_expr)) { makeBinOp $1 $2 } hat_op :: { OpSpan } hat_op: '^' { AST.Xor (getSpan $1) } -- and_expr: shift_expr ('&' shift_expr)* and_expr :: { ExprSpan } and_expr : shift_expr many0(pair(ampersand, shift_expr)) { makeBinOp $1 $2 } ampersand :: { OpSpan } ampersand: '&' { AST.BinaryAnd (getSpan $1) } -- shift_expr: arith_expr (('<<'|'>>') arith_expr)* shift_expr :: { ExprSpan } shift_expr: arith_expr many0(pair(shift_op, arith_expr)) { makeBinOp $1 $2 } shift_op :: { OpSpan } shift_op : '<<' { AST.ShiftLeft (getSpan $1) } | '>>' { AST.ShiftRight (getSpan $1) } -- arith_expr: term (('+'|'-') term)* arith_expr :: { ExprSpan } arith_expr: term many0(pair(arith_op, term)) { makeBinOp $1 $2 } arith_op :: { OpSpan } arith_op : '+' { AST.Plus (getSpan $1) } | '-' { AST.Minus (getSpan $1) } -- term: factor (('*'|'/'|'%'|'//') factor)* term :: { ExprSpan } term : factor many0(pair(mult_div_mod_op, factor)) { makeBinOp $1 $2 } mult_div_mod_op :: { OpSpan } mult_div_mod_op : '*' { AST.Multiply (getSpan $1) } | '/' { AST.Divide (getSpan $1) } | '%' { AST.Modulo (getSpan $1) } | '//' { AST.FloorDivide (getSpan $1) } -- factor: ('+'|'-'|'~') factor | power factor :: { ExprSpan } factor : or(arith_op, tilde_op) factor { UnaryOp $1 $2 (spanning $1 $2) } | power { $1 } tilde_op :: { OpSpan } tilde_op: '~' { AST.Invert (getSpan $1) } -- power: atom trailer* ['**' factor] power :: { ExprSpan } power : atom many0(trailer) opt(pair(exponent_op, factor)) { makeBinOp (addTrailer $1 $2) (maybeToList $3) } exponent_op :: { OpSpan } exponent_op: '**' { AST.Exponent (getSpan $1) } {- atom: ('(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+) -} atom :: { ExprSpan } atom : '(' yield_or_testlist_gexp ')' { $2 (spanning $1 $3) } | list_atom { $1 } | '{' opt(dictmaker) '}' { AST.Dictionary (concat (maybeToList $2)) (spanning $1 $3) } | '`' testlist1 '`' { AST.StringConversion $2 (spanning $1 $3) } | NAME { AST.Var $1 (getSpan $1) } | 'integer' { AST.Int (token_integer $1) (token_literal $1) (getSpan $1) } | 'long_integer' { AST.LongInt (token_integer $1) (token_literal $1) (getSpan $1) } | 'float' { AST.Float (token_double $1) (token_literal $1) (getSpan $1) } | 'imaginary' { AST.Imaginary (token_double $1) (token_literal $1) (getSpan $1) } | many1('string') { AST.Strings (map token_literal $1) (getSpan $1) } | many1('bytestring') { AST.ByteStrings (map token_literal $1) (getSpan $1) } | many1('unicodestring') { AST.UnicodeStrings (map token_literal $1) (getSpan $1) } -- listmaker: test ( list_for | (',' test)* [','] ) list_atom :: { ExprSpan } list_atom : '[' ']' { List [] (spanning $1 $2) } | '[' testlistfor ']' { makeListForm (spanning $1 $3) $2 } testlistfor :: { Either ExprSpan (ComprehensionSpan ExprSpan) } testlistfor : testlist { Left $1 } | test list_for { Right (makeComprehension $1 $2) } yield_or_testlist_gexp :: { SrcSpan -> ExprSpan } yield_or_testlist_gexp : {- empty -} { Tuple [] } | yield_expr { Paren $1 } | testlist_gexp { either Paren Generator $1 } -- testlist_gexp: test ( gen_for | (',' test)* [','] ) testlist_gexp :: { Either ExprSpan (ComprehensionSpan ExprSpan) } testlist_gexp : testlist { Left $1 } | test gen_for { Right (makeComprehension $1 $2) } -- trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME trailer :: { Trailer } trailer : paren_arg_list { TrailerCall $1 (getSpan $1) } | '[' subscriptlist ']' { TrailerSubscript $2 (spanning $1 $3) } | '.' NAME { TrailerDot $2 (getSpan $1) (spanning $1 $2) } -- subscriptlist: subscript (',' subscript)* [','] subscriptlist :: { [Subscript] } subscriptlist : sepOptEndBy(subscript, ',') { $1 } -- subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] subscript :: { Subscript } subscript : '.' '.' '.' { SubscriptSliceEllipsis (spanning $1 $3) } | test { SubscriptExpr $1 (getSpan $1) } | opt(test) ':' opt(test) opt(sliceop) { SubscriptSlice $1 $3 $4 (spanning (spanning (spanning $1 $2) $3) $4) } -- sliceop: ':' [test] sliceop :: { Maybe ExprSpan } sliceop : ':' opt(test) { $2 } -- exprlist: expr (',' expr)* [','] exprlist :: { [ExprSpan] } exprlist: sepOptEndBy(expr, ',') { $1 } opt_comma :: { Maybe Token } opt_comma : {- empty -} { Nothing } | ',' { Just $1 } -- testlist: test (',' test)* [','] -- Some trickery here because the of the optional trailing comma, which -- could turn a normal expression into a tuple. -- Very occasionally, testlist is used to generate something which is not -- a tuple (such as the square bracket notation in list literals). Therefore -- it would seem like a good idea to not return a tuple in this case, but -- a list of expressions. However this would complicate a lot of code -- since we would have to carry around the optional comma information. -- I've decided to leave it as a tuple, and in special cases, unpack the -- tuple and pull out the list of expressions. testlist :: { ExprSpan } testlist : testlistrev opt_comma { makeTupleOrExpr (reverse $1) $2 } testlistrev :: { [ExprSpan] } testlistrev : test { [$1] } | testlistrev ',' test { $3 : $1 } -- dictmaker: test ':' test (',' test ':' test)* [','] dictmaker :: { [(ExprSpan, ExprSpan)] } dictmaker: sepOptEndBy(pair(test,right(':',test)), ',') { $1 } -- classdef: 'class' NAME ['(' [testlist] ')'] ':' suite classdef :: { StatementSpan } classdef : 'class' NAME optional_paren_testlist ':' suite { AST.Class $2 $3 $5 (spanning $1 $5) } optional_paren_testlist :: { [ArgumentSpan] } optional_paren_testlist : {- empty -} { [] } | '(' ')' { [] } | '(' testlistrev opt_comma ')' { map (\e -> ArgExpr e (getSpan e)) (reverse $2) } optional_arg_list :: { [ArgumentSpan] } optional_arg_list: opt(arglist) { concat (maybeToList $1) } {- arglist: (argument ',')* (argument [','] |'*' test (',' argument)* [',' '**' test] |'**' test) -} {- We don't follow the grammar rules directly (though we do implement something equivalent). The reason is that there is ambiguity over the optional comma. It is probably okay to allow the optional comma even after the *, and ** forms. It seems more consistent to me. -} arglist :: { [ArgumentSpan] } arglist: sepOptEndBy(oneArgument,',') {% checkArguments $1 } oneArgument : '*' test { ArgVarArgsPos $2 (spanning $1 $2) } | '**' test { ArgVarArgsKeyword $2 (spanning $1 $2) } | argument { $1 } -- argument: test [gen_for] | test '=' test argument :: { ArgumentSpan } argument : NAME '=' test { ArgKeyword $1 $3 (spanning $1 $3) } | test { ArgExpr $1 (getSpan $1) } | test gen_for { let span = spanning $1 $1 in ArgExpr (Generator (makeComprehension $1 $2) span) span } -- list_iter: list_for | list_if list_iter :: { CompIterSpan } list_iter : list_for { AST.IterFor $1 (getSpan $1) } | list_if { AST.IterIf $1 (getSpan $1) } -- list_for: 'for' exprlist 'in' testlist_safe [list_iter] list_for :: { CompForSpan } list_for: 'for' exprlist 'in' testlist_safe opt(list_iter) { AST.CompFor $2 $4 $5 (spanning (spanning $1 $4) $5) } -- list_if: 'if' old_test [list_iter] list_if :: { CompIfSpan } list_if: 'if' old_test opt(list_iter) { AST.CompIf $2 $3 (spanning (spanning $1 $2) $3) } -- gen_iter: gen_for | gen_if gen_iter :: { CompIterSpan } gen_iter : gen_for { AST.IterFor $1 (getSpan $1) } | gen_if { AST.IterIf $1 (getSpan $1) } -- gen_for: 'for' exprlist 'in' or_test [gen_iter] gen_for :: { CompForSpan } gen_for: 'for' exprlist 'in' or_test opt(gen_iter) { AST.CompFor $2 $4 $5 (spanning (spanning $1 $4) $5) } -- gen_if: 'if' old_test [gen_iter] gen_if :: { CompIfSpan } gen_if: 'if' old_test opt(gen_iter) { AST.CompIf $2 $3 (spanning (spanning $1 $2) $3) } -- testlist1: test (',' test)* testlist1 :: { ExprSpan } testlist1: sepBy(test, ',') { makeTupleOrExpr $1 Nothing } -- encoding_decl: NAME -- Not used in the rest of the grammar! -- yield_expr: 'yield' [testlist] yield_expr :: { ExprSpan } yield_expr : 'yield' optional_testlist { AST.Yield $2 (spanning $1 $2) } optional_testlist :: { Maybe ExprSpan } optional_testlist: opt(testlist) { $1 } { -- Put additional Haskell code in here if needed. } language-python-0.4.1/src/Language/Python/Version3/0000755000000000000000000000000012441515301020303 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Version3/Lexer.hs0000644000000000000000000000441012441515301021715 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version3.Lexer -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Lexical analysis for Python version 3.x programs. -- See: . ----------------------------------------------------------------------------- module Language.Python.Version3.Lexer ( -- * Lexical analysis lex, lexOneToken, lexer, initLexState ) where import Prelude hiding (lex) import Language.Python.Version3.Parser.Lexer (lexToken, initStartCodeStack) import Language.Python.Common.Token as Token import Language.Python.Common.SrcLocation (initialSrcLocation) import Language.Python.Common.ParserMonad (ParseState (input), P, runParser, execParser, ParseError, initialState) initLexState :: String -> String -> ParseState initLexState input srcName = initialState (initialSrcLocation srcName) input initStartCodeStack -- | Parse a string into a list of Python Tokens, or return an error. lex :: String -- ^ The input stream (python source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError [Token] -- ^ An error or a list of tokens. lex input srcName = execParser lexer $ initLexState input srcName -- | Try to lex the first token in an input string. Return either a parse error -- or a pair containing the next token and the rest of the input after the token. lexOneToken :: String -- ^ The input stream (python source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (Token, String) -- ^ An error or the next token and the rest of the input after the token. lexOneToken source srcName = case runParser lexToken state of Left err -> Left err Right (tok, state) -> Right (tok, input state) where state = initLexState source srcName -- | Lex a sequence of tokens. lexer :: P [Token] lexer = loop [] where loop toks = do tok <- lexToken case tok of EOFToken {} -> return (reverse toks) other -> loop (tok:toks) language-python-0.4.1/src/Language/Python/Version3/Parser.hs0000644000000000000000000000657412441515301022107 0ustar0000000000000000{-# OPTIONS #-} ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version3.Parser -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- A parser for Python version 3.x programs. Parsers are provided for -- modules, statements, and expressions. The parsers produce comment tokens -- in addition to the abstract syntax tree. -- -- See: -- -- * for an overview of the language. -- -- * for the full grammar. -- -- * for a description of -- the various Python top-levels, which correspond to the parsers provided here. ----------------------------------------------------------------------------- module Language.Python.Version3.Parser ( -- * Parsing modules parseModule, -- * Parsing statements parseStmt, -- * Parsing expressions parseExpr) where import Language.Python.Version3.Parser.Parser (parseFileInput, parseSingleInput, parseEval) import Language.Python.Version3.Parser.Lexer (initStartCodeStack) import Language.Python.Common.AST (ModuleSpan, StatementSpan, ExprSpan) import Language.Python.Common.Token (Token) import Language.Python.Common.SrcLocation (initialSrcLocation) import Language.Python.Common.ParserMonad (execParser, execParserKeepComments, ParseError, initialState) -- | Parse a whole Python source file. Return comments in addition to the parsed module. parseModule :: String -- ^ The input stream (python module source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (ModuleSpan, [Token]) -- ^ An error or the abstract syntax tree (AST) of the python module and comment tokens. parseModule input srcName = execParserKeepComments parseFileInput state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack -- | Parse one compound statement, or a sequence of simple statements. Generally used for interactive input, such as from the command line of an interpreter. Return comments in addition to the parsed statements. parseStmt :: String -- ^ The input stream (python statement source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError ([StatementSpan], [Token]) -- ^ An error or maybe the abstract syntax tree (AST) of zero or more python statements, plus comments. parseStmt input srcName = execParserKeepComments parseSingleInput state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack -- | Parse an expression. Generally used as input for the \'eval\' primitive. Return comments in addition to the parsed expression. parseExpr :: String -- ^ The input stream (python statement source code). -> String -- ^ The name of the python source (filename or input device). -> Either ParseError (ExprSpan, [Token]) -- ^ An error or maybe the abstract syntax tree (AST) of the python expression, plus comment tokens. parseExpr input srcName = execParserKeepComments parseEval state where initLoc = initialSrcLocation srcName state = initialState initLoc input initStartCodeStack language-python-0.4.1/src/Language/Python/Version3/Parser/0000755000000000000000000000000012441515301021537 5ustar0000000000000000language-python-0.4.1/src/Language/Python/Version3/Parser/Lexer.x0000644000000000000000000002534012441515301023013 0ustar0000000000000000{ ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version3.Parser.Lexer -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Implementation of a lexer for Python version 3.x programs. Generated by -- alex. ----------------------------------------------------------------------------- module Language.Python.Version3.Parser.Lexer (initStartCodeStack, lexToken, endOfFileToken, lexCont) where import Language.Python.Common.Token import Language.Python.Common.ParserMonad hiding (location) import Language.Python.Common.SrcLocation import Language.Python.Common.LexerUtils import qualified Data.Map as Map import Control.Monad (liftM) import Data.List (foldl') import Numeric (readHex, readOct) } -- character sets $lf = \n -- line feed $cr = \r -- carriage return $eol_char = [$lf $cr] -- any end of line character $not_eol_char = ~$eol_char -- anything but an end of line character $white_char = [\ \n\r\f\v\t] $white_no_nl = $white_char # $eol_char $ident_letter = [a-zA-Z_] $digit = 0-9 $non_zero_digit = 1-9 $oct_digit = 0-7 $hex_digit = [$digit a-fA-F] $bin_digit = 0-1 $short_str_char = [^ \n \r ' \" \\] $long_str_char = [. \n] # [' \"] $short_byte_str_char = \0-\127 # [\n \r ' \" \\] $long_byte_str_char = \0-\127 # [' \"] $not_single_quote = [. \n] # ' $not_double_quote = [. \n] # \" -- macro definitions @exponent = (e | E) (\+ | \-)? $digit+ @fraction = \. $digit+ @int_part = $digit+ @point_float = (@int_part? @fraction) | @int_part \. @exponent_float = (@int_part | @point_float) @exponent @float_number = @point_float | @exponent_float @eol_pattern = $lf | $cr $lf | $cr $lf @one_single_quote = ' $not_single_quote @two_single_quotes = '' $not_single_quote @one_double_quote = \" $not_double_quote @two_double_quotes = \"\" $not_double_quote @byte_str_prefix = b | B @raw_str_prefix = r | R @raw_byte_str_prefix = @byte_str_prefix @raw_str_prefix | @raw_str_prefix @byte_str_prefix @backslash_pair = \\ (\\|'|\"|@eol_pattern|$short_str_char) @backslash_pair_bs = \\ (\\|'|\"|@eol_pattern|$short_byte_str_char) @short_str_item_single = $short_str_char|@backslash_pair|\" @short_str_item_double = $short_str_char|@backslash_pair|' @short_byte_str_item_single = $short_byte_str_char|@backslash_pair_bs|\" @short_byte_str_item_double = $short_byte_str_char|@backslash_pair_bs|' @long_str_item_single = $long_str_char|@backslash_pair|@one_single_quote|@two_single_quotes|\" @long_str_item_double = $long_str_char|@backslash_pair|@one_double_quote|@two_double_quotes|' @long_byte_str_item_single = $long_byte_str_char|@backslash_pair_bs|@one_single_quote|@two_single_quotes|\" @long_byte_str_item_double = $long_byte_str_char|@backslash_pair_bs|@one_double_quote|@two_double_quotes|' tokens :- -- these rules below could match inside a string literal, but they -- will not be applied because the rule for the literal will always -- match a longer sequence of characters. \# ($not_eol_char)* { token (\ span lit val -> CommentToken span lit) id } $white_no_nl+ ; -- skip whitespace -- \\ @eol_pattern ; -- line join -- \\ @eol_pattern { endOfLine lexToken } -- line join \\ @eol_pattern { lineJoin } -- line join <0> { @float_number { token FloatToken readFloat } $non_zero_digit $digit* { token IntegerToken read } (@float_number | @int_part) (j | J) { token ImaginaryToken (readFloat.init) } 0+ { token IntegerToken read } 0 (o | O) $oct_digit+ { token IntegerToken read } 0 (x | X) $hex_digit+ { token IntegerToken read } 0 (b | B) $bin_digit+ { token IntegerToken readBinary } } -- String literals <0> { ' @short_str_item_single* ' { mkString stringToken } @raw_str_prefix ' @short_str_item_single* ' { mkString rawStringToken } @byte_str_prefix ' @short_byte_str_item_single* ' { mkString byteStringToken } @raw_byte_str_prefix ' @short_byte_str_item_single* ' { mkString rawByteStringToken } \" @short_str_item_double* \" { mkString stringToken } @raw_str_prefix \" @short_str_item_double* \" { mkString rawStringToken } @byte_str_prefix \" @short_byte_str_item_double* \" { mkString byteStringToken } @raw_byte_str_prefix \" @short_byte_str_item_double* \" { mkString rawByteStringToken } ''' @long_str_item_single* ''' { mkString stringToken } @raw_str_prefix ''' @long_str_item_single* ''' { mkString rawStringToken } @byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString byteStringToken } @raw_byte_str_prefix ''' @long_byte_str_item_single* ''' { mkString rawByteStringToken } \"\"\" @long_str_item_double* \"\"\" { mkString stringToken } @raw_str_prefix \"\"\" @long_str_item_double* \"\"\" { mkString rawStringToken } @byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString byteStringToken } @raw_byte_str_prefix \"\"\" @long_byte_str_item_double* \"\"\" { mkString rawByteStringToken } } -- NOTE: we pass lexToken into some functions as an argument. -- That allows us to define those functions in a separate module, -- which increases code reuse in the lexer (because that code can -- be shared between the lexer for versions 2 and 3 of Python. -- Unfortunately lexToken must be defined in this file because -- it refers to data types which are only included by Alex in -- the generated file (this seems like a limitation in Alex -- that should be improved). <0> { @eol_pattern { bolEndOfLine lexToken bol } } () { dedentation lexToken } -- beginning of line { @eol_pattern { endOfLine lexToken } () { indentation lexToken dedent BOL } } -- beginning of file { -- @eol_pattern ; @eol_pattern { endOfLine lexToken } () { indentation lexToken dedent BOF } } <0> $ident_letter($ident_letter|$digit)* { \loc len str -> keywordOrIdent (take len str) loc } -- operators and separators -- <0> { "(" { openParen LeftRoundBracketToken } ")" { closeParen RightRoundBracketToken } "[" { openParen LeftSquareBracketToken } "]" { closeParen RightSquareBracketToken } "{" { openParen LeftBraceToken } "}" { closeParen RightBraceToken } "->" { symbolToken RightArrowToken } "." { symbolToken DotToken } "..." { symbolToken EllipsisToken } "~" { symbolToken TildeToken } "+" { symbolToken PlusToken } "-" { symbolToken MinusToken } "**" { symbolToken ExponentToken } "*" { symbolToken MultToken } "/" { symbolToken DivToken } "//" { symbolToken FloorDivToken } "%" { symbolToken ModuloToken } "<<" { symbolToken ShiftLeftToken } ">>" { symbolToken ShiftRightToken } "<" { symbolToken LessThanToken } "<=" { symbolToken LessThanEqualsToken } ">" { symbolToken GreaterThanToken } ">=" { symbolToken GreaterThanEqualsToken } "==" { symbolToken EqualityToken } "!=" { symbolToken NotEqualsToken } "^" { symbolToken XorToken } "|" { symbolToken BinaryOrToken } "&&" { symbolToken AndToken } "&" { symbolToken BinaryAndToken } "||" { symbolToken OrToken } ":" { symbolToken ColonToken } "=" { symbolToken AssignToken } "+=" { symbolToken PlusAssignToken } "-=" { symbolToken MinusAssignToken } "*=" { symbolToken MultAssignToken } "/=" { symbolToken DivAssignToken } "%=" { symbolToken ModAssignToken } "**=" { symbolToken PowAssignToken } "&=" { symbolToken BinAndAssignToken } "|=" { symbolToken BinOrAssignToken } "^=" { symbolToken BinXorAssignToken } "<<=" { symbolToken LeftShiftAssignToken } ">>=" { symbolToken RightShiftAssignToken } "//=" { symbolToken FloorDivAssignToken } "," { symbolToken CommaToken } "@" { symbolToken AtToken } \; { symbolToken SemiColonToken } } { -- The lexer starts off in the beginning of file state (bof) initStartCodeStack :: [Int] initStartCodeStack = [bof,0] lexToken :: P Token lexToken = do location <- getLocation input <- getInput startCode <- getStartCode case alexScan (location, input) startCode of AlexEOF -> do -- Ensure there is a newline token before the EOF previousToken <- getLastToken case previousToken of NewlineToken {} -> do -- Ensure that there is sufficient dedent -- tokens for the outstanding indentation -- levels depth <- getIndentStackDepth if depth <= 1 then return endOfFileToken else do popIndent return dedentToken other -> do let insertedNewlineToken = NewlineToken $ mkSrcSpan location location setLastToken insertedNewlineToken return insertedNewlineToken AlexError _ -> lexicalError AlexSkip (nextLocation, rest) len -> do setLocation nextLocation setInput rest lexToken AlexToken (nextLocation, rest) len action -> do setLocation nextLocation setInput rest token <- action (mkSrcSpan location $ decColumn 1 nextLocation) len input setLastToken token return token -- This is called by the Happy parser. lexCont :: (Token -> P a) -> P a lexCont cont = do lexLoop where -- lexLoop :: P a lexLoop = do tok <- lexToken case tok of CommentToken {} -> do addComment tok lexLoop LineJoinToken {} -> lexLoop _other -> cont tok -- a keyword or an identifier (the syntax overlaps) keywordOrIdent :: String -> SrcSpan -> P Token keywordOrIdent str location = return $ case Map.lookup str keywords of Just symbol -> symbol location Nothing -> IdentifierToken location str -- mapping from strings to keywords keywords :: Map.Map String (SrcSpan -> Token) keywords = Map.fromList keywordNames keywordNames :: [(String, SrcSpan -> Token)] keywordNames = [ ("False", FalseToken), ("class", ClassToken), ("finally", FinallyToken), ("is", IsToken), ("return", ReturnToken) , ("None", NoneToken), ("continue", ContinueToken), ("for", ForToken), ("lambda", LambdaToken), ("try", TryToken) , ("True", TrueToken), ("def", DefToken), ("from", FromToken), ("nonlocal", NonLocalToken), ("while", WhileToken) , ("and", AndToken), ("del", DeleteToken), ("global", GlobalToken), ("not", NotToken), ("with", WithToken) , ("as", AsToken), ("elif", ElifToken), ("if", IfToken), ("or", OrToken), ("yield", YieldToken) , ("assert", AssertToken), ("else", ElseToken), ("import", ImportToken), ("pass", PassToken) , ("break", BreakToken), ("except", ExceptToken), ("in", InToken), ("raise", RaiseToken) ] } language-python-0.4.1/src/Language/Python/Version3/Parser/Parser.y0000644000000000000000000007235112441515301023175 0ustar0000000000000000{ ----------------------------------------------------------------------------- -- | -- Module : Language.Python.Version3.Parser.Parser -- Copyright : (c) 2009 Bernie Pope -- License : BSD-style -- Maintainer : bjpop@csse.unimelb.edu.au -- Stability : experimental -- Portability : ghc -- -- Implementation of the Python version 3 parser. Generated by happy. ----------------------------------------------------------------------------- module Language.Python.Version3.Parser.Parser (parseFileInput, parseSingleInput, parseEval) where import Language.Python.Version3.Parser.Lexer import Language.Python.Common.Token as Token import Language.Python.Common.AST as AST import Language.Python.Common.ParserUtils import Language.Python.Common.ParserMonad import Language.Python.Common.SrcLocation import Data.Either (rights, either) import Data.Maybe (maybeToList) } %name parseFileInput file_input %name parseSingleInput single_input %name parseEval eval_input %tokentype { Token } %error { parseError } %monad { P } { thenP } { returnP } %lexer { lexCont } { EOFToken {} } %token '=' { AssignToken {} } '(' { LeftRoundBracketToken {} } ')' { RightRoundBracketToken {} } '[' { LeftSquareBracketToken {} } ']' { RightSquareBracketToken {} } '{' { LeftBraceToken {} } '}' { RightBraceToken {} } ',' { CommaToken {} } ';' { SemiColonToken {} } ':' { ColonToken {} } '+' { PlusToken {} } '-' { MinusToken {} } '*' { MultToken {} } '**' { ExponentToken {} } '/' { DivToken {} } '//' { FloorDivToken {} } '>' { GreaterThanToken {} } '<' { LessThanToken {} } '==' { EqualityToken {} } '>=' { GreaterThanEqualsToken {} } '<=' { LessThanEqualsToken {} } '|' { BinaryOrToken {} } '^' { XorToken {} } '&' { BinaryAndToken {} } '>>' { ShiftRightToken {} } '<<' { ShiftLeftToken {} } '%' { ModuloToken {} } '~' { TildeToken {} } '!=' { NotEqualsToken {} } '.' { DotToken {} } '...' { EllipsisToken {} } '+=' { PlusAssignToken {} } '-=' { MinusAssignToken {} } '*=' { MultAssignToken {} } '/=' { DivAssignToken {} } '%=' { ModAssignToken {} } '**=' { PowAssignToken {} } '&=' { BinAndAssignToken {} } '|=' { BinOrAssignToken {} } '^=' { BinXorAssignToken {} } '<<=' { LeftShiftAssignToken {} } '>>=' { RightShiftAssignToken {} } '//=' { FloorDivAssignToken {} } '@' { AtToken {} } '->' { RightArrowToken {} } 'and' { AndToken {} } 'as' { AsToken {} } 'assert' { AssertToken {} } 'break' { BreakToken {} } 'bytestring' { ByteStringToken {} } 'class' { ClassToken {} } 'continue' { ContinueToken {} } 'dedent' { DedentToken {} } 'def' { DefToken {} } 'del' { DeleteToken {} } 'elif' { ElifToken {} } 'else' { ElseToken {} } 'except' { ExceptToken {} } 'False' { FalseToken {} } 'finally' { FinallyToken {} } 'float' { FloatToken {} } 'for' { ForToken {} } 'from' { FromToken {} } 'global' { GlobalToken {} } 'ident' { IdentifierToken {} } 'if' { IfToken {} } 'imaginary' { ImaginaryToken {} } 'import' { ImportToken {} } 'indent' { IndentToken {} } 'in' { InToken {} } 'integer' { IntegerToken {} } 'is' { IsToken {} } 'lambda' { LambdaToken {} } 'NEWLINE' { NewlineToken {} } 'None' { NoneToken {} } 'nonlocal' { NonLocalToken {} } 'not' { NotToken {} } 'or' { OrToken {} } 'pass' { PassToken {} } 'raise' { RaiseToken {} } 'return' { ReturnToken {} } 'string' { StringToken {} } 'True' { TrueToken {} } 'try' { TryToken {} } 'while' { WhileToken {} } 'with' { WithToken {} } 'yield' { YieldToken {} } %% pair(p,q): p q { ($1, $2) } left(p,q): p q { $1 } right(p,q): p q { $2 } or(p,q) : p { $1 } | q { $1 } either(p,q) : p { Left $1 } | q { Right $1 } opt(p) : { Nothing } | p { Just $1 } rev_list1(p) : p { [$1] } | rev_list1(p) p { $2 : $1 } many1(p) : rev_list1(p) { reverse $1 } many0(p) : many1(p) { $1 } | { [] } sepOptEndBy(p,sep) : sepByRev(p,sep) ',' { reverse $1 } | sepByRev(p,sep) { reverse $1 } sepBy(p,sep): sepByRev(p,sep) { reverse $1 } sepByRev(p,sep) : p { [$1] } | sepByRev(p,sep) sep p { $3 : $1 } NAME :: { IdentSpan } NAME : 'ident' { Ident (token_literal $1) (getSpan $1) } {- Note: newline tokens in the grammar: It seems there are some dubious uses of NEWLINE in the grammar. This is corroborated by this posting: http://mail.python.org/pipermail/python-dev/2005-October/057014.html The general idea is that the lexer does not generate NEWLINE tokens for lines which contain only spaces or comments. However, the grammar sometimes suggests that such tokens may exist. -} -- single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE {- We don't support the newline at the end of a compound stmt because the lexer would not produce a newline there. It seems like a weirdness in the way the interactive input works. -} single_input :: { [StatementSpan] } single_input : 'NEWLINE' { [] } | simple_stmt { $1 } | compound_stmt {- No newline here! -} { [$1] } -- file_input: (NEWLINE | stmt)* ENDMARKER file_input :: { ModuleSpan } file_input : many0(either('NEWLINE',stmt)) {- No need to mention ENDMARKER -} { Module (concat (rights $1)) } -- eval_input: testlist NEWLINE* ENDMARKER eval_input :: { ExprSpan } eval_input : testlist many0('NEWLINE') {- No need to mention ENDMARKER -} { $1 } -- decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE opt_paren_arg_list :: { [ArgumentSpan] } opt_paren_arg_list: opt(paren_arg_list) { concat (maybeToList $1) } paren_arg_list :: { [ArgumentSpan] } paren_arg_list : '(' optional_arg_list ')' { $2 } decorator :: { DecoratorSpan } decorator : '@' dotted_name opt_paren_arg_list 'NEWLINE' { makeDecorator $1 $2 $3 } -- decorators: decorator+ decorators :: { [DecoratorSpan] } decorators : many1(decorator) { $1 } -- decorated: decorators (classdef | funcdef) decorated :: { StatementSpan } decorated : decorators or(classdef,funcdef) { makeDecorated $1 $2 } -- funcdef: 'def' NAME parameters ['->' test] ':' suite funcdef :: { StatementSpan } funcdef : 'def' NAME parameters opt(right('->',test)) ':' suite { makeFun $1 $2 $3 $4 $6 } -- parameters: '(' [typedargslist] ')' parameters :: { [ParameterSpan] } parameters : '(' opt(typedargslist) ')' { concat (maybeToList $2) } {- typedargslist: ((tfpdef ['=' test] ',')* ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) -} {- Same pattern as argslist and varargslist -} typedargslist :: { [ParameterSpan] } typedargslist: sepOptEndBy(one_typedargs_param,',') {% checkParameters $1 } one_typedargs_param :: { ParameterSpan } one_typedargs_param : tfpdef optional_default { makeParam $1 $2 } | '*' opt(tfpdef) { makeStarParam $1 $2 } | '**' tfpdef { makeStarStarParam $1 $2 } optional_default :: { Maybe ExprSpan } optional_default: opt(equals_test) { $1 } equals_test :: { ExprSpan } equals_test: '=' test { $2 } {- tfpdef: NAME [':' test] -} tfpdef :: { (IdentSpan, Maybe ExprSpan) } tfpdef : NAME opt(colon_test) { ($1, $2) } colon_test :: { ExprSpan } colon_test: ':' test { $2 } {- varargslist: ((vfpdef ['=' test] ',')* ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) -} {- There is some tedious similarity in these rules to the ones for TypedArgsList. varargslist is used for lambda functions, and they do not have parentheses around them (unlike function definitions). Therefore lambda parameters cannot have the optional annotations that normal functions can, because the annotations are introduced using a colon. This would cause ambibguity with the colon that marks the end of the lambda parameter list! -} varargslist :: { [ParameterSpan] } varargslist : sepOptEndBy(one_varargs_param,',') {% checkParameters $1 } one_varargs_param :: { ParameterSpan } one_varargs_param : '*' optvfpdef { makeStarParam $1 $2 } | '**' vfpdef { makeStarStarParam $1 ($2, Nothing) } | vfpdef optional_default { makeParam ($1, Nothing) $2 } -- vfpdef: NAME vfpdef :: { IdentSpan } vfpdef : NAME { $1 } optvfpdef :: { Maybe (IdentSpan, Maybe ExprSpan) } optvfpdef : {- empty -} { Nothing } | vfpdef { Just ($1, Nothing) } -- stmt: simple_stmt | compound_stmt stmt :: { [StatementSpan] } stmt : simple_stmt { $1 } | compound_stmt { [$1] } -- simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE simple_stmt :: { [StatementSpan] } simple_stmt : small_stmts opt(';') 'NEWLINE' { reverse $1 } small_stmts :: { [StatementSpan] } small_stmts : small_stmt { [$1] } | small_stmts ';' small_stmt { $3 : $1 } {- small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -} small_stmt :: { StatementSpan } small_stmt : expr_stmt { $1 } | del_stmt { $1 } | pass_stmt { $1 } | flow_stmt { $1 } | import_stmt { $1 } | global_stmt { $1 } | nonlocal_stmt { $1 } | assert_stmt { $1 } -- expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | ('=' (yield_expr|testlist_star_expr))*) expr_stmt :: { StatementSpan } expr_stmt : testlist_star_expr either(many_assign, augassign_yield_or_test_list) { makeAssignmentOrExpr $1 $2 } many_assign :: { [ExprSpan] } many_assign : many0(right('=', yield_or_test_list_star)) { $1 } yield_or_test_list :: { ExprSpan } yield_or_test_list : or(yield_expr,testlist) { $1 } yield_or_test_list_star :: { ExprSpan } yield_or_test_list_star : or(yield_expr,testlist_star_expr) { $1 } augassign_yield_or_test_list :: { (AssignOpSpan, ExprSpan) } augassign_yield_or_test_list : augassign yield_or_test_list { ($1, $2) } -- testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] testlist_star_expr :: { ExprSpan } testlist_star_expr : test_list_star_rev opt_comma { makeTupleOrExpr (reverse $1) $2 } test_list_star_rev :: { [ExprSpan] } test_list_star_rev : or(test,star_expr) { [$1] } | test_list_star_rev ',' or(test,star_expr) { $3 : $1 } {- augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//=') -} augassign :: { AssignOpSpan } augassign : '+=' { AST.PlusAssign (getSpan $1) } | '-=' { AST.MinusAssign (getSpan $1) } | '*=' { AST.MultAssign (getSpan $1) } | '/=' { AST.DivAssign (getSpan $1) } | '%=' { AST.ModAssign (getSpan $1) } | '**=' { AST.PowAssign (getSpan $1) } | '&=' { AST.BinAndAssign (getSpan $1) } | '|=' { AST.BinOrAssign (getSpan $1) } | '^=' { AST.BinXorAssign (getSpan $1) } | '<<=' { AST.LeftShiftAssign (getSpan $1) } | '>>=' { AST.RightShiftAssign (getSpan $1) } | '//=' { AST.FloorDivAssign (getSpan $1) } -- del_stmt: 'del' exprlist del_stmt :: { StatementSpan } del_stmt : 'del' exprlist { AST.Delete $2 (spanning $1 $2) } -- pass_stmt: 'pass' pass_stmt :: { StatementSpan } pass_stmt : 'pass' { AST.Pass (getSpan $1) } -- flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt flow_stmt :: { StatementSpan } flow_stmt : break_stmt { $1 } | continue_stmt { $1 } | return_stmt { $1 } | raise_stmt { $1 } | yield_stmt { $1 } -- break_stmt: 'break' break_stmt :: { StatementSpan } break_stmt : 'break' { AST.Break (getSpan $1) } -- continue_stmt: 'continue' continue_stmt :: { StatementSpan } continue_stmt : 'continue' { AST.Continue (getSpan $1) } -- return_stmt: 'return' [testlist] return_stmt :: { StatementSpan } return_stmt : 'return' optional_testlist { makeReturn $1 $2 } -- yield_stmt: yield_expr yield_stmt :: { StatementSpan } yield_stmt : yield_expr { StmtExpr $1 (getSpan $1) } -- raise_stmt: 'raise' [test ['from' test]] raise_stmt :: { StatementSpan } raise_stmt : 'raise' opt(pair(test, opt(right('from', test)))) { AST.Raise (RaiseV3 $2) (spanning $1 $2) } -- import_stmt: import_name | import_from import_stmt :: { StatementSpan } import_stmt: or(import_name, import_from) { $1 } -- import_name: 'import' dotted_as_names import_name :: { StatementSpan } import_name : 'import' dotted_as_names { AST.Import $2 (spanning $1 $2) } {- # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ('*' | '(' import_as_names ')' | import_as_names)) -} import_from :: { StatementSpan } import_from : 'from' import_module 'import' star_or_as_names { FromImport $2 $4 (spanning $1 $4) } import_module :: { ImportRelativeSpan } import_module: import_module_dots { makeRelative $1 } import_module_dots :: { [Either Token DottedNameSpan] } import_module_dots : '.' { [ Left $1 ] } | '...' { [ Left $1 ] } | dotted_name { [ Right $1 ] } | '.' import_module_dots { Left $1 : $2 } | '...' import_module_dots { Left $1 : $2 } star_or_as_names :: { FromItemsSpan } star_or_as_names : '*' { ImportEverything (getSpan $1) } | '(' import_as_names ')' { $2 } | import_as_names { $1 } -- import_as_name: NAME ['as' NAME] import_as_name :: { FromItemSpan } import_as_name : NAME optional_as_name { FromItem $1 $2 (spanning $1 $2) } -- dotted_as_name: dotted_name ['as' NAME] dotted_as_name :: { ImportItemSpan } dotted_as_name : dotted_name optional_as_name { ImportItem $1 $2 (spanning $1 $2) } -- import_as_names: import_as_name (',' import_as_name)* [','] import_as_names :: { FromItemsSpan } import_as_names : sepOptEndBy(import_as_name, ',') { FromItems $1 (getSpan $1) } -- dotted_as_names: dotted_as_name (',' dotted_as_name)* dotted_as_names :: { [ImportItemSpan] } dotted_as_names : sepBy(dotted_as_name,',') { $1 } -- dotted_name: NAME ('.' NAME)* dotted_name :: { DottedNameSpan } dotted_name : NAME many0(right('.', NAME)) { $1 : $2 } -- global_stmt: 'global' NAME (',' NAME)* global_stmt :: { StatementSpan } global_stmt : 'global' one_or_more_names { AST.Global $2 (spanning $1 $2) } one_or_more_names :: { [IdentSpan] } one_or_more_names: sepBy(NAME, ',') { $1 } -- nonlocal_stmt: 'nonlocal' NAME (',' NAME)* nonlocal_stmt :: { StatementSpan } nonlocal_stmt : 'nonlocal' one_or_more_names { AST.NonLocal $2 (spanning $1 $2) } -- assert_stmt: 'assert' test [',' test] assert_stmt :: { StatementSpan } assert_stmt : 'assert' sepBy(test,',') { AST.Assert $2 (spanning $1 $2) } -- compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated compound_stmt :: { StatementSpan } compound_stmt : if_stmt { $1 } | while_stmt { $1 } | for_stmt { $1 } | try_stmt { $1 } | with_stmt { $1 } | funcdef { $1 } | classdef { $1 } | decorated { $1 } -- if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] if_stmt :: { StatementSpan } if_stmt : 'if' test ':' suite many0(elif) optional_else { Conditional (($2, $4):$5) $6 (spanning (spanning (spanning $1 $4) $5) $6) } elif :: { (ExprSpan, [StatementSpan]) } elif : 'elif' test ':' suite { ($2, $4) } optional_else :: { [StatementSpan] } optional_else : {- empty -} { [] } | 'else' ':' suite { $3 } -- while_stmt: 'while' test ':' suite ['else' ':' suite] while_stmt :: { StatementSpan } while_stmt : 'while' test ':' suite optional_else { AST.While $2 $4 $5 (spanning (spanning $1 $4) $5) } -- for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] for_stmt :: { StatementSpan } for_stmt : 'for' exprlist 'in' testlist ':' suite optional_else { AST.For $2 $4 $6 $7 (spanning (spanning $1 $6) $7) } {- try_stmt: ('try' ':' suite ((except_clause ':' suite)+ ['else' ':' suite] ['finally' ':' suite] | 'finally' ':' suite)) -} try_stmt :: { StatementSpan } try_stmt : 'try' ':' suite handlers { makeTry $1 $3 $4 } handlers :: { ([HandlerSpan], [StatementSpan], [StatementSpan]) } handlers : one_or_more_except_clauses optional_else optional_finally { ($1, $2, $3) } | 'finally' ':' suite { ([], [], $3) } optional_finally :: { [StatementSpan] } optional_finally : {- empty -} { [] } | 'finally' ':' suite { $3 } one_or_more_except_clauses :: { [HandlerSpan] } one_or_more_except_clauses : many1(handler) { $1 } handler :: { HandlerSpan } handler : except_clause ':' suite { Handler $1 $3 (spanning $1 $3) } -- with_stmt: 'with' with_item (',' with_item)* ':' suite with_stmt :: { StatementSpan } with_stmt : 'with' sepOptEndBy(with_item, ',') ':' suite { AST.With $2 $4 (spanning $1 $4) } -- with_item: test ['as' expr] with_item :: { (ExprSpan, Maybe ExprSpan) } with_item: pair(test,opt(right('as',expr))) { $1 } -- except_clause: 'except' [test ['as' NAME]] -- XXX is this a bug in the grammar? In the online does the target is more complex than a NAME. -- see: http://docs.python.org/3.1/reference/compound_stmts.html#the-try-statement except_clause :: { ExceptClauseSpan } except_clause : 'except' opt(pair(test, optional_as_expr)) { ExceptClause $2 (spanning $1 $2) } optional_as_expr :: { Maybe ExprSpan} optional_as_expr: opt(right('as', test)) { $1 } optional_as_name :: { Maybe IdentSpan } optional_as_name: opt(right('as', NAME)) { $1 } -- suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT -- Note: we don't have a newline before indent b/c it is redundant suite :: { [StatementSpan] } suite : simple_stmt { $1 } | {- no newline here! -} 'indent' many1(stmt) 'dedent' { concat $2 } -- test: or_test ['if' or_test 'else' test] | lambdef test :: { ExprSpan } test : or_test opt(test_if_cond) { makeConditionalExpr $1 $2 } | lambdef { $1 } test_if_cond :: { (ExprSpan, ExprSpan) } test_if_cond: 'if' or_test 'else' test { ($2, $4) } -- test_nocond: or_test | lambdef_nocond test_no_cond :: { ExprSpan } test_no_cond: or(or_test, lambdef_nocond) { $1 } -- lambdef: 'lambda' [varargslist] ':' test lambdef :: { ExprSpan } lambdef : 'lambda' opt_varargslist ':' test { AST.Lambda $2 $4 (spanning $1 $4) } -- lambdef_nocond: 'lambda' [varargslist] ':' test_nocond lambdef_nocond :: { ExprSpan } lambdef_nocond : 'lambda' opt_varargslist ':' test_no_cond { AST.Lambda $2 $4 (spanning $1 $4) } opt_varargslist :: { [ParameterSpan] } opt_varargslist: opt(varargslist) { concat (maybeToList $1) } -- or_test: and_test ('or' and_test)* or_test :: { ExprSpan } or_test : and_test many0(pair(or_op,and_test)) { makeBinOp $1 $2 } or_op :: { OpSpan } or_op: 'or' { AST.Or (getSpan $1) } -- and_test: not_test ('and' not_test)* and_test :: { ExprSpan } and_test : not_test many0(pair(and_op, not_test)) { makeBinOp $1 $2 } and_op :: { OpSpan } and_op: 'and' { AST.And (getSpan $1) } -- not_test: 'not' not_test | comparison not_test :: { ExprSpan } not_test : 'not' not_test { UnaryOp (AST.Not (getSpan $1)) $2 (spanning $1 $2) } | comparison { $1 } -- comparison: expr (comp_op expr)* comparison :: { ExprSpan } comparison : expr many0(pair(comp_op, expr)) { makeBinOp $1 $2 } -- comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' comp_op :: { OpSpan } comp_op : '<' { AST.LessThan (getSpan $1) } | '>' { AST.GreaterThan (getSpan $1) } | '==' { AST.Equality (getSpan $1) } | '>=' { AST.GreaterThanEquals (getSpan $1) } | '<=' { AST.LessThanEquals (getSpan $1) } | '!=' { AST.NotEquals (getSpan $1) } | 'in' { AST.In (getSpan $1) } | 'not' 'in' { AST.NotIn (spanning $1 $2) } | 'is' { AST.Is (getSpan $1) } | 'is' 'not' { AST.IsNot (spanning $1 $2) } -- star_expr: '*' expr star_expr :: { ExprSpan } star_expr : '*' expr { Starred $2 (spanning $1 $2) } -- expr: xor_expr ('|' xor_expr)* expr :: { ExprSpan } expr : xor_expr many0(pair(bar_op, xor_expr)) { makeBinOp $1 $2 } bar_op :: { OpSpan } bar_op: '|' { AST.BinaryOr (getSpan $1) } -- xor_expr: and_expr ('^' and_expr)* xor_expr :: { ExprSpan } xor_expr : and_expr many0(pair(hat_op, and_expr)) { makeBinOp $1 $2 } hat_op :: { OpSpan } hat_op: '^' { AST.Xor (getSpan $1) } -- and_expr: shift_expr ('&' shift_expr)* and_expr :: { ExprSpan } and_expr : shift_expr many0(pair(ampersand, shift_expr)) { makeBinOp $1 $2 } ampersand :: { OpSpan } ampersand: '&' { AST.BinaryAnd (getSpan $1) } -- shift_expr: arith_expr (('<<'|'>>') arith_expr)* shift_expr :: { ExprSpan } shift_expr: arith_expr many0(pair(shift_op, arith_expr)) { makeBinOp $1 $2 } shift_op :: { OpSpan } shift_op : '<<' { AST.ShiftLeft (getSpan $1) } | '>>' { AST.ShiftRight (getSpan $1) } -- arith_expr: term (('+'|'-') term)* arith_expr :: { ExprSpan } arith_expr: term many0(pair(arith_op, term)) { makeBinOp $1 $2 } arith_op :: { OpSpan } arith_op : '+' { AST.Plus (getSpan $1) } | '-' { AST.Minus (getSpan $1) } -- term: factor (('*'|'/'|'%'|'//') factor)* term :: { ExprSpan } term : factor many0(pair(mult_div_mod_op, factor)) { makeBinOp $1 $2 } mult_div_mod_op :: { OpSpan } mult_div_mod_op : '*' { AST.Multiply (getSpan $1) } | '/' { AST.Divide (getSpan $1) } | '%' { AST.Modulo (getSpan $1) } | '//' { AST.FloorDivide (getSpan $1) } -- factor: ('+'|'-'|'~') factor | power factor :: { ExprSpan } factor : or(arith_op, tilde_op) factor { UnaryOp $1 $2 (spanning $1 $2) } | power { $1 } tilde_op :: { OpSpan } tilde_op: '~' { AST.Invert (getSpan $1) } -- power: atom trailer* ['**' factor] power :: { ExprSpan } power : atom many0(trailer) opt(pair(exponent_op, factor)) { makeBinOp (addTrailer $1 $2) (maybeToList $3) } exponent_op :: { OpSpan } exponent_op: '**' { AST.Exponent (getSpan $1) } {- atom: ('(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' | '{' [dictorsetmaker] '}' | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') -} atom :: { ExprSpan } atom : '(' yield_or_testlist_comp ')' { $2 (spanning $1 $3) } | list_atom { $1 } | dict_or_set_atom { $1 } | NAME { AST.Var $1 (getSpan $1) } | 'integer' { AST.Int (token_integer $1) (token_literal $1) (getSpan $1) } | 'float' { AST.Float (token_double $1) (token_literal $1) (getSpan $1) } | 'imaginary' { AST.Imaginary (token_double $1) (token_literal $1) (getSpan $1) } | many1('string') { AST.Strings (map token_literal $1) (getSpan $1) } | many1('bytestring') { AST.ByteStrings (map token_literal $1) (getSpan $1) } | '...' { AST.Ellipsis (getSpan $1) } | 'None' { AST.None (getSpan $1) } | 'True' { AST.Bool Prelude.True (getSpan $1) } | 'False' { AST.Bool Prelude.False (getSpan $1) } list_atom :: { ExprSpan } list_atom : '[' ']' { List [] (spanning $1 $2) } | '[' testlist_comp ']' { makeListForm (spanning $1 $3) $2 } dict_or_set_atom :: { ExprSpan } dict_or_set_atom : '{' '}' { Dictionary [] (spanning $1 $2) } | '{' dictorsetmaker '}' { $2 (spanning $1 $3) } yield_or_testlist_comp :: { SrcSpan -> ExprSpan } yield_or_testlist_comp : {- empty -} { Tuple [] } | yield_expr { Paren $1 } | testlist_comp { either Paren Generator $1 } -- testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) testlist_comp :: { Either ExprSpan (ComprehensionSpan ExprSpan) } testlist_comp : testlist_star_expr { Left $1 } | or(test,star_expr) comp_for { Right (makeComprehension $1 $2) } -- trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME trailer :: { Trailer } trailer : paren_arg_list { TrailerCall $1 (getSpan $1) } | '[' subscriptlist ']' { TrailerSubscript $2 (spanning $1 $3) } | '.' NAME { TrailerDot $2 (getSpan $1) (spanning $1 $2) } -- subscriptlist: subscript (',' subscript)* [','] subscriptlist :: { [Subscript] } subscriptlist : sepOptEndBy(subscript, ',') { $1 } -- subscript: test | [test] ':' [test] [sliceop] subscript :: { Subscript } subscript : test { SubscriptExpr $1 (getSpan $1) } | opt(test) ':' opt(test) opt(sliceop) { SubscriptSlice $1 $3 $4 (spanning (spanning (spanning $1 $2) $3) $4) } -- sliceop: ':' [test] sliceop :: { Maybe ExprSpan } sliceop : ':' opt(test) { $2 } -- exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] exprlist :: { [ExprSpan] } exprlist: sepOptEndBy(or(expr,star_expr), ',') { $1 } opt_comma :: { Maybe Token } opt_comma : {- empty -} { Nothing } | ',' { Just $1 } -- testlist: test (',' test)* [','] -- Some trickery here because the of the optional trailing comma, which -- could turn a normal expression into a tuple. -- Very occasionally, testlist is used to generate something which is not -- a tuple (such as the square bracket notation in list literals). Therefore -- it would seem like a good idea to not return a tuple in this case, but -- a list of expressions. However this would complicate a lot of code -- since we would have to carry around the optional comma information. -- I've decided to leave it as a tuple, and in special cases, unpack the -- tuple and pull out the list of expressions. testlist :: { ExprSpan } testlist : testlistrev opt_comma { makeTupleOrExpr (reverse $1) $2 } testlistrev :: { [ExprSpan] } testlistrev : test { [$1] } | testlistrev ',' test { $3 : $1 } {- dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | (test (comp_for | (',' test)* [','])) ) -} dictorsetmaker :: { SrcSpan -> ExprSpan } dictorsetmaker : test ':' test dict_rest { makeDictionary ($1, $3) $4 } | test set_rest { makeSet $1 $2 } dict_rest :: { Either CompForSpan [(ExprSpan, ExprSpan)] } dict_rest : comp_for { Left $1 } | zero_or_more_dict_mappings_rev opt_comma { Right (reverse $1) } zero_or_more_dict_mappings_rev :: { [(ExprSpan, ExprSpan)] } zero_or_more_dict_mappings_rev : {- empty -} { [] } | zero_or_more_dict_mappings_rev ',' test ':' test { ($3,$5) : $1 } set_rest :: { Either CompForSpan [ExprSpan] } set_rest : comp_for { Left $1 } | zero_or_more_comma_test_rev opt_comma { Right (reverse $1) } zero_or_more_comma_test_rev :: { [ExprSpan] } zero_or_more_comma_test_rev : {- empty -} { [] } | zero_or_more_comma_test_rev ',' test { $3 : $1 } -- classdef: 'class' NAME ['(' [arglist] ')'] ':' suite classdef :: { StatementSpan } -- classdef: 'class' NAME optional_arg_list ':' suite classdef: 'class' NAME opt_paren_arg_list ':' suite { AST.Class $2 $3 $5 (spanning $1 $5) } optional_arg_list :: { [ArgumentSpan] } optional_arg_list: opt(arglist) { concat (maybeToList $1) } {- arglist: (argument ',')* (argument [','] |'*' test (',' argument)* [',' '**' test] |'**' test) -} {- We don't follow the grammar rules directly (though we do implement something equivalent). The reason is that there is ambiguity over the optional comma. It is probably okay to allow the optional comma even after the *, and ** forms. It seems more consistent to me. -} arglist :: { [ArgumentSpan] } arglist: sepOptEndBy(oneArgument,',') {% checkArguments $1 } oneArgument : '*' test { ArgVarArgsPos $2 (spanning $1 $2) } | '**' test { ArgVarArgsKeyword $2 (spanning $1 $2) } | argument { $1 } -- argument: test [comp_for] | test '=' test # Really [keyword '='] test argument :: { ArgumentSpan } argument : NAME '=' test { ArgKeyword $1 $3 (spanning $1 $3) } | test { ArgExpr $1 (getSpan $1) } | test comp_for { let span = spanning $1 $1 in ArgExpr (Generator (makeComprehension $1 $2) span) span } -- comp_iter: comp_for | comp_if comp_iter :: { CompIterSpan } comp_iter : comp_for { IterFor $1 (getSpan $1) } | comp_if { IterIf $1 (getSpan $1) } -- comp_for: 'for' exprlist 'in' or_test [comp_iter] comp_for :: { CompForSpan } comp_for : 'for' exprlist 'in' or_test opt(comp_iter) { CompFor $2 $4 $5 (spanning (spanning $1 $4) $5) } -- comp_if: 'if' test_nocond [comp_iter] comp_if :: { CompIfSpan } comp_if : 'if' test_no_cond opt(comp_iter) { CompIf $2 $3 (spanning (spanning $1 $2) $3) } -- encoding_decl: NAME -- Not used in the rest of the grammar! -- yield_expr: 'yield' [testlist] yield_expr :: { ExprSpan } yield_expr : 'yield' optional_testlist { AST.Yield $2 (spanning $1 $2) } optional_testlist :: { Maybe ExprSpan } optional_testlist: opt(testlist) { $1 } { -- Put additional Haskell code in here if needed. }