Skip to content

Store unescaped string in AST node #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
python -m pip install flake8 pytest pytest-subtests
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
Expand Down
9 changes: 6 additions & 3 deletions luaparser/astnodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,18 +628,21 @@ class String(Expression):
"""Define the Lua string expression.

Attributes:
s: String value.
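s: String value as bytes, with escape sequences already decoded.
raw: String text as written in the Lua source, with escape sequences left as-is (not decoded).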
delimiter: The string delimiter
"""

def __init__(
self,
s: str,
s: bytes,
raw: str,
delimiter: StringDelimiter = StringDelimiter.SINGLE_QUOTE,
**kwargs
):
super(String, self).__init__("String", **kwargs)
self.s: str = s
self.s: bytes = s
self.raw: str = raw
self.delimiter: StringDelimiter = delimiter


Expand Down
12 changes: 6 additions & 6 deletions luaparser/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from luaparser.parser.LuaLexer import LuaLexer
from luaparser.parser.LuaParser import LuaParser
from luaparser.parser.LuaParserVisitor import LuaParserVisitor
from luaparser.utils.string_literals import unescape_lua_string

TNode = TypeVar("TNode", bound=Node)

Expand Down Expand Up @@ -659,9 +660,8 @@ def visitString(self, ctx: LuaParser.StringContext):
elif p.match(lua_str):
lua_str = p.search(lua_str).group(1)

# Eval string to unescape:
try:
lua_str = ast.literal_eval(F'"{lua_str}"')
except:
pass
return String(lua_str, delimiter)
if delimiter == StringDelimiter.DOUBLE_QUOTE or delimiter == StringDelimiter.SINGLE_QUOTE:
unescaped_str = unescape_lua_string(lua_str)
else:
unescaped_str = lua_str.encode("utf-8")
return String(unescaped_str, lua_str, delimiter)
23 changes: 21 additions & 2 deletions luaparser/parser/LuaLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,30 @@ fragment EscapeSequence:
| UtfEscape
;

fragment DecimalEscape: '\\' Digit | '\\' Digit Digit | '\\' [0-2] Digit Digit;
fragment DecimalEscape:
'\\'
( Digit
| Digit Digit
| [0-1] Digit Digit
| '2' ('5' [0-5] | [0-4] Digit)
)
;

fragment HexEscape: '\\' 'x' HexDigit HexDigit;

fragment UtfEscape: '\\' 'u{' HexDigit+ '}';
fragment UtfEscape:
'\\' 'u{'
( HexDigit
| HexDigit HexDigit
| HexDigit HexDigit HexDigit
| HexDigit HexDigit HexDigit HexDigit
| HexDigit HexDigit HexDigit HexDigit HexDigit
| HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
| HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
| [0-7] HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit
)
'}'
;

fragment Digit: [0-9];

Expand Down
2 changes: 1 addition & 1 deletion luaparser/parser/LuaLexer.interp

Large diffs are not rendered by default.

396 changes: 207 additions & 189 deletions luaparser/parser/LuaLexer.py

Large diffs are not rendered by default.

53 changes: 15 additions & 38 deletions luaparser/printers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ def __init__(self, indent):
def visit(self, node):
return repr(node)

# Visitor overload for bytes values: returns the Python repr of the value
# (for example b'hello world !').
@visitor(bytes)
def visit(self, node):
return repr(node)

@visitor(float)
def visit(self, node):
return str(node)
Expand Down Expand Up @@ -124,41 +128,6 @@ def visit(self, node):
return res


escape_dict = {
"\a": r"\a",
"\b": r"\b",
"\c": r"\c",
"\f": r"\f",
"\n": r"\n",
"\r": r"\r",
"\t": r"\t",
"\v": r"\v",
"'": r"\'",
'"': r"\"",
"\0": r"\0",
"\1": r"\1",
"\2": r"\2",
"\3": r"\3",
"\4": r"\4",
"\5": r"\5",
"\6": r"\6",
"\7": r"\7",
"\8": r"\8",
"\9": r"\9",
}


def raw(text):
"""Returns a raw string representation of text"""
new_string = ""
for char in text:
try:
new_string += escape_dict[char]
except KeyError:
new_string += char
return new_string


class HTMLStyleVisitor:
def __init__(self):
pass
Expand All @@ -172,6 +141,12 @@ def get_xml_string(self, tree):

return minidom.parseString(ElementTree.tostring(doc)).toprettyxml(indent=" ")

# Visitor overload for bytes values: strips one pair of surrounding double
# quotes when the value both starts and ends with '"', then returns the bytes.
# NOTE(review): the node visitor in this class skips bytes results
# (`elif type(child_node) is bytes: pass`), so this value does not appear to
# be written into the XML output — confirm that is intended.
@visitor(bytes)
def visit(self, node):
if node.startswith(b'"') and node.endswith(b'"'):
node = node[1:-1]
return node

@visitor(str)
def visit(self, node):
if node.startswith('"') and node.endswith('"'):
Expand Down Expand Up @@ -208,6 +183,8 @@ def visit(self, node):
child_node = self.visit(attrValue)
if type(child_node) is str:
xml_attr.text = child_node
elif type(child_node) is bytes:
pass
elif type(child_node) is list:
xml_attr.extend(child_node)
else:
Expand Down Expand Up @@ -420,11 +397,11 @@ def visit(self, node: Number) -> str:
@visit.register
def visit(self, node: String) -> str:
if node.delimiter == StringDelimiter.SINGLE_QUOTE:
return "'" + self.do_visit(node.s) + "'"
return "'" + self.do_visit(node.raw) + "'"
elif node.delimiter == StringDelimiter.DOUBLE_QUOTE:
return '"' + self.do_visit(node.s) + '"'
return '"' + self.do_visit(node.raw) + '"'
else:
return "[[" + self.do_visit(node.s) + "]]"
return "[[" + self.do_visit(node.raw) + "]]"

@visit.register
def visit(self, node: Table):
Expand Down
2 changes: 1 addition & 1 deletion luaparser/tests/test_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def test_comment_in_table(self):
),
Field(
Number(1),
String("foo", StringDelimiter.DOUBLE_QUOTE),
String(b"foo", "foo", StringDelimiter.DOUBLE_QUOTE),
comments=[Comment("-- just a value")],
between_brackets=True,
),
Expand Down
26 changes: 13 additions & 13 deletions luaparser/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,8 @@ def test_string_concatenation(self):
targets=[Name("str")],
values=[
Concat(
left=String("begin", StringDelimiter.DOUBLE_QUOTE),
right=String("end", StringDelimiter.DOUBLE_QUOTE),
left=String(b"begin", "begin", StringDelimiter.DOUBLE_QUOTE),
right=String(b"end", "end", StringDelimiter.DOUBLE_QUOTE),
)
],
)
Expand Down Expand Up @@ -429,11 +429,11 @@ def test_dict(self):
[
Field(
Name("foo"),
String("bar", StringDelimiter.DOUBLE_QUOTE),
String(b"bar", "bar", StringDelimiter.DOUBLE_QUOTE),
),
Field(
Name("bar"),
String("foo", StringDelimiter.DOUBLE_QUOTE),
String(b"foo", "foo", StringDelimiter.DOUBLE_QUOTE),
),
]
)
Expand Down Expand Up @@ -467,7 +467,7 @@ def test_nested_dict(self):
[
Field(
Name("car"),
Table([Field(Name("name"), String("bmw"))]),
Table([Field(Name("name"), String(b"bmw", "bmw"))]),
),
Field(
Name("options"),
Expand Down Expand Up @@ -555,15 +555,15 @@ def test_mix_dict_array(self):
),
Field(
Number(1),
String("enabled", StringDelimiter.DOUBLE_QUOTE),
String(b"enabled", "enabled", StringDelimiter.DOUBLE_QUOTE),
between_brackets=True,
),
Field(Number(2), Number(157), between_brackets=True),
Field(
TrueExpr(), FalseExpr(), between_brackets=True
),
Field(
String("true"),
String(b"true", "true"),
TrueExpr(),
between_brackets=True,
),
Expand All @@ -588,7 +588,7 @@ def test_function_call_simple(self):
[
Call(
func=Name("print"),
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
)
]
)
Expand All @@ -602,7 +602,7 @@ def test_function_call_no_par_string(self):
[
Call(
func=Name("print"),
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
style=CallStyle.NO_PARENTHESIS,
)
]
Expand Down Expand Up @@ -630,7 +630,7 @@ def test_function_invoke(self):
Invoke(
source=Name("foo"),
func=Name("print"),
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
)
]
)
Expand All @@ -645,7 +645,7 @@ def test_function_nested_invoke(self):
Invoke(
source=Invoke(source=Name("foo"), func=Name("bar"), args=[]),
func=Name("print"),
args=[String("hello", StringDelimiter.DOUBLE_QUOTE)],
args=[String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE)],
)
]
)
Expand All @@ -660,7 +660,7 @@ def test_function_call_args(self):
Call(
func=Name("print"),
args=[
String("hello", StringDelimiter.DOUBLE_QUOTE),
String(b"hello", "hello", StringDelimiter.DOUBLE_QUOTE),
Number(n=42),
],
)
Expand Down Expand Up @@ -708,7 +708,7 @@ def test_function_exp_invoke(self):
notation=IndexNotation.SQUARE
),
func=Name("hello"),
args=[String("ok", delimiter=StringDelimiter.DOUBLE_QUOTE)],
args=[String(b"ok", "ok", delimiter=StringDelimiter.DOUBLE_QUOTE)],
style=CallStyle.NO_PARENTHESIS,
)
]
Expand Down
14 changes: 8 additions & 6 deletions luaparser/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_cont_int_1(self):
Call(
Name("describe"),
[
String("", StringDelimiter.DOUBLE_QUOTE),
String(b"", "", StringDelimiter.DOUBLE_QUOTE),
AnonymousFunction(
[],
Block(
Expand Down Expand Up @@ -119,7 +119,7 @@ def test_cont_int_3(self):
tree = ast.parse(textwrap.dedent(r"""print(x['a'])"""))
exp = Chunk(Block([Call(
func=Name("print"),
args=[Index(idx=String("a"), value=Name("x"), notation=IndexNotation.SQUARE)],
args=[Index(idx=String(b"a", "a"), value=Name("x"), notation=IndexNotation.SQUARE)],
)]))
self.assertEqual(exp, tree)

Expand Down Expand Up @@ -171,9 +171,10 @@ def test_cont_int_4(self):
id: 'print'
args: [] 1 item
0: {} 1 key
String: {} 4 keys
String: {} 5 keys
wrapped: False
s: 'hello world !'
s: b'hello world !'
raw: 'hello world !'
delimiter: SINGLE_QUOTE
style: DEFAULT
1: {} 1 key
Expand Down Expand Up @@ -255,6 +256,7 @@ def test_cont_int_6(self):
Field(
Name("mykey"),
String(
b"myvalue",
"myvalue",
delimiter=StringDelimiter.DOUBLE_QUOTE,
),
Expand Down Expand Up @@ -323,7 +325,7 @@ def test_cont_int_10(self):
If(
test=AndLoOp(left=GreaterThanOp(left=ULengthOP(Name("setting")), right=Number(10)),
right=EqToOp(left=Name("setting_name"),
right=String("user", StringDelimiter.DOUBLE_QUOTE))),
right=String(b"user", "user", StringDelimiter.DOUBLE_QUOTE))),
body=Block([
Return([Number(100)])
]),
Expand All @@ -340,7 +342,7 @@ def test_cont_int_12(self):
"""))
exp = Chunk(
Block([
Assign([Name("a")], [String("\x00\n\ta", StringDelimiter.SINGLE_QUOTE)])
Assign([Name("a")], [String(b"\x00\n\ta", "\\0\\n\\ta", StringDelimiter.SINGLE_QUOTE)])
])
)
self.assertEqual(exp, tree)
Expand Down
30 changes: 30 additions & 0 deletions luaparser/tests/test_lua_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,33 @@ def test_int_24(self):
def test_parenthesis(self):
    """Parenthesised sub-expressions are kept when the tree is printed back."""
    lua_code = "a = (1 * 2) + 3"
    regenerated = ast.to_lua_source(ast.parse(lua_code))
    self.assertEqual(lua_code, regenerated)

def test_escapes(self):
    """Each escape sequence must survive a parse -> to_lua_source round trip.

    Every sequence is embedded in a double-quoted string literal and checked
    in its own sub-test, so one failing escape does not hide the others.
    """
    escape_sequences = (
        # Single-character escapes.
        r"\a", r"\b", r"\f", r"\n", r"\r", r"\t", r"\v",
        r"\\", r"\"", r"\'", r"\z",
        # Hexadecimal and decimal byte escapes.
        r"\x0A", r"\1", r"\12", r"\123",
        # Unicode code point escapes.
        r"\u{13AA}", r"\u{1F34B}", r"\u{7FFFFFFF}",
    )
    for escape in escape_sequences:
        with self.subTest(escape=escape):
            source = 's = "' + escape + '"'
            regenerated = ast.to_lua_source(ast.parse(source))
            self.assertEqual(source, regenerated)

def test_unicode_string_literal(self):
    """A literal containing non-ASCII characters is printed back unchanged."""
    lua_code = 's = "🖥️"'
    regenerated = ast.to_lua_source(ast.parse(lua_code))
    self.assertEqual(lua_code, regenerated)
Loading