aauss committed on
Commit
3fa3bf9
·
1 Parent(s): 5ec6ad7

Expand escaping of \n \r \t

Browse files
test_of_time_accuracy.py CHANGED
@@ -89,43 +89,65 @@ class TestOfTimeAccuracy(evaluate.Metric):
89
  @staticmethod
90
  def _extract_first_json_object(text: str) -> dict | None:
91
  """
92
- Extracts the first valid JSON object from a string.
93
 
94
- Scans through the text and returns the first valid JSON dictionary found.
95
- This is useful for parsing LLM outputs that may contain JSON mixed with
96
- other text or markdown formatting.
97
 
98
  Args:
99
  text: String that may contain JSON objects
100
 
101
  Returns:
102
- The first JSON dictionary found, or None if no valid JSON dict exists
103
  """
 
 
 
104
  decoder = json.JSONDecoder()
105
- idx, end = 0, len(text)
106
-
107
- while idx < end:
108
- try:
109
- obj, next_idx = decoder.raw_decode(text, idx)
110
- if isinstance(obj, dict):
111
- return obj
112
- idx = next_idx
113
- except json.JSONDecodeError:
114
- # Try escaping newlines and parsing again from this position
115
  try:
116
- # Find the potential JSON substring and escape newlines
117
- remaining = text[idx:]
118
- fixed = remaining.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
119
- obj, _ = decoder.raw_decode(fixed, 0)
120
  if isinstance(obj, dict):
121
  return obj
122
- except (json.JSONDecodeError, ValueError):
123
  pass
124
- idx += 1
125
- except ValueError:
126
- idx += 1
127
  return None
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  @staticmethod
130
  def _parse_reference_label(label_str: str) -> dict | None:
131
  """
 
89
  @staticmethod
90
  def _extract_first_json_object(text: str) -> dict | None:
91
  """
92
+ Extract the first valid JSON object from text.
93
 
94
+ Handles common LLM output issues like unescaped newlines in string
95
+ values (LLMs produce human-readable output, not strict JSON).
 
96
 
97
  Args:
98
  text: String that may contain JSON objects
99
 
100
  Returns:
101
+ The first JSON dictionary found, or None if no valid JSON exists
102
  """
103
+ # Fix unescaped control chars in strings (common LLM issue)
104
+ text = TestOfTimeAccuracy._escape_control_chars_in_strings(text)
105
+
106
  decoder = json.JSONDecoder()
107
+ idx = 0
108
+ while idx < len(text):
109
+ if text[idx] == '{':
 
 
 
 
 
 
 
110
  try:
111
+ obj, _ = decoder.raw_decode(text, idx)
 
 
 
112
  if isinstance(obj, dict):
113
  return obj
114
+ except json.JSONDecodeError:
115
  pass
116
+ idx += 1
 
 
117
  return None
118
 
119
@staticmethod
def _escape_control_chars_in_strings(text: str) -> str:
    """
    Escape literal control characters inside JSON string values.

    LLMs produce newlines/tabs for readability, but JSON requires every
    control character (U+0000-U+001F) to be escaped within strings.
    Characters outside of double-quoted spans are copied unchanged, and
    existing backslash escape pairs inside strings are preserved as-is.

    Args:
        text: Text that may contain JSON with raw control characters
            inside its string values

    Returns:
        A copy of ``text`` in which each control character found inside a
        double-quoted span is replaced by its JSON escape (``\\n``,
        ``\\r``, ``\\t``, ``\\b``, ``\\f`` or ``\\uXXXX``)
    """
    # Two-character escapes that JSON defines for control characters;
    # every other control character falls back to the \uXXXX form.
    short_escapes = {
        "\b": "\\b",
        "\f": "\\f",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }
    result = []
    in_string = False
    i = 0
    length = len(text)
    while i < length:
        char = text[i]
        if char == "\\" and in_string and i + 1 < length:
            # Preserve existing escape sequences (also keeps an escaped
            # quote from toggling the in-string state).
            result.append(text[i:i + 2])
            i += 2
            continue
        if char == '"':
            in_string = not in_string
            result.append(char)
        elif in_string and char < " ":
            # Any code point below U+0020 is illegal unescaped in JSON.
            result.append(short_escapes.get(char) or f"\\u{ord(char):04x}")
        else:
            result.append(char)
        i += 1
    return "".join(result)
150
+
151
  @staticmethod
152
  def _parse_reference_label(label_str: str) -> dict | None:
153
  """
tests/test_arithmetic_scoring.py CHANGED
@@ -12,8 +12,8 @@ arithmetic_test_cases = {
12
  ' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
13
  '\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
14
  '{"explanation": "some explanation", "age": "3319"}',
15
- 'JSON = {"explanation": "Let\'s analyze the available time slots for a 30-minute meeting, considering the constraints and schedules of Clara, William, and Alice.\n\n**1. Define Working Hours:** 9:00 AM to 5:00 PM\n\n**2. Identify Booked Time Slots:**\n\n* Clara: 9:30 AM - 12:30 PM\n* William: 1:30 PM - 3:30 PM\n* Alice: 9:00 AM - 11:00 AM (flexible, but this constraint impacts the meeting time)\n\n**3. Determine Available Time Slots for Clara and William:**\n\n* Clara is available: 9:00 AM - 9:30 AM and 12:30 PM - 5:00 PM\n* William is available: 9:00 AM - 1:30 PM and 3:30 PM - 5:00 PM\n\n**4. Find the Intersection of Clara and William\'s Availability:**\n\n* From 9:00 AM to 9:30 AM: Both are available.\n* From 9:30 AM to 12:30 PM: Clara is booked.\n* From 12:30 PM to 1:30 PM: Both are available.\n* From 1:30 PM to 3:30 PM: William is booked.\n* From 3:30 PM to 5:00 PM: Both are available.\n\nSo, the available time slots for both Clara and William are:\n* 9:00 AM - 9:30 AM\n* 12:30 PM - 1:30 PM\n* 3:30 PM - 5:00 PM\n\n**5. Consider Alice\'s Flexibility:** Since Alice is flexible and can shift her meetings, we don\'t need to consider her booked time slot (9:00 AM - 11:00 AM) when determining the possibilities.\n\n**6. Calculate Possible Meeting Start Times:**\n\n* **9:00 AM - 9:30 AM:** Possible start time: 9:00 AM. (1 possibility)\n* **12:30 PM - 1:30 PM:** Possible start times: 12:30 PM, 1:00 PM. (2 possibilities)\n* **3:30 PM - 5:00 PM:** Possible start times: 3:30 PM, 4:00 PM, 4:30 PM. (3 possibilities)\n\n**7. Total Possible Meeting Times:** 1 + 2 + 3 = 6\n\n", "answer": 6}'
16
-
17
  ],
18
  "references": [
19
  '{"answer": "352 BC"}',
@@ -25,9 +25,21 @@ arithmetic_test_cases = {
25
  '{"H": 57.0, "M": 11.0, "S": 15.0}',
26
  '{"answer": 3319}',
27
  '{"answer": 6}',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ],
29
- "result": {"accuracy": 6 / 9},
30
- "per_item_accuracy": [True, True, True, False,False, False, True, True, True],
31
  }
32
 
33
 
 
12
  ' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
13
  '\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
14
  '{"explanation": "some explanation", "age": "3319"}',
15
+ 'JSON = {"explanation": "Let\'s analyze the available time slots for a 30-minute meeting, considering the constraints and schedules of Clara, William, and Alice.\n\n**1. Define Working Hours:** 9:00 AM to 5:00 PM\n\n**2. Identify Booked Time Slots:**\n\n* Clara: 9:30 AM - 12:30 PM\n* William: 1:30 PM - 3:30 PM\n* Alice: 9:00 AM - 11:00 AM (flexible, but this constraint impacts the meeting time)\n\n**3. Determine Available Time Slots for Clara and William:**\n\n* Clara is available: 9:00 AM - 9:30 AM and 12:30 PM - 5:00 PM\n* William is available: 9:00 AM - 1:30 PM and 3:30 PM - 5:00 PM\n\n**4. Find the Intersection of Clara and William\'s Availability:**\n\n* From 9:00 AM to 9:30 AM: Both are available.\n* From 9:30 AM to 12:30 PM: Clara is booked.\n* From 12:30 PM to 1:30 PM: Both are available.\n* From 1:30 PM to 3:30 PM: William is booked.\n* From 3:30 PM to 5:00 PM: Both are available.\n\nSo, the available time slots for both Clara and William are:\n* 9:00 AM - 9:30 AM\n* 12:30 PM - 1:30 PM\n* 3:30 PM - 5:00 PM\n\n**5. Consider Alice\'s Flexibility:** Since Alice is flexible and can shift her meetings, we don\'t need to consider her booked time slot (9:00 AM - 11:00 AM) when determining the possibilities.\n\n**6. Calculate Possible Meeting Start Times:**\n\n* **9:00 AM - 9:30 AM:** Possible start time: 9:00 AM. (1 possibility)\n* **12:30 PM - 1:30 PM:** Possible start times: 12:30 PM, 1:00 PM. (2 possibilities)\n* **3:30 PM - 5:00 PM:** Possible start times: 3:30 PM, 4:00 PM, 4:30 PM. (3 possibilities)\n\n**7. Total Possible Meeting Times:** 1 + 2 + 3 = 6\n\n", "answer": 6}',
16
+ '{\n "explanation": "First, we calculate the difference in hours, minutes, and seconds between the two times. The start time is 08:30:33 and the end time is 14:33:31. Since the end time is later than the start time, we do not need to consider the next day scenario. \n Hours difference: 14 - 8 = 6\n Minutes difference: 33 - 30 = 3\n Seconds difference: 31 - 33 = -2, but since we cannot have negative seconds, we borrow 1 minute from the minutes difference, making it 2 and adding 60 to the seconds difference, resulting in 59 - 2 = 59",\n "hours": 6,\n "minutes": 2,\n "seconds": 58\n}',
17
  ],
18
  "references": [
19
  '{"answer": "352 BC"}',
 
25
  '{"H": 57.0, "M": 11.0, "S": 15.0}',
26
  '{"answer": 3319}',
27
  '{"answer": 6}',
28
+ "{'hours': 6, 'minutes': 2, 'seconds': 58}",
29
+ ],
30
+ "result": {"accuracy": 7 / 10},
31
+ "per_item_accuracy": [
32
+ True,
33
+ True,
34
+ True,
35
+ False,
36
+ False,
37
+ False,
38
+ True,
39
+ True,
40
+ True,
41
+ True,
42
  ],
 
 
43
  }
44
 
45