Update paper link and citation in model card

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +21 -29
README.md CHANGED
@@ -1,22 +1,22 @@
1
  ---
2
- license: apache-2.0
 
3
  datasets:
4
  - OpenGVLab/ScaleCUA-Data
5
  language:
6
  - en
 
 
7
  metrics:
8
  - accuracy
9
- base_model:
10
- - Qwen/Qwen2.5-VL-32B-Instruct
11
  pipeline_tag: image-text-to-text
12
- library_name: transformers
13
  tags:
14
  - agent
15
  ---
16
 
17
  # SCALECUA: SCALING UP COMPUTER USE AGENTS WITH CROSS-PLATFORM DATA
18
 
19
- [\[📂 GitHub\]](https://github.com/OpenGVLab/ScaleCUA) [\[📜 Paper\]](https://github.com/OpenGVLab/ScaleCUA) [\[🚀 Quick Start\]](#model-loading)
20
 
21
 
22
 
@@ -120,19 +120,13 @@ messages = [
120
  {
121
  "role": "system",
122
  "content":[
123
- {
124
- "type": "text",
125
- "text": SCALECUA_SYSTEM_PROMPT_GROUNDER,
126
- }
127
  ]
128
  },
129
  {
130
  "role": "user",
131
  "content": [
132
- {
133
- "type": "image",
134
- "image": "/path/to/your/image",
135
- },
136
  {"type": "text", "text": low_level_instruction},
137
  ],
138
  }
@@ -171,7 +165,7 @@ def parse_scalecua_grounder_response(response, image_width: int, image_height: i
171
  logger.info(f"Extracting coordinates from: {response}")
172
  match = re.search(r"\((\d+),\s*(\d+)\)", response)
173
  if not match:
174
- pattern = r'\((?:x=)?([-+]?\d*\.\d+|\d+)(?:,\s*(?:y=)?([-+]?\d*\.\d+|\d+))?\)'
175
  match = re.search(pattern, response)
176
  x = int(float(match.group(1)) / resized_width * width)
177
  y = int(float(match.group(2)) / resized_height * height) if match.group(2) else None
@@ -315,7 +309,8 @@ Previous operations:
315
  def format_history(history):
316
  if len(history) > 0:
317
  actions_history = [f"Step {i+1}: {low_level}" for i, low_level in enumerate(history)]
318
- return "\n".join(actions_history)
 
319
  else:
320
  return None
321
 
@@ -333,19 +328,13 @@ messages = [
333
  {
334
  "role": "system",
335
  "content":[
336
- {
337
- "type": "text",
338
- "text": SCALECUA_SYSTEM_PROMPT_AGENT,
339
- }
340
  ]
341
  },
342
  {
343
  "role": "user",
344
  "content": [
345
- {
346
- "type": "image",
347
- "image": "/path/to/your/image",
348
- },
349
  {"type": "text", "text": user_prompt},
350
  ],
351
  }
@@ -382,7 +371,8 @@ def parse_response(response: str) -> Dict:
382
  if action_matches:
383
  for match in action_matches:
384
  # Split each match by newline and strip whitespace from each line
385
- lines = [line.strip() for line in match.split('\n') if line.strip()]
 
386
  actions.extend(lines)
387
  operation_match = re.search(r'<operation>\s*(.*?)\s*</operation>', response, re.DOTALL)
388
  operation = operation_match.group(1).strip() if operation_match else None
@@ -456,12 +446,12 @@ def parse_actions(self, actions):
456
 
457
  else:
458
  if "=" in args_str:
459
- for arg in re.finditer(r"(\w+)=\[([^\]]+)\]", args_str):
460
  param = arg.group(1)
461
  list_str = arg.group(2)
462
 
463
  list_items = []
464
- for item in re.finditer(r"'([^']*)'|\"([^\"]*)\"|([^,\]]+)", list_str):
465
  val = (item.group(1) or item.group(2) or item.group(3)).strip()
466
  if val:
467
  list_items.append(val.strip('"\''))
@@ -529,9 +519,11 @@ If you find our project useful in your research, please consider citing:
529
 
530
  ```bibtex
531
  @article{liu2025scalecua,
532
- title = {ScaleCUA: Scaling Open-Source Computer Use Agents with Cross-Platform Data},
533
- author = {Liu, Zhaoyang and Xie, Jingjing and Ding, Zichen and Li, Zehao and Yang, Bowen and Wu, Zhenyu and Wang, Xuehui and Sun, Qiushi and Liu, Shi and Wang, Weiyun and Ye, Shenglong and Li, Qingyun and Dong, Xuan and Yu, Yue and Lu, Chenyu and Mo, YunXiang and Yan, Yao and Tian, Zeyue and Zhang, Xiao and Huang, Yuan and Liu, Yiqian and Su, Weijie and Luo, Gen and Yue, Xiangyu and Qi, Biqing and Chen, Kai and Zhou, Bowen and Qiao, Yu and Chen, Qifeng and Wang, Wenhai},
534
- year = {2025},
 
 
535
  url = {https://github.com/OpenGVLab/ScaleCUA}
536
  }
537
  ```
 
1
  ---
2
+ base_model:
3
+ - Qwen/Qwen2.5-VL-32B-Instruct
4
  datasets:
5
  - OpenGVLab/ScaleCUA-Data
6
  language:
7
  - en
8
+ library_name: transformers
9
+ license: apache-2.0
10
  metrics:
11
  - accuracy
 
 
12
  pipeline_tag: image-text-to-text
 
13
  tags:
14
  - agent
15
  ---
16
 
17
  # SCALECUA: SCALING UP COMPUTER USE AGENTS WITH CROSS-PLATFORM DATA
18
 
19
+ [\[📂 GitHub\]](https://github.com/OpenGVLab/ScaleCUA) [\[📜 Paper\]](https://huggingface.co/papers/2509.15221) [\[🚀 Quick Start\]](#model-loading)
20
 
21
 
22
 
 
120
  {
121
  "role": "system",
122
  "content":[
123
+ {"type": "text", "text": SCALECUA_SYSTEM_PROMPT_GROUNDER,}
 
 
 
124
  ]
125
  },
126
  {
127
  "role": "user",
128
  "content": [
129
+ {"type": "image", "image": "/path/to/your/image",},
 
 
 
130
  {"type": "text", "text": low_level_instruction},
131
  ],
132
  }
 
165
  logger.info(f"Extracting coordinates from: {response}")
166
  match = re.search(r"\((\d+),\s*(\d+)\)", response)
167
  if not match:
168
+ pattern = r'\((?:x=)?([-+]?\d*\.\d+|\d+)(?:,\s*(?:y=)?([-+]?\d*\.\d+|\d+))?\)'
169
  match = re.search(pattern, response)
170
  x = int(float(match.group(1)) / resized_width * width)
171
  y = int(float(match.group(2)) / resized_height * height) if match.group(2) else None
 
309
  def format_history(history):
310
  if len(history) > 0:
311
  actions_history = [f"Step {i+1}: {low_level}" for i, low_level in enumerate(history)]
312
+ return "\n".join(actions_history)
314
  else:
315
  return None
316
 
 
328
  {
329
  "role": "system",
330
  "content":[
331
+ {"type": "text", "text": SCALECUA_SYSTEM_PROMPT_AGENT,}
 
 
 
332
  ]
333
  },
334
  {
335
  "role": "user",
336
  "content": [
337
+ {"type": "image", "image": "/path/to/your/image",},
 
 
 
338
  {"type": "text", "text": user_prompt},
339
  ],
340
  }
 
371
  if action_matches:
372
  for match in action_matches:
373
  # Split each match by newline and strip whitespace from each line
374
+ lines = [line.strip() for line in match.split('\n') if line.strip()]
376
  actions.extend(lines)
377
  operation_match = re.search(r'<operation>\s*(.*?)\s*</operation>', response, re.DOTALL)
378
  operation = operation_match.group(1).strip() if operation_match else None
 
446
 
447
  else:
448
  if "=" in args_str:
449
+ for arg in re.finditer(r"(\w+)=\[([^\]]+)\]", args_str):
450
  param = arg.group(1)
451
  list_str = arg.group(2)
452
 
453
  list_items = []
454
+ for item in re.finditer(r"'([^']*)'|\"([^\"]*)\"|([^,\]]+)", list_str):
455
  val = (item.group(1) or item.group(2) or item.group(3)).strip()
456
  if val:
457
  list_items.append(val.strip('"\''))
 
519
 
520
  ```bibtex
521
  @article{liu2025scalecua,
522
+ title = {ScaleCUA: Scaling Open-Source Computer Use Agents with Cross-Platform Data},
523
+ author = {Liu, Zhaoyang and Xie, Jingjing and Ding, Zichen and Li, Zehao and Yang, Bowen and Wu, Zhenyu and Wang, Xuehui and Sun, Qiushi and Liu, Shi and Wang, Weiyun and Ye, Shenglong and Li, Qingyun and Dong, Xuan and Yu, Yue and Lu, Chenyu and Mo, YunXiang and Yan, Yao and Tian, Zeyue and Zhang, Xiao and Huang, Yuan and Liu, Yiqian and Su, Weijie and Luo, Gen and Yue, Xiangyu and Qi, Biqing and Chen, Kai and Zhou, Bowen and Qiao, Yu and Chen, Qifeng and Wang, Wenhai},
524
+ journal = {arXiv preprint arXiv:2509.15221},
525
+ year = {2025},
526
+ note = {Preprint},
527
  url = {https://github.com/OpenGVLab/ScaleCUA}
528
  }
529
  ```