root/tools/firmware_load_ng.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. addBufRange
  2. findRanges
  3. getBufRangeForIndex
  4. find_Nth_str
  5. find_str
  6. find_next_bytes_range
  7. find_bytes_all
  8. find_next_substr_bytes
  9. find_next_str_bytes_range
  10. find_next_str_bytes_main_fw
  11. find_str_bytes_main_fw
  12. find_next_str_bytes
  13. find_str_bytes
  14. isASCIIstring
  15. adr_get_range
  16. adr_get_range_type
  17. ptr2adr
  18. adr2ptr
  19. adr2ptr_with_data
  20. adr_range_type_str
  21. adr_range_desc_str
  22. adr_is_var
  23. adr_is_main_fw_code
  24. find_u32_adr_range
  25. find_u32_adr
  26. fw_u32
  27. fw_memcmp
  28. adr_hist_reset
  29. adr_hist_index
  30. adr_hist_add
  31. adr_hist_get
  32. isARM
  33. isLDR_PC
  34. isLDR_PC_PC
  35. isSUBW_PC
  36. isADDW_PC
  37. isADD_PC
  38. isSUB_PC
  39. isRETx
  40. isPUSH_LR
  41. isPOP_LR
  42. isPOP_PC
  43. isADDx_imm
  44. isSUBx_imm
  45. isADRx
  46. LDR_PC2valptr_thumb
  47. LDR_PC2valptr_arm
  48. LDR_PC2valptr
  49. LDR_PC2adr
  50. ADRx2adr
  51. ADR2adr
  52. ADR2valptr
  53. LDR_PC2val
  54. LDR_PC_PC_target
  55. B_target
  56. CBx_target
  57. BLXimm_target
  58. BL_target
  59. B_BL_target
  60. B_BL_BLXimm_target
  61. BX_PC_target
  62. get_TBx_PC_info
  63. disasm_iter_new
  64. disasm_iter_free
  65. disasm_iter_set
  66. disasm_iter_init
  67. disasm_iter
  68. disasm_iter_redo
  69. fw_disasm_iter_start
  70. fw_disasm_iter
  71. fw_disasm_iter_single
  72. fw_disasm_adr
  73. fw_search_insn
  74. search_disasm_const_ref
  75. search_disasm_str_ref
  76. search_disasm_calls
  77. search_calls_multi_end
  78. search_disasm_calls_multi
  79. search_disasm_calls_veneer_multi
  80. get_call_const_args
  81. get_direct_jump_target
  82. get_branch_call_insn_target
  83. find_and_get_var_ldr
  84. find_const_ref_match
  85. find_const_ref_call
  86. check_simple_func
  87. find_last_call_from_func
  88. insn_match_seq
  89. reg_in_range
  90. insn_match
  91. insn_match_any
  92. insn_match_find_next
  93. insn_match_find_nth
  94. insn_match_find_next_seq
  95. fw_search_bytes
  96. fw_add_adr_range
  97. find_dryos_vers
  98. firmware_load
  99. do_blx_check
  100. firmware_init_capstone
  101. find_startup_copy
  102. find_exception_vec
  103. firmware_init_data_ranges
  104. firmware_unload

   1 #include <inttypes.h>
   2 #include <stdio.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <capstone.h>
   7 
   8 #include "stubs_load.h" // needed for sv in fw struct
   9 #include "firmware_load_ng.h"
  10 
  11 
  12 // Add a valid range to the list
  13 static void addBufRange(firmware *fw, int o, int l)
  14 {
  15     BufRange *n = malloc(sizeof(BufRange));
  16     n->p = fw->buf32 + o;
  17     n->off = o;
  18     n->len = l;
  19     n->next = 0;
  20     if (fw->br == 0)
  21     {
  22         fw->br = n;
  23     }
  24     else
  25     {
  26         fw->last->next = n;
  27     }
  28     fw->last = n;
  29 }
  30 
  31 // Find valid ranges for the firmware dump
  32 static void findRanges(firmware *fw)
  33 {
  34     int i, j, k;
  35 
  36     // Find all the valid ranges for checking (skips over large blocks of 0xFFFFFFFF)
  37     fw->br = 0; fw->last = 0;
  38     k = -1; j = 0;
  39     for (i = 0; i < fw->size32; i++)
  40     {
  41         if (fw->buf32[i] == 0xFFFFFFFF)   // Possible start of block to skip
  42         {
  43             if (k == -1)            // Mark start of possible skip block
  44             {
  45                 k = i;
  46             }
  47         }
  48         else                        // Found end of block ?
  49         {
  50             if (k != -1)
  51             {
  52                 if (i - k > 32)     // If block more than 32 words then we want to skip it
  53                 {
  54                     if (k - j > 8)
  55                     {
  56                         // Add a range record for the previous valid range (ignore short ranges)
  57                         addBufRange(fw,j,k - j);
  58                     }
  59                     j = i;          // Reset valid range start to current position
  60                 }
  61                 k = -1;             // Reset marker for skip block
  62             }
  63         }
  64     }
  65     // Add range for last valid block
  66     if (k != -1)
  67     {
  68         if (k - j > 8)
  69         {
  70             addBufRange(fw,j,k - j);
  71         }
  72     }
  73     else
  74     {
  75         if (i - j > 8)
  76         {
  77             addBufRange(fw,j,i - j);
  78         }
  79     }
  80 }
  81 
  82 // return the buffrange for a given offset or null if not found
  83 BufRange *getBufRangeForIndex(firmware *fw,int i)
  84 {
  85     BufRange *br = fw->br;
  86     while (br) {
  87         if(i >= br->off && i < br->off + br->len) {
  88             return br;
  89         }
  90         br = br->next;
  91     }
  92     return NULL;
  93 }
  94 
  95 // Find the index of a string in the firmware
  96 // Assumes the string starts on a 32bit boundary.
  97 // String + terminating zero byte should be at least 4 bytes long
  98 // Handles multiple string instances
  99 int find_Nth_str(firmware *fw, char *str, int N)
 100 {
 101     int nlen = strlen(str);
 102     uint32_t nm0 = *((uint32_t*)str);
 103     uint32_t *p;
 104     int j;
 105 
 106     BufRange *br = fw->br;
 107     while (br)
 108     {
 109         for (p = br->p, j = 0; j < br->len - nlen/4; j++, p++)
 110         {
 111             if ((nm0 == *p) && ((nlen<=4) || (memcmp(p+1,str+4,nlen-4) == 0)) )
 112             {
 113                 if (--N == 0)
 114                     return j+br->off;
 115             }
 116         }
 117         br = br->next;
 118     }
 119 
 120     return -1;
 121 }
 122 
 123 int find_str(firmware *fw, char *str)
 124 {
 125     return find_Nth_str(fw, str, 1);
 126 }
 127 
 128 // find sequence of bytes, starting from star_adr, up to max_adr, any alignment
 129 // returns firmware address or 0
 130 // use repeated calls to find multiple
 131 // NOTE only handles ROM addresses
 132 uint32_t find_next_bytes_range(firmware *fw, const void *bytes, size_t len, uint32_t start_adr, uint32_t max_adr)
 133 {
 134     if(!start_adr) {
 135         start_adr = fw->base;
 136     }
 137     if(start_adr < fw->base || start_adr >= fw->base + fw->size8) {
 138         fprintf(stderr,"find_next_bytes_range invalid start_adr 0x%08x\n",start_adr);
 139         return 0;
 140     }
 141     if(!max_adr) {
 142         max_adr = fw->base + fw->size8-1;
 143     }
 144     if(max_adr < fw->base || max_adr >= fw->base + fw->size8) {
 145         fprintf(stderr,"find_next_bytes_range invalid max_adr 0x%08x\n",max_adr);
 146         return 0;
 147     }
 148     int end_k = (max_adr - fw->base);
 149     BufRange *p = getBufRangeForIndex(fw,(start_adr - fw->base)/4);
 150     if(!p) {
 151         return 0;
 152     }
 153     int k = start_adr - fw->base;
 154 
 155     while (k < end_k)
 156     {
 157         for (; k < (p->off + p->len)*4; k++)
 158         {
 159             if (memcmp(fw->buf8+k,bytes,len) == 0) {
 160                 return fw->base+k;
 161             }
 162         }
 163         p = p->next;
 164         if(!p) {
 165             break;
 166         }
 167         k = p->off*4;
 168     }
 169     return 0;
 170 }
 171 
 172 // find up to max matching byte sequences, storing addresses in result
 173 // returns count
 174 int find_bytes_all(firmware *fw, const void *bytes, size_t len, uint32_t adr, uint32_t *result, int max)
 175 {
 176     int i;
 177     for(i=0,adr=find_next_bytes_range(fw,bytes,len,0,0); adr && (i < max); adr=find_next_bytes_range(fw,bytes,len,adr+len,0),i++) {
 178         result[i] = adr;
 179     }
 180     return i;
 181 }
 182 
 183 uint32_t find_next_substr_bytes(firmware *fw, const char *str, uint32_t adr)
 184 {
 185     //fprintf(stderr,"find_next_substr_bytes 0x%08x\n",adr);
 186     // strlen excludes null
 187     return find_next_bytes_range(fw,str,strlen(str),adr,0);
 188 }
 189 
 190 uint32_t find_next_str_bytes_range(firmware *fw, const char *str, uint32_t adr,uint32_t max_adr)
 191 {
 192     // +1 to include the null in memcmp
 193     return find_next_bytes_range(fw,str,strlen(str)+1,adr,max_adr);
 194 }
 195 
 196 uint32_t find_next_str_bytes_main_fw(firmware *fw, const char *str, uint32_t adr)
 197 {
 198     // max is end of fw code + 4096, assuming it fits in fw
 199     // while early code could technically load from base - 1k, unlikely
 200     uint32_t max_adr;
 201     if(fw->base + fw->size8 - 4096 > fw->rom_code_search_max_adr) {
 202         max_adr = fw->rom_code_search_max_adr + 4096;
 203     } else {
 204         max_adr = fw->base + fw->size8;
 205     }
 206     return find_next_bytes_range(fw,str,strlen(str)+1,adr,max_adr);
 207 }
 208 
 209 // find a string within range of LDR pc or ADR, starting from main fw
 210 uint32_t find_str_bytes_main_fw(firmware *fw, const char *str)
 211 {
 212     return find_next_str_bytes_main_fw(fw,str,fw->rom_code_search_min_adr);
 213 }
 214 
 215 uint32_t find_next_str_bytes(firmware *fw, const char *str, uint32_t adr)
 216 {
 217     // +1 to include the null in memcmp
 218     return find_next_bytes_range(fw,str,strlen(str)+1,adr,0);
 219 }
 220 
 221 // Find the index of a string in the firmware, can start at any address
 222 // returns firmware address
 223 uint32_t find_str_bytes(firmware *fw, const char *str)
 224 {
 225     return find_next_str_bytes(fw,str,fw->base);
 226 }
 227 
 228 int isASCIIstring(firmware *fw, uint32_t adr)
 229 {
 230     unsigned char *p = (unsigned char*)adr2ptr_with_data(fw, adr);
 231     if(!p) {
 232         return 0;
 233     }
 234     // TODO should avoid running off end of dump
 235     int i;
 236     for (i = 0; (i < 100) && (p[i] != 0); i++)
 237     {
 238         if (!((p[i] == '\r') || (p[i] == '\n') || (p[i] == '\t') || ((p[i] >= 0x20) && (p[i] <= 0x7f))))
 239         {
 240             return 0;
 241         }
 242     }
 243     if ((i >= 2) && (p[i] == 0))
 244         return 1;
 245     return 0;
 246 }
 247 
 248 // return address range struct for adr, or NULL if not in known range
 249 adr_range_t *adr_get_range(firmware *fw, uint32_t adr)
 250 {
 251     int i;
 252     adr_range_t *r=fw->adr_ranges;
 253     for(i=0;i<fw->adr_range_count;i++) {
 254         if(adr >= r->start && adr < r->start + r->bytes) {
 255             return r;
 256         }
 257         r++;
 258     }
 259     return NULL;
 260 }
 261 
 262 // return what kind of range adr is in
 263 int adr_get_range_type(firmware *fw, uint32_t adr)
 264 {
 265     adr_range_t *r=adr_get_range(fw,adr);
 266     if(!r) {
 267         return ADR_RANGE_INVALID;
 268     }
 269     return r->type;
 270 }
 271 
 272 uint32_t ptr2adr(firmware *fw, uint8_t *ptr)
 273 {
 274     // TODO handle copied, or maybe another func to convert?
 275     return (ptr-fw->buf8)+fw->base;
 276 }
 277 
 278 uint8_t* adr2ptr(firmware *fw, uint32_t adr)
 279 {
 280     adr_range_t *r=adr_get_range(fw,adr);
 281     if(!r) {
 282         return NULL;
 283     }
 284     switch(r->type) {
 285         case ADR_RANGE_RAM_CODE:
 286         case ADR_RANGE_ROM:
 287             return (r->buf)+(adr - r->start);
 288         default:
 289             return NULL;
 290     }
 291 }
 292 
 293 uint8_t* adr2ptr_with_data(firmware *fw, uint32_t adr)
 294 {
 295     adr_range_t *r=adr_get_range(fw,adr);
 296     if(!r) {
 297         return NULL;
 298     }
 299     switch(r->type) {
 300         case ADR_RANGE_RAM_CODE:
 301         case ADR_RANGE_INIT_DATA:
 302         case ADR_RANGE_ROM:
 303             return (r->buf)+(adr - r->start);
 304         default:
 305             return NULL;
 306     }
 307 }
 308 
 309 // return constant string describing type
 310 const char* adr_range_type_str(int type)
 311 {
 312     switch(type) {
 313         case ADR_RANGE_INVALID:
 314             return "(invalid)";
 315         case ADR_RANGE_ROM:
 316             return "ROM";
 317         case ADR_RANGE_RAM_CODE:
 318             return "RAM code";
 319         case ADR_RANGE_INIT_DATA:
 320             return "RAM data";
 321         default:
 322             return "(unknown)";
 323     }
 324 }
 325 
 326 // return constant string describing type and flags
 327 const char* adr_range_desc_str(adr_range_t *r)
 328 {
 329     switch(r->type) {
 330         case ADR_RANGE_INVALID:
 331             return "(invalid)";
 332         case ADR_RANGE_ROM:
 333             return "ROM";
 334         case ADR_RANGE_RAM_CODE:
 335             if(r->flags & ADR_RANGE_FL_EVEC) {
 336                 return "EVEC";
 337             } else if(r->flags & ADR_RANGE_FL_TCM) {
 338                 return "TCM code";
 339             }
 340             return "RAM code";
 341         case ADR_RANGE_INIT_DATA:
 342             return "RAM data";
 343         default:
 344             return "(unknown)";
 345     }
 346 }
 347 
 348 // return true if adr is in firmware DATA or BSS
 349 int adr_is_var(firmware *fw, uint32_t adr)
 350 {
 351     return (adr > fw->data_start && adr < fw->memisostart);
 352 }
 353 
 354 // return true if adr is in the ROM search range, or one of the copied RAM code regions
 355 int adr_is_main_fw_code(firmware *fw, uint32_t adr)
 356 {
 357     int adr_type = adr_get_range_type(fw,adr);
 358     if(adr_type == ADR_RANGE_RAM_CODE) {
 359         return 1;
 360     }
 361     if(adr_type != ADR_RANGE_ROM) {
 362         return 0;
 363     }
 364     if(adr < fw->rom_code_search_min_adr  || adr > fw->rom_code_search_max_adr) {
 365         return 0;
 366     }
 367     return 1;
 368 }
 369 
 370 /*
 371 return firmware address of 32 bit value, starting at address "start", up to max
 372 */
 373 uint32_t find_u32_adr_range(firmware *fw, uint32_t val, uint32_t start,uint32_t maxadr)
 374 {
 375     // TODO
 376     if(start == 0) {
 377         start=fw->base;
 378     }
 379     if(start & 3) {
 380         fprintf(stderr,"find_u32_adr unaligned start 0x%08x\n",start);
 381         return 0;
 382     }
 383     uint32_t *p=(uint32_t *)adr2ptr(fw,start);
 384     if(!p) {
 385         fprintf(stderr,"find_u32_adr bad start 0x%08x\n",start);
 386         return 0;
 387     }
 388     uint32_t *p_end;
 389     if(maxadr) {
 390         p_end = (uint32_t *)adr2ptr(fw,maxadr);
 391     } else {
 392         p_end = fw->buf32 + fw->size32 - 1;
 393     }
 394     // TODO should use buf ranges
 395     while(p<=p_end) {
 396         if(*p==val) {
 397             return ptr2adr(fw,(uint8_t *)p);
 398         }
 399         p++;
 400     }
 401     return 0;
 402 }
 403 
 404 // as above, full to end of fw
 405 uint32_t find_u32_adr(firmware *fw, uint32_t val, uint32_t start)
 406 {
 407     return find_u32_adr_range(fw,val,start, fw->base + (fw->size8 -4));
 408 }
 409 
 410 // return u32 value at adr
 411 uint32_t fw_u32(firmware *fw, uint32_t adr)
 412 {
 413     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 414     if(!p) {
 415         fprintf(stderr,"fw_u32 bad adr 0x%08x\n",adr);
 416         return 0;
 417     }
 418     return *p;
 419 }
 420 
 421 // memcmp, but using a firmware address, returning 1 adr/size out of range
 422 int fw_memcmp(firmware *fw, uint32_t adr,const void *cmp, size_t n)
 423 {
 424     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 425     if(!p) {
 426         return 1;
 427     }
 428     if(n >= fw->size8 - (adr - fw->base)) {
 429         return 1;
 430     }
 431     return memcmp(p,cmp,n);
 432 }
 433 
 434 
 435 // ****** address history functions ******
 436 // reset address history to empty
 437 void adr_hist_reset(adr_hist_t *ah)
 438 {
 439     ah->cur=0;
 440     ah->count=0;
 441     // memset shouldn't be needed
 442     // memset(ah->adrs,0,ADR_HIST_SIZE*4);
 443 }
 444 
 445 // return the index of current entry + i. may be negative or positive, wraps. Does not check validity
 446 int adr_hist_index(adr_hist_t *ah, int i)
 447 {
 448     int r=(ah->cur+i)%ADR_HIST_SIZE;
 449     if(r < 0) {
 450         return ADR_HIST_SIZE + r;
 451     }
 452     return r;
 453 }
 454 
 455 // add an entry to address history
 456 void adr_hist_add(adr_hist_t *ah, uint32_t adr)
 457 {
 458     ah->cur=adr_hist_index(ah,1);
 459     ah->adrs[ah->cur]=adr;
 460     if(ah->count < ADR_HIST_SIZE)  {
 461         ah->count++;
 462     }
 463 }
 464 
 465 // return the i'th previous entry in this history, or 0 if not valid (maybe should be -1?)
 466 // i= 0 = most recently disassembled instruction, if any
 467 uint32_t adr_hist_get(adr_hist_t *ah, int i)
 468 {
 469     if(!ah->count || i > ah->count) {
 470         return 0;
 471     }
 472     return ah->adrs[adr_hist_index(ah,-i)];
 473 }
 474 
 475 // ****** instruction analysis utilities ******
 476 // is insn an ARM instruction?
 477 // like cs_insn_group(cs_handle,insn,ARM_GRP_ARM) but doesn't require handle and doesn't check or report errors
 478 int isARM(cs_insn *insn)
 479 {
 480     int i;
 481     for(i=0;i<insn->detail->groups_count;i++) {
 482         if(insn->detail->groups[i] == ARM_GRP_ARM) {
 483             return 1;
 484         }
 485     }
 486     return 0;
 487 }
 488 
 489 /*
 490 is insn a PC relative load?
 491 */
 492 int isLDR_PC(cs_insn *insn)
 493 {
 494     return insn->id == ARM_INS_LDR
 495            && insn->detail->arm.op_count == 2
 496            && insn->detail->arm.operands[0].type == ARM_OP_REG
 497            && insn->detail->arm.operands[1].type == ARM_OP_MEM
 498            && insn->detail->arm.operands[1].mem.base == ARM_REG_PC;
 499 
 500 }
 501 
 502 /*
 503 is insn a PC relative load to PC?
 504 */
 505 int isLDR_PC_PC(cs_insn *insn)
 506 {
 507     if(!isLDR_PC(insn)) {
 508         return 0;
 509     }
 510     return (insn->detail->arm.operands[0].reg == ARM_REG_PC);
 511 }
 512 
 513 //  subw    rd, pc, #x?
 514 int isSUBW_PC(cs_insn *insn)
 515 {
 516     return(insn->id == ARM_INS_SUBW
 517        && insn->detail->arm.op_count == 3
 518        && insn->detail->arm.operands[0].type == ARM_OP_REG
 519        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 520        && insn->detail->arm.operands[1].type == ARM_OP_REG
 521        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 522        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 523 }
 524 
 525 //  addw    rd, pc, #x?
 526 int isADDW_PC(cs_insn *insn)
 527 {
 528     return(insn->id == ARM_INS_ADDW
 529        && insn->detail->arm.op_count == 3
 530        && insn->detail->arm.operands[0].type == ARM_OP_REG
 531        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 532        && insn->detail->arm.operands[1].type == ARM_OP_REG
 533        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 534        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 535 }
 536 
 537 // is insn ADD rd, pc, #x  (only generated for ARM in capstone)
 538 int isADD_PC(cs_insn *insn)
 539 {
 540     return (insn->id == ARM_INS_ADD
 541             && insn->detail->arm.op_count == 3
 542             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 543             && insn->detail->arm.operands[1].type == ARM_OP_REG
 544             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 545             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 546 }
 547 
 548 // is insn SUB rd, pc, #x  (only generated for ARM in capstone)
 549 int isSUB_PC(cs_insn *insn)
 550 {
 551     return (insn->id == ARM_INS_SUB
 552             && insn->detail->arm.op_count == 3
 553             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 554             && insn->detail->arm.operands[1].type == ARM_OP_REG
 555             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 556             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 557 }
 558 
 559 // does insn look like a function return?
 560 int isRETx(cs_insn *insn)
 561 {
 562     // BX LR
 563     if(insn->id == ARM_INS_BX
 564             && insn->detail->arm.op_count == 1
 565             && insn->detail->arm.operands[0].type == ARM_OP_REG
 566             && insn->detail->arm.operands[0].reg == ARM_REG_LR) {
 567         return 1;
 568     }
 569 
 570     // TODO LDR pc, [sp], imm is somewhat common, but could also be function pointer call
 571 
 572     // POP. capstone translates LDMFD   SP!,... in arm code to pop
 573     if(insn->id == ARM_INS_POP) {
 574         int i;
 575         for(i=0; i < insn->detail->arm.op_count; i++) {
 576             if(insn->detail->arm.operands[i].type == ARM_OP_REG
 577                 && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 578                 return 1;
 579             }
 580         }
 581     }
 582     // MOV PC, LR (some tools translate this to RET)
 583     if(insn->id == ARM_INS_MOV
 584             && insn->detail->arm.operands[0].type == ARM_OP_REG
 585             && insn->detail->arm.operands[0].reg == ARM_REG_PC
 586             && insn->detail->arm.operands[1].type == ARM_OP_REG
 587             && insn->detail->arm.operands[1].reg == ARM_REG_LR) {
 588         return 1;
 589     }
 590     return 0;
 591 }
 592 
 593 // does insn push LR (function start -ish)
 594 int isPUSH_LR(cs_insn *insn)
 595 {
 596     if(insn->id != ARM_INS_PUSH) {
 597         return 0;
 598     }
 599     int i;
 600     for(i=0; i < insn->detail->arm.op_count; i++) {
 601         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 602             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 603             return 1;
 604         }
 605     }
 606     return 0;
 607 }
 608 
 609 // does insn pop LR (func end before tail call)
 610 int isPOP_LR(cs_insn *insn)
 611 {
 612     if(insn->id != ARM_INS_POP) {
 613         return 0;
 614     }
 615     int i;
 616     for(i=0; i < insn->detail->arm.op_count; i++) {
 617         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 618             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 619             return 1;
 620         }
 621     }
 622     return 0;
 623 }
 624 
 625 // does insn pop PC
 626 int isPOP_PC(cs_insn *insn)
 627 {
 628     if(insn->id != ARM_INS_POP) {
 629         return 0;
 630     }
 631     int i;
 632     for(i=0; i < insn->detail->arm.op_count; i++) {
 633         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 634             && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 635             return 1;
 636         }
 637     }
 638     return 0;
 639 }
 640 
 641 // is the instruction ADD* rx, imm
 642 int isADDx_imm(cs_insn *insn)
 643 {
 644     return ((insn->id == ARM_INS_ADD || insn->id == ARM_INS_ADDW) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 645 }
 646 // is the instruction SUB* rx, imm
 647 int isSUBx_imm(cs_insn *insn)
 648 {
 649     return (IS_INSN_ID_SUBx(insn->id) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 650 }
 651 
 652 // is the instruction an ADR or ADR-like instruction?
 653 int isADRx(cs_insn *insn)
 654 {
 655     return ((insn->id == ARM_INS_ADR)
 656         || isSUBW_PC(insn)
 657         || isADDW_PC(insn)
 658         || (isARM(insn) && (isADD_PC(insn) || isSUB_PC(insn))));
 659 }
 660 
 661 // if insn is LDR Rn, [pc,#x] return pointer to value, otherwise null
 662 uint32_t* LDR_PC2valptr_thumb(firmware *fw, cs_insn *insn)
 663 {
 664     if(!isLDR_PC(insn)) {
 665         return NULL;
 666     }
 667     uint32_t adr;
 668     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 669     // appears correct for examples seen so far
 670     adr=(insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 671     return (uint32_t *)adr2ptr(fw,adr);
 672 }
 673 
 674 uint32_t* LDR_PC2valptr_arm(firmware *fw, cs_insn *insn)
 675 {
 676     if(!isLDR_PC(insn)) {
 677         return NULL;
 678     }
 679     uint32_t adr;
 680     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 681     // appears correct for examples seen so far
 682     adr=insn->address+8+insn->detail->arm.operands[1].mem.disp;
 683     return (uint32_t *)adr2ptr(fw,adr);
 684 }
 685 
 686 uint32_t* LDR_PC2valptr(firmware *fw, cs_insn *insn)
 687 {
 688     if(isARM(insn)) {
 689        return LDR_PC2valptr_arm(fw,insn);
 690     } else {
 691        return LDR_PC2valptr_thumb(fw,insn);
 692     }
 693 }
 694 
 695 // return the address of value loaded by LDR rd, [pc, #x] or 0 if not LDR PC
 696 uint32_t LDR_PC2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 697 {
 698     if(!isLDR_PC(insn)) {
 699         return 0;
 700     }
 701     if(isARM(insn)) {
 702        return insn->address+8+insn->detail->arm.operands[1].mem.disp;
 703     } else {
 704        return (insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 705     }
 706 }
 707 
 708 // return value generated by an ADR or ADR-like instruction, or 0 (which should be rarely generated by ADR)
 709 uint32_t ADRx2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 710 {
 711     if(insn->id == ARM_INS_ADR) {
 712         return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 713     }
 714     if(isSUBW_PC(insn)) {
 715         return (insn->address&~3)+4-insn->detail->arm.operands[2].imm;
 716     }
 717     if(isADDW_PC(insn)) {
 718         return (insn->address&~3)+4+insn->detail->arm.operands[2].imm;
 719     }
 720     if(isARM(insn)) {
 721         if(isADD_PC(insn)) {
 722             return insn->address+8+insn->detail->arm.operands[2].imm;
 723         }
 724         if(isSUB_PC(insn)) {
 725             return insn->address+8-insn->detail->arm.operands[2].imm;
 726         }
 727     }
 728     return 0;
 729 }
 730 
 731 // return the value generated by an ADR (ie, the location of the value as a firmware address)
 732 // NOTE not checked if it is in dump
 733 uint32_t ADR2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 734 {
 735     if(insn->id != ARM_INS_ADR) {
 736         return 0;
 737     }
 738     // TODO - capstone doesn't appear to generate ADR for ARM
 739     /*
 740     if(cs_insn_group(fw->cs_handle,insn,ARM_GRP_ARM)) {
 741        return 0;
 742     }
 743     */
 744     return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 745 }
 746 
 747 // if insn is adr/ AKA ADD Rn, pc,#x return pointer to value, otherwise null
 748 uint32_t* ADR2valptr(firmware *fw, cs_insn *insn)
 749 {
 750     uint32_t adr=ADR2adr(fw,insn);
 751     return (uint32_t *)adr2ptr(fw,adr);
 752 }
 753 
 754 // return value loaded by PC relative LDR instruction, or 0 if out of range
 755 uint32_t LDR_PC2val(firmware *fw, cs_insn *insn)
 756 {
 757     uint32_t *p=LDR_PC2valptr(fw,insn);
 758     if(p) {
 759         return *p;
 760     }
 761     return 0;
 762 }
 763 
 764 // return value loaded by PC relative LDR pc..., or 0 if not matching or out of range
 765 uint32_t LDR_PC_PC_target(firmware *fw, cs_insn *insn)
 766 {
 767     if(!isLDR_PC_PC(insn)) {
 768         return 0;
 769     }
 770     return LDR_PC2val(fw,insn);
 771 }
 772 
 773 // return the target of B instruction, or 0 if current instruction isn't BL
 774 uint32_t B_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 775 {
 776     if(insn->id == ARM_INS_B) {
 777         return insn->detail->arm.operands[0].imm;
 778     }
 779     return 0; // TODO could be valid
 780 }
 781 
 782 
 783 // return the target of CBZ / CBNZ instruction, or 0 if current instruction isn't CBx
 784 uint32_t CBx_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 785 {
 786     if(insn->id == ARM_INS_CBZ || insn->id == ARM_INS_CBNZ) {
 787         return insn->detail->arm.operands[1].imm;
 788     }
 789     return 0; // TODO could be valid
 790 }
 791 
 792 // return the target of BLX instruction, or 0 if current instruction isn't BLX imm
 793 uint32_t BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 794 {
 795     if(insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM) {
 796         return insn->detail->arm.operands[0].imm;
 797     }
 798     return 0; // TODO could be valid
 799 }
 800 
 801 
 802 // return the target of BL instruction, or 0 if current instruction isn't BL
 803 uint32_t BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 804 {
 805     if(insn->id == ARM_INS_BL) {
 806         return insn->detail->arm.operands[0].imm;
 807     }
 808     return 0; // TODO could be valid
 809 }
 810 
 811 // as above, but also including B for tail calls
 812 uint32_t B_BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 813 {
 814     if(insn->id == ARM_INS_B || insn->id == ARM_INS_BL) {
 815         return insn->detail->arm.operands[0].imm;
 816     }
 817     return 0; // TODO could be valid
 818 }
 819 
 820 //
 821 // as above, but also including BLX imm
 822 uint32_t B_BL_BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 823 {
 824     if(insn->id == ARM_INS_B
 825         || insn->id == ARM_INS_BL
 826         || (insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM)) {
 827         return insn->detail->arm.operands[0].imm;
 828     }
 829     return 0; // TODO could be valid
 830 }
 831 
 832 // BX PC (mode change, small jump) Does NOT set thumb bit
 833 uint32_t BX_PC_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 834 {
 835     if(insn->id == ARM_INS_BX
 836         && insn->detail->arm.operands[0].type == ARM_OP_REG
 837         && insn->detail->arm.operands[0].reg == ARM_REG_PC) {
 838         if(insn->size == 2) { // thumb
 839             // per arms docs, thumb bx pc from unaligned address is "undefined"
 840             // assume non-instruction
 841             if((insn->address & 2) == 2) {
 842                 return 0;
 843             }
 844             return (uint32_t)(insn->address) + 4;
 845         } else {
 846             return (uint32_t)(insn->address) + 8;
 847         }
 848     }
 849     return 0;
 850 }
 851 
 852 // get the (likely) range of jumptable entries from a pc relative TBB or TBH instruction
 853 // returns 0 on error or if instruction is not TBB/TBH
 854 // returns 1 if instruction is TBB/TBH [PC,...]
 855 int get_TBx_PC_info(firmware *fw,iter_state_t *is, tbx_info_t *ti)
 856 {
 857     if(!(is->insn->id == ARM_INS_TBH || is->insn->id == ARM_INS_TBB) || is->insn->detail->arm.operands[0].mem.base != ARM_REG_PC) {
 858         return 0;
 859     }
 860     ti->start=(uint32_t)is->adr; // after current instruction
 861     ti->first_target=0;
 862     ti->bytes=(is->insn->id == ARM_INS_TBH)?2:1;
 863 
 864     uint32_t max_adr;
 865     // max possible (assuming jumptable is contiguous)
 866     if(ti->bytes==1) {
 867         max_adr=ti->start+(2*255);
 868     } else {
 869         max_adr=ti->start+(2*65535);
 870     }
 871     arm_reg i_reg=is->insn->detail->arm.operands[0].mem.index;
 872     // backtrack looking for
 873     // cmp index reg,#imm
 874     // ...
 875     // bhs ...
 876     int max_backtrack = 8;
 877     if(is->ah.count - 1 < max_backtrack) {
 878         max_backtrack = is->ah.count-1;
 879     }
 880 
 881     int max_count=0;
 882     int found_bhs=0;
 883     int i;
 884     for(i=1;i<=max_backtrack;i++) {
 885         fw_disasm_iter_single(fw,adr_hist_get(&is->ah,i)); // thumb state comes from hist
 886         if(fw->is->insn->id == ARM_INS_B && fw->is->insn->detail->arm.cc == ARM_CC_HS) {
 887             found_bhs=1;
 888             continue;
 889         }
 890         // TODO lots of other ways condition code or reg could be changed in between
 891         if(found_bhs && fw->is->insn->id == ARM_INS_CMP) {
 892             // cmp with correct operands, assume number of jumptable entries
 893             if((arm_reg)fw->is->insn->detail->arm.operands[0].reg == i_reg
 894                 || fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
 895                 max_count = fw->is->insn->detail->arm.operands[1].imm;
 896             }
 897             // otherwise, give up
 898             break;
 899         }
 900     }
 901     if(max_count) {
 902         max_adr = ti->start+max_count*ti->bytes;
 903         //printf("get_TBx_PC_info: max_count %d start 0x%08x max_adr=0x%08x\n",max_count,ti->start,max_adr);
 904     }
 905     uint32_t adr=ti->start;
 906     while(adr < max_adr) {
 907         uint8_t *p=adr2ptr(fw,adr);
 908         if(!p) {
 909             fprintf(stderr,"get_TBx_PC_info: jumptable outside of valid address range at 0x%08x\n",adr);
 910             return 0;
 911         }
 912         uint16_t off;
 913         if(ti->bytes==1) {
 914             off=(uint16_t)*p;
 915         } else {
 916             off=*(uint16_t *)p;
 917         }
 918 
 919         // 0, probably padding at the end (could probably break here)
 920         // note shouldn't be padding on tbh, since aligned for thumb
 921         if(!off) {
 922             break;
 923         }
 924         uint32_t target = ti->start+2*off;
 925         // may indicate non-jumptable entry, if count not found, so don't increment adr
 926         if(target <= adr) {
 927             fprintf(stderr,"get_TBx_PC_info: jumptable target 0x%08x inside jumptable %d at 0x%08x\n",target,off,adr);
 928             break;
 929         }
 930         if(!ti->first_target || target < ti->first_target) {
 931             ti->first_target=target;
 932             if(target < max_adr) {
 933                 max_adr=target; // assume jump table ends at/before first target
 934             }
 935         }
 936         adr+=ti->bytes;
 937     }
 938     // if found count, assume it's right
 939     if(max_count) {
 940         ti->count=max_count;
 941     } else {
 942         // otherwise, use final address
 943         ti->count=(adr-ti->start)/ti->bytes;
 944     }
 945     return 1;
 946 }
 947 
 948 // TODO should have variants of above including LDR pc, [pc, #x] for some of the above
 949 
 950 // ****** disassembly iterator utilities ******
 951 // allocate a new iterator state, optionally initializing at adr (0/invalid OK)
 952 iter_state_t *disasm_iter_new(firmware *fw, uint32_t adr)
 953 {
 954     iter_state_t *is=(iter_state_t *)malloc(sizeof(iter_state_t));
 955     // it doesn't currently appear to matter which handle is used to allocate
 956     // only used for overridable malloc functions and error reporting
 957     is->insn=cs_malloc(fw->cs_handle_arm);
 958     disasm_iter_init(fw,is,adr);
 959     return is;
 960 }
 961 
 962 // free iterator state and associated resources
 963 void disasm_iter_free(iter_state_t *is)
 964 {
 965     cs_free(is->insn,1);
 966     free(is);
 967     return;
 968 }
 969 
 970 // set iterator to adr, without clearing history (for branch following)
 971 // thumb bit in adr sets mode
 972 int disasm_iter_set(firmware *fw, iter_state_t *is, uint32_t adr)
 973 {
 974     // set handle based on thumb bit to allow disassembly
 975     if(ADR_IS_THUMB(adr)) {
 976         is->cs_handle=fw->cs_handle_thumb;
 977         is->thumb=1;
 978         is->insn_min_size=2;
 979         adr=ADR_CLEAR_THUMB(adr);// ADR used for iteration must not contain thumb bit
 980     } else {
 981         is->cs_handle=fw->cs_handle_arm;
 982         is->thumb=0;
 983         is->insn_min_size=4;
 984         if(!ADR_IS_ALIGN4(adr)) {
 985             fprintf(stderr,"disasm_iter_set: unaligned ARM address 0x%08x\n",adr);
 986             is->code=NULL;
 987             is->size=0;
 988             is->adr=0;
 989             return 0;
 990         }
 991     }
 992     uint8_t *p=adr2ptr(fw,adr);
 993     if(!p) {
 994 // TODO invalid currently allowed, for new
 995 //        fprintf(stderr,"disasm_iter_set: bad address 0x%08x\n",adr);
 996         is->code=NULL; // make first iter fail
 997         is->size=0;
 998         is->adr=0;
 999         return 0;
1000     }
1001     // TODO should maybe mark is.insn invalid?
1002     is->code=p;
1003     is->size=fw->size8 - (p-fw->buf8);
1004     is->adr=adr;
1005     return 1;
1006 }
1007 
1008 // initialize iterator state at adr, clearing history
1009 int disasm_iter_init(__attribute__ ((unused))firmware *fw, iter_state_t *is, uint32_t adr)
1010 {
1011     adr_hist_reset(&is->ah);
1012     return disasm_iter_set(fw,is,adr);
1013 }
1014 
1015 // disassemble next instruction, recording address in history
1016 // returns false if state invalid or disassembly fails
1017 // if disassembly fails, is->adr is not incremented
1018 int disasm_iter(__attribute__ ((unused))firmware *fw, iter_state_t *is)
1019 {
1020     // iter_start not called or invalid
1021     if(!is->code) {
1022         return 0;
1023     }
1024     adr_hist_add(&is->ah,(uint32_t)is->adr | is->thumb); // record thumb state to allow backtracking through state changes
1025     return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
1026 }
1027 
// re-disassemble the current instruction
// could be useful if turning detail off/on but doesn't seem to help perf much
// NOTE out of date
// disabled with #if 0, kept for reference only
// returns 0 if the iterator has no code pointer or no history, otherwise the
// result of cs_disasm_iter
#if 0
int disasm_iter_redo(firmware *fw,iter_state_t *is) {
    if(!is->code || !is->ah.count) {
        return 0;
    }
    // step code pointer, address and remaining size back by the size of the last instruction
    is->code -= is->insn->size;
    is->adr -= is->insn->size;
    is->size += is->insn->size;
    // call iter directly, to avoid touching history
    return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
}
#endif
1043 
1044 // ***** disassembly utilities operating on the default iterator state *****
1045 /*
1046 initialize iter state to begin iterating at adr
1047 history is cleared
1048 */
1049 int fw_disasm_iter_start(firmware *fw, uint32_t adr)
1050 {
1051     return disasm_iter_init(fw,fw->is,adr);
1052 }
1053 
1054 // disassemble the next instruction, updating cached state
1055 int fw_disasm_iter(firmware *fw)
1056 {
1057     return disasm_iter(fw,fw->is);
1058 }
1059 
1060 // disassemble single instruction at given adr, updating cached values
1061 // history is cleared
1062 int fw_disasm_iter_single(firmware *fw, uint32_t adr)
1063 {
1064     fw_disasm_iter_start(fw,adr);
1065     return fw_disasm_iter(fw);
1066 }
1067 
1068 
1069 // ****** standalone disassembly without an iter_state ******
/*
disassemble up to count instructions starting at firmware address adr
allocates and returns insns in insn, can be freed with cs_free(insn, count)
returns the value of cs_disasm, or 0 with *insn set to NULL if adr cannot be
converted to a pointer

disabled with #if 0
NOTE(review): uses fw->cs_handle, while the enabled code in this file uses
fw->cs_handle_arm / fw->cs_handle_thumb - presumably stale, confirm before re-enabling
*/
#if 0
size_t fw_disasm_adr(firmware *fw, uint32_t adr, unsigned count, cs_insn **insn)
{
    uint8_t *p=adr2ptr(fw,adr);
    if(!p) {
        *insn=NULL; // ?
        return 0;
    }
    // code size passed to capstone is everything from p to the end of the firmware buffer
    return cs_disasm(fw->cs_handle, p, fw->size8 - (p-fw->buf8), adr, count, insn);
}
#endif
1085 
1086 // ***** utilities for searching disassembly over large ranges ******
1087 /*
1088 iterate over firmware disassembling, calling callback described above after each
1089 successful disassembly iteration.  If disassembly fails, the iter state is advanced
1090 minimum instruction size without calling the callback.
1091 starts at address is taken from the iter_state, which should be initialized with
1092 disasm_iter_new(), disasm_iter_init(), or a previous search or iter call.
1093 end defaults to end of ram code or rom code (before init data, if known), based on start
1094 v1 and udata are provided to the callback
1095 */
1096 uint32_t fw_search_insn(firmware *fw, iter_state_t *is, search_insn_fn f, uint32_t v1, void *udata, uint32_t adr_end)
1097 {
1098     uint32_t adr_start=is->adr;
1099     adr_range_t *r_start=adr_get_range(fw,adr_start);
1100     if(!r_start) {
1101         fprintf(stderr,"fw_search_insn: invalid start address 0x%08x\n",adr_start);
1102         return 0;
1103     }
1104 
1105     // default to end of start range
1106     if(!adr_end) {
1107         if(r_start->type == ADR_RANGE_ROM) {
1108             adr_end = fw->rom_code_search_max_adr;
1109         } else {
1110             adr_end=r_start->start + r_start->bytes - is->insn_min_size;
1111         }
1112     }
1113     adr_range_t *r_end=adr_get_range(fw,adr_end);
1114 
1115     if(!r_end) {
1116         fprintf(stderr,"fw_search_insn: invalid end address 0x%08x\n",adr_end);
1117         return 0;
1118     }
1119     // ignore thumb bit on end adr
1120     adr_end=ADR_CLEAR_THUMB(adr_end);
1121 
1122     if((r_start != r_end) || (adr_end < adr_start)) {
1123         fprintf(stderr,"fw_search_insn: invalid address range 0x%08x 0x%08x\n",adr_start,adr_end);
1124         return 0;
1125     }
1126 
1127     uint32_t adr=adr_start;
1128     // don't bother with buf ranges for RAM code
1129     if(r_start->type != ADR_RANGE_ROM) {
1130         while(adr < adr_end) {
1131             if(disasm_iter(fw,is)) {
1132                 uint32_t r=f(fw,is,v1,udata);
1133                 if(r) {
1134                     return r;
1135                 }
1136                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1137             } else {
1138                 // disassembly failed
1139                 // increment by minimum instruction size and re-init
1140                 adr=adr+is->insn_min_size;
1141                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1142                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1143                     return 0;
1144                 }
1145              }
1146         }
1147         return 0;
1148     }
1149     BufRange *br=fw->br;
1150     // TODO might want to (optionally?) turn off details? For now, caller can set, doesn't seem to help perf much
1151     // TODO when searching ROM, could skip over RAM copied areas (currently just limit default range)
1152     while(br && adr < adr_end) {
1153         uint32_t *p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1154         uint32_t *br_end = br->p + br->len;
1155         uint32_t adr_chunk_end = ptr2adr(fw,(uint8_t*)br_end);
1156         if(adr_end < adr_chunk_end) {
1157             adr_chunk_end = adr_end;
1158         }
1159         // address is before start of current range, adjust
1160         if(p_adr < br->p) {
1161             adr=ptr2adr(fw,(uint8_t *)br->p);
1162             if(!disasm_iter_init(fw,is,(uint32_t)adr | is->thumb)) {
1163                 return 0;
1164             }
1165             p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1166         }
1167         //printf("br:0x%08x-0x%08x\n",ptr2adr(fw,(uint8_t *)br->p),ptr2adr(fw,(uint8_t *)(br->p+br->len)));
1168         while(adr < adr_chunk_end) {
1169             if(disasm_iter(fw,is)) {
1170                 uint32_t r=f(fw,is,v1,udata);
1171                 if(r) {
1172                     return r;
1173                 }
1174                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1175             } else {
1176                 // disassembly failed. cs_disarm_iter does not update address
1177                 // increment by half word and re-init
1178                 adr=adr+is->insn_min_size;
1179                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1180                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1181                     return 0;
1182                 }
1183              }
1184         }
1185         // next range
1186         br=br->next;
1187     }
1188     return 0;
1189 }
1190 
1191 // ****** callbacks for use with fw_search_insn ******
1192 
1193 // search for constant references
1194 uint32_t search_disasm_const_ref(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1195 {
1196 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1197     uint32_t av=ADRx2adr(fw,is->insn);
1198     if(av) {
1199 //        printf("adr 0x%08x\n",av);
1200         if(av == val) {
1201             return (uint32_t)is->insn->address;
1202         }
1203         return 0;
1204     }
1205     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1206     if(pv) {
1207 //        printf("ldr 0x%08x\n",*pv);
1208         if(*pv == val) {
1209             return (uint32_t)is->insn->address;
1210         }
1211     }
1212     return 0;
1213 }
1214 
1215 // search for string ref
1216 uint32_t search_disasm_str_ref(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t val, void *udata)
1217 {
1218     const char *str=(const char *)udata;
1219 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1220     uint32_t av=ADRx2adr(fw,is->insn);
1221     if(av) {
1222 //        printf("adr 0x%08x\n",av);
1223         char *cmp=(char *)adr2ptr_with_data(fw,av);
1224         if(cmp && (strcmp(cmp,str) == 0)) {
1225             return (uint32_t)is->insn->address;
1226         }
1227         return 0;
1228     }
1229     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1230     if(pv) {
1231 //        printf("ldr 0x%08x\n",*pv);
1232         char *cmp=(char *)adr2ptr_with_data(fw,*pv);
1233         if(cmp && (strcmp(cmp,str) == 0)) {
1234             return (uint32_t)is->insn->address;
1235         }
1236     }
1237     return 0;
1238 }
1239 
1240 // search for calls/jumps to immediate addresses
1241 // thumb bit in address should be set appropriately
1242 // returns 1 if found, address can be obtained from insn
1243 uint32_t search_disasm_calls(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1244 {
1245     //printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1246     uint32_t sub=get_branch_call_insn_target(fw,is);
1247     if(sub) {
1248         if(sub == val) {
1249             return 1;
1250         }
1251     }
1252     return 0;
1253 }
1254 
1255 // a search_calls_multi_fn that just returns 1
1256 int search_calls_multi_end(__attribute__ ((unused))firmware *fw, __attribute__ ((unused))iter_state_t *is, __attribute__ ((unused))uint32_t adr) {
1257     return 1;
1258 }
1259 
1260 
1261 // Search for calls to multiple functions (more efficient than multiple passes)
1262 // if adr is found in null terminated search_calls_multi_data array, returns fn return value
1263 // otherwise 0
1264 uint32_t search_disasm_calls_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1265 {
1266     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1267     uint32_t sub=get_branch_call_insn_target(fw,is);
1268     if(sub) {
1269         while(data->adr) {
1270             if(data->adr == sub) {
1271                 return data->fn(fw,is,sub);
1272             }
1273             data++;
1274         }
1275     }
1276     return 0;
1277 }
1278 
1279 // as above, but check for single level of veneer
1280 uint32_t search_disasm_calls_veneer_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1281 {
1282     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1283     uint32_t sub=get_branch_call_insn_target(fw,is);
1284     if(sub) {
1285         while(data->adr) {
1286             if(data->adr == sub) {
1287                 return data->fn(fw,is,sub);
1288             }
1289             data++;
1290         }
1291         uint32_t veneer=0;
1292         fw_disasm_iter_single(fw,sub);
1293         veneer=get_branch_call_insn_target(fw,fw->is);
1294         data=(search_calls_multi_data_t *)userdata;
1295         while(data->adr) {
1296             if(data->adr == veneer) {
1297                 return data->fn(fw,is,sub);
1298             }
1299             data++;
1300         }
1301     }
1302     return 0;
1303 }
1304 
1305 // ****** utilities for extracting register values ******
1306 /*
1307 backtrack through is_init state history picking up constants loaded into r0-r3
1308 return bitmask of regs with values found
1309 affects fw->is, does not affect is_init
1310 
1311 NOTE values may be inaccurate for many reasons, doesn't track all reg affecting ops,
1312 doesn't account for branches landing in the middle of inspected code
1313 doesn't account for many conditional cases
1314 */
1315 int get_call_const_args(firmware *fw, iter_state_t *is_init, int max_backtrack, uint32_t *res)
1316 {
1317     int i;
1318     /*
1319     static int dbg_count=0;
1320     if(is_init->insn->address==...) {
1321         dbg_count=1;
1322     } else {
1323         dbg_count=0;
1324     }
1325     */
1326 
1327     // init regs to zero (to support adds etc)
1328     for (i=0;i<4;i++) {
1329         res[i]=0;
1330     }
1331 
1332     // count includes current instruction (i.e. BL of call)
1333     if(is_init->ah.count <= 1) {
1334         return 0;
1335     }
1336     if(is_init->ah.count - 1 < max_backtrack) {
1337         /*
1338         if(dbg_count > 0) {
1339             printf("max_backtrack %d hist count %d\n",max_backtrack,is_init->ah.count);
1340         }
1341         */
1342         max_backtrack = is_init->ah.count-1;
1343     }
1344     uint32_t found_bits=0; // registers with known const values
1345     uint32_t known_bits=0; // registers with some value
1346 
1347     for(i=1;i<=max_backtrack && known_bits !=0xf;i++) {
1348         // TODO going backwards and calling start each time inefficient
1349         // forward could also find multi-instruction constants in some cases (e.g mov + add, movw + movt)
1350         fw_disasm_iter_single(fw,adr_hist_get(&is_init->ah,i)); // thumb state comes from hist
1351         /*
1352         if(dbg_count > 0) {
1353             printf("backtrack %d:%d  ",dbg_count,i);
1354             printf("%"PRIx64" %s %s\n",fw->is->insn->address,fw->is->insn->mnemonic, fw->is->insn->op_str);
1355         }
1356         */
1357         arm_insn insn_id = fw->is->insn->id;
1358         // BL, BLX etc will trash r0-r3
1359         // only break on unconditional - optimistic, could produce incorrect results
1360         if((insn_id == ARM_INS_BL || insn_id == ARM_INS_BLX
1361             // B/BX could mean execution goes somewhere totally different, but in practice it often just skipping over a word of data...
1362              /*|| insn_id == ARM_INS_B || insn_id == ARM_INS_BX*/)
1363              && fw->is->insn->detail->arm.cc == ARM_CC_AL) {
1364             break;
1365         }
1366 
1367         // if the first op isn't REG, continue
1368         // TODO lots of instructions could affect reg even if not first op
1369         if(fw->is->insn->detail->arm.operands[0].type != ARM_OP_REG) {
1370             continue;
1371         }
1372         arm_reg rd = fw->is->insn->detail->arm.operands[0].reg;
1373         // capstone arm.h regs enum R0-R12 are ordered
1374         // enum has entries before R0
1375         if(rd < ARM_REG_R0 || rd > ARM_REG_R3) {
1376             continue;
1377         }
1378 
1379         int rd_i = rd - ARM_REG_R0;
1380         uint32_t rd_bit = 1 << rd_i;
1381         // if we don't already have something for this reg
1382         if(!(known_bits & rd_bit)) {
1383             // know something has been done to this reg
1384             // note doesn't account for conditionals
1385             known_bits |=rd_bit;
1386             // is it an LDR
1387             uint32_t *pv=LDR_PC2valptr(fw,fw->is->insn);
1388             if(pv) {
1389                 res[rd_i] += *pv;
1390 //                if(dbg_count) printf("found ldr r%d,=0x%08x\n",rd_i,res[rd_i]);
1391                 found_bits |=rd_bit;
1392                 continue;
1393             }
1394             uint32_t v=ADRx2adr(fw,fw->is->insn); // assumes ADR doesn't generate 0, probably safe
1395             if(v) {
1396                 res[rd_i] += v;
1397 //                 if(dbg_count) printf("found adrx r%d,0x%08x\n",rd_i,res[rd_i]);
1398                 found_bits |=rd_bit;
1399                 continue;
1400             }
1401             // immediate MOV note MOVT combinations, not accounted for, some handled ADDs below
1402             if( IS_INSN_ID_MOVx(insn_id)
1403                 && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1404                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1405 //                if(dbg_count) printf("found move r%d,#0x%08x\n",rd_i,res[rd_i]);
1406                 found_bits |=rd_bit;
1407             } else if(isADDx_imm(fw->is->insn)) {
1408                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1409 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1410                 // pretend reg is not known
1411                 known_bits ^=rd_bit;
1412                 // do not set found bit here
1413             } else if(isSUBx_imm(fw->is->insn)) {
1414                 res[rd_i] = (int)(res[rd_i]) - fw->is->insn->detail->arm.operands[1].imm;
1415 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1416                 // pretend reg is not known
1417                 known_bits ^=rd_bit;
1418                 // do not set found bit here
1419             }/* else {
1420             }
1421             */
1422         }
1423     }
1424 //    if(dbg_count) printf("get_call_const_args found 0x%08x\n",found_bits);
1425     return found_bits;
1426 }
1427 
1428 /*
1429 starting from is_init, look for a direct jump, such as
1430  B <target>
1431  LDR PC, [pc, #x]
1432  BX PC
1433  movw ip, #x
1434  movt ip, #x
1435  bx ip
1436 if found, return target address with thumb bit set appropriately
1437 NOTE does not check for conditional
1438 uses fw->is
1439 does not check CBx, since it would generally be part of a function not a veneer
1440 */
1441 uint32_t get_direct_jump_target(firmware *fw, iter_state_t *is_init)
1442 {
1443     uint32_t adr=B_target(fw,is_init->insn);
1444     // B ... return with thumb set to current mode
1445     if(adr) {
1446         return (adr | is_init->thumb);
1447     }
1448     adr=LDR_PC_PC_target(fw,is_init->insn);
1449     // LDR pc #... thumb is set in the loaded address
1450     if(adr) {
1451         return adr;
1452     }
1453     // BX PC
1454     adr=BX_PC_target(fw,is_init->insn);
1455     if(adr) {
1456         // bx swaps mode
1457         if(is_init->thumb) {
1458             return ADR_CLEAR_THUMB(adr);
1459         } else {
1460             return ADR_SET_THUMB(adr);
1461         }
1462     }
1463     // an immediate move to ip (R12), candidate for multi-instruction veneer
1464     if((is_init->insn->id == ARM_INS_MOV || is_init->insn->id == ARM_INS_MOVW)
1465         && is_init->insn->detail->arm.operands[0].reg == ARM_REG_IP
1466         && is_init->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1467         adr = is_init->insn->detail->arm.operands[1].imm;
1468         // iter in default state, starting from is_init
1469         if(!fw_disasm_iter_single(fw,is_init->adr | is_init->thumb)) {
1470             fprintf(stderr,"get_direct_jump_target: disasm single failed at 0x%"PRIx64"\n",fw->is->insn->address);
1471             return 0;
1472         }
1473         // check for MOVT ip, #x
1474         if(!(fw->is->insn->id == ARM_INS_MOVT
1475             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP
1476             && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM)) {
1477 // doesn't match second two insn veneer, not really an error
1478 //            fprintf(stderr,"get_direct_jump_target: not 2 insn ip veneer 0x%"PRIx64"\n",fw->is->insn->address);
1479             return 0;
1480         }
1481         // thumb set in loaded adr
1482         adr = (fw->is->insn->detail->arm.operands[1].imm << 16) | (adr&0xFFFF);
1483         if(!fw_disasm_iter(fw)) {
1484             fprintf(stderr,"get_direct_jump_target: disasm 2 failed at 0x%"PRIx64"\n",fw->is->insn->address);
1485             return 0;
1486         }
1487         // BX ip ?
1488         if(fw->is->insn->id == ARM_INS_BX
1489             && fw->is->insn->detail->arm.operands[0].type == ARM_OP_REG
1490             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP) {
1491             return adr;
1492         }
1493     }
1494     return 0;
1495 }
1496 
1497 /*
1498 return target of any single instruction branch or function call instruction,
1499 with thumb bit set appropriately
1500 returns 0 if current instruction not branch/call
1501 */
1502 uint32_t get_branch_call_insn_target(firmware *fw, iter_state_t *is)
1503 {
1504     uint32_t adr=B_BL_target(fw,is->insn);
1505     if(adr) {
1506         return (adr | is->thumb);
1507     }
1508     // CBx only exists in thumb
1509     if(is->thumb) {
1510         adr=CBx_target(fw,is->insn);
1511         if(adr) {
1512             return ADR_SET_THUMB(adr);
1513         }
1514     }
1515 
1516     adr=BLXimm_target(fw,is->insn);
1517     if(adr) {
1518         if(is->thumb) {
1519             return adr;
1520         } else {
1521             return adr | is->thumb;
1522         }
1523     }
1524 
1525     adr=LDR_PC_PC_target(fw,is->insn);
1526     if(adr) {
1527         return adr;
1528     }
1529     adr=BX_PC_target(fw,is->insn);
1530     if(adr) {
1531         // bx swaps mode
1532         if(is->thumb) {
1533             return ADR_CLEAR_THUMB(adr);
1534         } else {
1535             return ADR_SET_THUMB(adr);
1536         }
1537     }
1538     return 0;
1539 }
1540 
1541 /*
1542 search up to max_search_ins for first LDR, =value
1543 and then match up to max_seq_insns for a sequence like
1544 LDR Rbase,=adr
1545 ... possible intervening ins
1546 SUB Rbase,#adj // optional, may be any add/sub variant
1547 ... possible intervening ins
1548 LDR Rval,[Rbase + #off]
1549 
1550 returns 1 if found, 0 if not
1551 stores registers and constants in *result if successful
1552 
1553 NOTE bad values are possible with intervening ins, short sequences recommended
1554 
1555 TODO similar code for STR would be useful, but in many cases would have to handle load or move into reg_val
1556 */
1557 int find_and_get_var_ldr(firmware *fw,
1558                             iter_state_t *is,
1559                             int max_search_insns,
1560                             int max_seq_insns,
1561                             arm_reg match_val_reg, // ARM_REG_INVALID for any
1562                             var_ldr_desc_t *result)
1563 
1564 {
1565     if(!insn_match_find_next(fw,is,max_search_insns,match_ldr_pc)) {
1566         // printf("find_and_get_var_ldr: LDR PC not found\n");
1567         return 0;
1568     }
1569     var_ldr_desc_t r;
1570     memset(&r,0,sizeof(r));
1571     r.reg_base=is->insn->detail->arm.operands[0].reg;
1572     r.adr_base=LDR_PC2val(fw,is->insn);
1573     int seq_count=1;
1574 
1575     while(seq_count < max_seq_insns) {
1576         // disassembly failed, no match (could ignore..)
1577         if(!disasm_iter(fw,is)) {
1578             return 0;
1579         }
1580         // assume first encountered LDR x,[pc] is the one to use
1581         // give up if we encounter another. Don't know beforehand which reg is base
1582         // NOTE: backward search would allow matching base that eventually ends up in desired reg
1583         if(isLDR_PC(is->insn)) {
1584             // printf("find_and_get_var_ldr: second ldr pc\n");
1585             return  0;
1586         }
1587         seq_count++;
1588         // firmware may use add/sub to get actual firmware base address
1589         if(isADDx_imm(is->insn) || isSUBx_imm(is->insn)) {
1590             if((arm_reg)is->insn->detail->arm.operands[0].reg != r.reg_base) {
1591                 continue;
1592             }
1593             if(isADDx_imm(is->insn)) {
1594                 r.adj=is->insn->detail->arm.operands[1].imm;
1595             } else {
1596                 r.adj=-is->insn->detail->arm.operands[1].imm;
1597             }
1598             if(!disasm_iter(fw,is)) {
1599                 return 0;
1600             }
1601             seq_count++;
1602         } else {
1603             r.adj=0;
1604         }
1605         // try to bail out if base reg trashed
1606         // BL, BLX etc will trash r0-r3, B, BX go somewhere else
1607         // only break on unconditional - optimistic, could produce incorrect results
1608         // can't account for branches into searched code
1609         if((r.reg_base >= ARM_REG_R0 && r.reg_base <= ARM_REG_R3)
1610                 && (is->insn->id == ARM_INS_BL || is->insn->id == ARM_INS_BLX
1611                     || is->insn->id == ARM_INS_B || is->insn->id == ARM_INS_BX)
1612                 && is->insn->detail->arm.cc == ARM_CC_AL) {
1613             // printf("find_and_get_var_ldr: bail B*\n");
1614             return 0;
1615         }
1616         if(is->insn->id != ARM_INS_LDR || (arm_reg)is->insn->detail->arm.operands[1].reg != r.reg_base) {
1617             // other operation on with base reg as first operand, give up
1618             // simplistic, many other things could affect reg
1619             if(is->insn->detail->arm.operands[0].type == ARM_OP_REG && (arm_reg)is->insn->detail->arm.operands[0].reg == r.reg_base) {
1620                 // printf("find_and_get_var_ldr: bail mod base\n");
1621                 return 0;
1622             }
1623             continue;
1624         }
1625         r.reg_val = is->insn->detail->arm.operands[0].reg;
1626         if(match_val_reg != ARM_REG_INVALID && (r.reg_val != match_val_reg)) {
1627             continue;
1628         }
1629         r.off = is->insn->detail->arm.operands[1].mem.disp;
1630         r.adr_adj = r.adr_base + r.adj;
1631         r.adr_final = r.adr_adj + r.off;
1632         memcpy(result,&r,sizeof(r));
1633         return 1;
1634     }
1635     return 0;
1636 }
1637 
1638 /*
1639 find instruction or sequence that receives specified constant in specified r0-r3 reg
1640 search starting from is to max_search_bytes
1641 allow up to max_gap_insns between constant load and match, generally small (4-8 max)
1642 returns address of match with thumb bit set according to mode, or 0 on failure
1643 */
1644 int find_const_ref_match(firmware *fw,
1645                             iter_state_t *is,
1646                             int max_search_bytes,
1647                             int max_gap_insns,
1648                             arm_reg match_reg, // must be R0-R3
1649                             uint32_t val,
1650                             const insn_match_t *match,
1651                             int match_type)
1652 {
1653     if(match_reg < ARM_REG_R0 || match_reg > ARM_REG_R3) {
1654         fprintf(stderr,"find_const_ref_match: invalid match_reg %d\n",match_reg);
1655         return 0;
1656     }
1657     if(max_gap_insns >= ADR_HIST_SIZE) {
1658         fprintf(stderr,"find_const_ref_match: invalid max_gap_insns %d\n",max_gap_insns);
1659         return 0;
1660     }
1661     int (*match_fn)(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match);
1662     if(match_type == FIND_CONST_REF_MATCH_SEQ) {
1663         match_fn = insn_match_find_next_seq;
1664     } else if(match_type == FIND_CONST_REF_MATCH_ANY){
1665         match_fn = insn_match_find_next;
1666     } else {
1667         fprintf(stderr,"find_const_ref_match: invalid match_type %d\n",match_type);
1668         return 0;
1669     }
1670     // search for a ref to constant
1671     while(fw_search_insn(fw,is,search_disasm_const_ref,val,NULL,(uint32_t)(is->adr+max_search_bytes))) {
1672         //printf("find_const_ref_match: match str 0x%"PRIx64"\n",is->adr);
1673         uint32_t next_adr = (uint32_t)is->adr;
1674         // search for next bl / blx
1675         // could search include b for tail calls, but hard to distinguish
1676         if(match_fn(fw,is,max_gap_insns,match)) {
1677             uint32_t reg_num = match_reg - ARM_REG_R0;
1678             uint32_t reg_bit = 1 << reg_num;
1679             uint32_t regs[4];
1680             //printf("find_const_ref_match: match insn 0x%"PRIx64"\n",reg_num,is->adr);
1681             // backtrack to find out if const ref ends up in desired reg
1682             if((get_call_const_args(fw,is,max_gap_insns,regs)&reg_bit)==reg_bit) {
1683                 //printf("find_const_ref_match: match reg r%d 0x%"PRIx64"\n",reg_num,is->adr);
1684                 if(regs[reg_num] == val) {
1685                     return iter_state_adr(is);
1686                 }
1687             }
1688         }
1689         // not matched, restore is and advance one instruction
1690         disasm_iter_init(fw,is,next_adr | is->thumb);
1691     }
1692     return 0;
1693 }
1694 
1695 /*
1696 find call that receives specified constant in specified r0-r3 reg
1697 search starting from is to max_search_bytes
1698 allow up to max_gap_insns between constant load and call, generally small (4-8 max)
1699 returns address of call with thumb bit set according to mode, or 0 on failure
1700 */
1701 int find_const_ref_call(firmware *fw,
1702                             iter_state_t *is,
1703                             int max_search_bytes,
1704                             int max_gap_insns,
1705                             arm_reg match_reg, // must be R0-R3
1706                             uint32_t val)
1707 
1708 {
1709     return find_const_ref_match(fw,is,max_search_bytes,max_gap_insns,match_reg,val,match_bl_blximm,FIND_CONST_REF_MATCH_ANY);
1710 }
1711 
1712 /*
1713 check for, and optionally return information about
1714 functions with return values that can be completely determined
1715 from disassembly
1716 uses fw->is
1717 */
1718 // constants below may  as flags on input, and as return valaue
1719 // no simple function found
1720 #define MATCH_SIMPLE_FUNC_NONE    0x0
1721 // immediately returns, with no value
1722 #define MATCH_SIMPLE_FUNC_NULLSUB 0x1
1723 // immediately returns with a MOV constant
1724 #define MATCH_SIMPLE_FUNC_IMM     0x2
1725 // TODO LDR pc, =const,  ADR
1726 // TODO could also do pointer derefs and return pointer info without val
1727 #define MATCH_SIMPLE_FUNC_ANY     0x3
1728 int check_simple_func(firmware *fw, uint32_t adr, int match_ftype, simple_func_desc_t *info)
1729 {
1730     const insn_match_t match_mov_r0_imm[]={
1731         {MATCH_INS(MOV,   2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1732 #if CS_API_MAJOR < 4
1733         {MATCH_INS(MOVS,  2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1734 #endif
1735         {ARM_INS_ENDING}
1736     };
1737 
1738     int found = 0;
1739     int found_val = 0;
1740     if(info) {
1741         info->ftype = MATCH_SIMPLE_FUNC_NONE;
1742         info->retval = 0;
1743     }
1744     if(!fw_disasm_iter_single(fw,adr)) {
1745         //fprintf(stderr,"check_simple_func: disasm_iter_single failed 0x%x\n",adr);
1746         return 0;
1747     }
1748     if(match_ftype & MATCH_SIMPLE_FUNC_IMM) {
1749         // check mov r0, #imm
1750         if(insn_match_any(fw->is->insn,match_mov_r0_imm)) {
1751             found_val = fw->is->insn->detail->arm.operands[1].imm;
1752             found = MATCH_SIMPLE_FUNC_IMM;
1753             // fprintf(stderr,"check_simple_func: found IMM\n");
1754             if(!fw_disasm_iter(fw)) {
1755                 //fprintf(stderr,"check_simple_func: disasm_iter failed 0x%x\n",adr);
1756                 return 0;
1757             }
1758         }
1759     }
1760     if(!isRETx(fw->is->insn)) {
1761         // fprintf(stderr,"check_simple_func: no ret\n");
1762         return 0;
1763     }
1764     // no previous found, check if ret alone
1765     if(!found && (match_ftype & MATCH_SIMPLE_FUNC_NULLSUB)) {
1766         found = MATCH_SIMPLE_FUNC_NULLSUB;
1767         // fprintf(stderr,"check_simple_func: found nullsub\n");
1768     }
1769     if(found) {
1770         if(info) {
1771             info->ftype = found;
1772             info->retval = found_val;
1773         }
1774     }
1775     return found;
1776 }
1777 
1778 /*
1779 advance iter_state is trying to find the last function called by a function
1780 function assumed to PUSH LR, POP LR or PC (many small functions don't!)
1781 either the last BL/BLXimm before pop {... PC}
1782 or B after POP {... LR}
1783 MOV or LDR to R0-R3 are allowed between POP LR and the final B
1784 If a POP occurs before min_insns, the match fails
1785 Calls before min_insns are ignored
1786 */
1787 uint32_t find_last_call_from_func(firmware *fw, iter_state_t *is,int min_insns, int max_insns)
1788 {
1789     int push_found=0;
1790     uint32_t last_adr=0;
1791     int count;
1792     for(count=0; count < max_insns; count++) {
1793         if(!disasm_iter(fw,is)) {
1794             fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1795             return 0;
1796         }
1797         // TODO could match push regs with pop
1798         if(isPUSH_LR(is->insn)) {
1799             // already found a PUSH LR, probably in new function
1800             if(push_found) {
1801                 //printf("find_last_call_from_func: second push pc 0x%"PRIx64"\n",is->adr);
1802                 return 0;
1803             }
1804             push_found=1;
1805             continue;
1806         }
1807         // ignore everything before push (could be some mov/ldr, shouldn't be any calls)
1808         // TODO may want to allow starting in the middle of a function
1809         if(!push_found) {
1810             continue;
1811         }
1812         // found a potential call, store
1813         if(insn_match_any(is->insn,match_bl_blximm) && count >= min_insns) {
1814             //printf("find_last_call_from_func: found call 0x%"PRIx64"\n",is->adr);
1815             last_adr=get_branch_call_insn_target(fw,is);
1816             continue;
1817         }
1818         // found pop PC, can only be stored call if present
1819         if(isPOP_PC(is->insn)) {
1820             // printf("find_last_call_from_func: found pop PC 0x%"PRIx64"\n",is->adr);
1821             if(last_adr) {
1822                 return last_adr;
1823             }
1824             // no call found, or not found within min
1825             return 0;
1826         }
1827         // found pop LR, check if next is allowed tail sequence followed by unconditional B
1828         if(isPOP_LR(is->insn)) {
1829             // hit func end with less than min, no match
1830             if(count < min_insns) {
1831                 // printf("find_last_call_from_func: pop before min 0x%"PRIx64"\n",is->adr);
1832                 return 0;
1833             }
1834             if(!disasm_iter(fw,is)) {
1835                 fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1836                 return 0;
1837             }
1838             // allow instructions likely to appear between pop and tail call
1839             // MOV or LDR to r0-r3
1840             // others are possible e.g arithmetic or LDR r4,=const; LDR r0,[r4, #offset]
1841             const insn_match_t match_tail[]={
1842                 {MATCH_INS(MOV, MATCH_OPCOUNT_ANY), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_REST_ANY}},
1843 // MOVS unlikely to be valid, though possible if followed by additional conditional instructions
1844 // in any case, want to match capstone 4 behavior
1845 #if CS_API_MAJOR < 4
1846                 {MATCH_INS(MOV, MATCH_OPCOUNT_ANY), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_REST_ANY}},
1847 #endif
1848 
1849                 {MATCH_INS(LDR, 2), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_ANY}},
1850                 {ARM_INS_ENDING}
1851             };
1852             while(insn_match_any(is->insn,match_tail) && count < max_insns) {
1853                 if(!disasm_iter(fw,is)) {
1854                     fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1855                     return 0;
1856                 }
1857                 count++;
1858             }
1859             if(is->insn->id == ARM_INS_B && is->insn->detail->arm.cc == ARM_CC_AL) {
1860                 return get_branch_call_insn_target(fw,is);
1861             }
1862             // don't go more than one insn after pop (could be more, but uncommon)
1863             // printf("find_last_call_from_func: more than one insn after pop 0x%"PRIx64"\n",is->adr);
1864             return 0;
1865         }
1866         // found another kind of ret, give up
1867         if(isRETx(is->insn)) {
1868             // printf("find_last_call_from_func: other ret 0x%"PRIx64"\n",is->adr);
1869             return 0;
1870         }
1871     }
1872     // printf("find_last_call_from_func: no match in range 0x%"PRIx64"\n",is->adr);
1873     return 0;
1874 }
1875 
1876 // ****** utilities for matching instructions and instruction sequences ******
1877 
1878 // some common matches for insn_match_find_next
1879 const insn_match_t match_b[]={
1880     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1881     {ARM_INS_ENDING}
1882 };
1883 const insn_match_t match_bl[]={
1884     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1885     {ARM_INS_ENDING}
1886 };
1887 const insn_match_t match_b_bl[]={
1888     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1889     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1890     {ARM_INS_ENDING}
1891 };
1892 
1893 const insn_match_t match_b_bl_blximm[]={
1894     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1895     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1896     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1897     {ARM_INS_ENDING}
1898 };
1899 
1900 const insn_match_t match_bl_blximm[]={
1901     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1902     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1903     {ARM_INS_ENDING}
1904 };
1905 
1906 const insn_match_t match_bxlr[]={
1907     {MATCH_INS(BX, 1), {MATCH_OP_REG(LR)}},
1908     {ARM_INS_ENDING}
1909 };
1910 
1911 const insn_match_t match_bxreg[]={
1912     {MATCH_INS(BX, 1), {MATCH_OP_REG_ANY}},
1913     {ARM_INS_ENDING}
1914 };
1915 
1916 const insn_match_t match_blxreg[]={
1917     {MATCH_INS(BLX, 1), {MATCH_OP_REG_ANY}},
1918     {ARM_INS_ENDING}
1919 };
1920 
1921 const insn_match_t match_ldr_pc[]={
1922     {MATCH_INS(LDR, 2), {MATCH_OP_REG_ANY,  MATCH_OP_MEM_BASE(PC)}},
1923     {ARM_INS_ENDING}
1924 };
1925 
1926 // iterate as long as sequence of instructions matches sequence defined in match
1927 int insn_match_seq(firmware *fw, iter_state_t *is, const insn_match_t *match)
1928 {
1929     //printf("%"PRIx64" insn_match_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1930     while(match->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,match)) {
1931         //printf("%"PRIx64" insn_match_seq next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1932         match++;
1933     }
1934     return (match->id == ARM_INS_ENDING);
1935 }
1936 
1937 // capstone enum isn't in numeric order, (SP through PC in capstone 4, but probably shouldn't assume)
1938 static const arm_reg reg_order[] = {
1939     ARM_REG_R0,
1940     ARM_REG_R1,
1941     ARM_REG_R2,
1942     ARM_REG_R3,
1943     ARM_REG_R4,
1944     ARM_REG_R5,
1945     ARM_REG_R6,
1946     ARM_REG_R7,
1947     ARM_REG_R8,
1948     ARM_REG_R9,
1949     ARM_REG_R10,
1950     ARM_REG_R11,
1951     ARM_REG_R12,
1952     ARM_REG_SP,
1953     ARM_REG_LR,
1954     ARM_REG_PC,
1955 };
1956 
1957 int reg_in_range(arm_reg r, arm_reg min_reg, arm_reg max_reg)
1958 {
1959     int c = -1, c_min = -1, c_max = -1;
1960     int i;
1961     for(i=0; i<(int)(sizeof(reg_order)/sizeof(arm_reg)); i++) {
1962         if(reg_order[i] == r) {
1963             c = i;
1964         }
1965         if(reg_order[i] == min_reg) {
1966             c_min = i;
1967         }
1968         if(reg_order[i] == max_reg) {
1969             c_max = i;
1970         }
1971     }
1972     // any invalid / unlisted regs, false
1973     if( c < 0 || c_min < 0 || c_max < 0) {
1974         return 0;
1975     }
1976     return (c >= c_min && c <= c_max);
1977 }
1978 
1979 // check if single insn matches values defined by match
1980 int insn_match(cs_insn *insn,const insn_match_t *match)
1981 {
1982     // specific instruction ID requested, check
1983     if(match->id != ARM_INS_INVALID && insn->id != match->id) {
1984         return 0;
1985     }
1986     // condition code requested, check
1987     if(match->cc != ARM_CC_INVALID && insn->detail->arm.cc != match->cc) {
1988         return 0;
1989     }
1990     // no op checks, done
1991     if(match->op_count == MATCH_OPCOUNT_IGNORE) {
1992         return 1;
1993     }
1994     // operand count requested, check
1995     if(match->op_count >= 0 && insn->detail->arm.op_count != match->op_count) {
1996         return 0;
1997     }
1998     int i;
1999     // operands
2000     for(i=0; i<MATCH_MAX_OPS && i < insn->detail->arm.op_count; i++) {
2001         // specific type requested?
2002         if(match->operands[i].type != ARM_OP_INVALID && insn->detail->arm.operands[i].type != match->operands[i].type) {
2003             return 0;
2004         }
2005         // specific registers requested?
2006         if(match->operands[i].reg1 != ARM_REG_INVALID) {
2007             if(insn->detail->arm.operands[i].type == ARM_OP_REG) {
2008                 // range requested
2009                 if(match->operands[i].reg2 != ARM_REG_INVALID) {
2010                     if(!reg_in_range((arm_reg)insn->detail->arm.operands[i].reg,
2011                                         match->operands[i].reg1, match->operands[i].reg2)) {
2012                         return 0;
2013                     }
2014                 } else if((arm_reg)insn->detail->arm.operands[i].reg != match->operands[i].reg1) {
2015                     return 0;
2016                 }
2017             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
2018                 if(insn->detail->arm.operands[i].mem.base != match->operands[i].reg1) {
2019                     return 0;
2020                 }
2021             } else {
2022                 fprintf(stderr,"insn_match: reg1 match requested on operand not reg or mem %d\n",
2023                         insn->detail->arm.operands[i].type);
2024             }
2025         }
2026         if(match->operands[i].reg2 != ARM_REG_INVALID) {
2027             if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
2028                 if(insn->detail->arm.operands[i].mem.index != match->operands[i].reg2) {
2029                     return 0;
2030                 }
2031             } else if(insn->detail->arm.operands[i].type != ARM_OP_REG) { // reg handled above
2032                 fprintf(stderr,"insn_match: reg2 match requested on operand not reg or mem %d\n",
2033                         insn->detail->arm.operands[i].type);
2034             }
2035         }
2036         if(match->operands[i].flags & MATCH_OP_FL_IMM) {
2037             if(insn->detail->arm.operands[i].type == ARM_OP_IMM
2038                     || insn->detail->arm.operands[i].type == ARM_OP_PIMM
2039                     || insn->detail->arm.operands[i].type == ARM_OP_CIMM) {
2040                 if(insn->detail->arm.operands[i].imm != match->operands[i].imm) {
2041                     return  0;
2042                 }
2043             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
2044                 if(insn->detail->arm.operands[i].mem.disp != match->operands[i].imm) {
2045                     return  0;
2046                 }
2047             } else {
2048                 fprintf(stderr,"insn_match: imm match requested on operand not imm or mem %d\n",
2049                         insn->detail->arm.operands[i].type);
2050             }
2051         }
2052         if(match->operands[i].flags & MATCH_OP_FL_LAST) {
2053             break;
2054         }
2055     }
2056     return 1;
2057 }
2058 
2059 // check if single insn matches any of the provided matches
2060 int insn_match_any(cs_insn *insn,const insn_match_t *match)
2061 {
2062     const insn_match_t *m;
2063     // check matches
2064     for(m=match;m->id != ARM_INS_ENDING;m++) {
2065         if(insn_match(insn,m)) {
2066             return 1;
2067         }
2068     }
2069     return 0;
2070 }
2071 
2072 // iterate is until current instruction matches any of the provided matches or until limit reached
2073 int insn_match_find_next(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
2074 {
2075     int i=0;
2076     while(i < max_insns) {
2077         // disassembly failed, no match (could ignore..)
2078         if(!disasm_iter(fw,is)) {
2079             return 0;
2080         }
2081         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2082         if(insn_match_any(is->insn,match)) {
2083             return 1;
2084         }
2085         i++;
2086     }
2087     // limit hit
2088     return 0;
2089 }
2090 
2091 // iterate is until current has matched any of the provided matches N times or until max_insns reached
2092 int insn_match_find_nth(firmware *fw, iter_state_t *is, int max_insns, int num_to_match, const insn_match_t *match)
2093 {
2094     int i=0;
2095     int num_matched=0;
2096     while(i < max_insns) {
2097         // disassembly failed, no match (could ignore..)
2098         if(!disasm_iter(fw,is)) {
2099             return 0;
2100         }
2101         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2102 
2103         const insn_match_t *m;
2104         // check matches
2105         for(m=match;m->id != ARM_INS_ENDING;m++) {
2106             if(insn_match(is->insn,m)) {
2107                 num_matched++;
2108             }
2109         }
2110         if(num_matched == num_to_match) {
2111             return 1;
2112         }
2113         i++;
2114     }
2115     // limit hit
2116     return 0;
2117 }
2118 
2119 // find next matching sequence starting within max_insns
2120 int insn_match_find_next_seq(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
2121 {
2122     int count=0;
2123     while(count < max_insns) {
2124         const insn_match_t *m=match;
2125         //printf("%"PRIx64" insn_match_find_next_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2126         while(m->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,m)) {
2127             m++;
2128             count++;
2129         }
2130         if(m->id == ARM_INS_ENDING) {
2131             return 1;
2132         }
2133         // non-matching
2134         count++;
2135     }
2136     return 0;
2137 }
2138 
2139 
2140 // Search the firmware for something. The desired matching is performed using the supplied 'func' function.
2141 // Continues searching until 'func' returns non-zero - then returns 1
2142 // otherwise returns 0.
2143 // Uses the BufRange structs to speed up searching
2144 // Note: this version searches byte by byte in the firmware dump instead of by words
2145 int fw_search_bytes(firmware *fw, search_bytes_fn func)
2146 {
2147     BufRange *p = fw->br;
2148     while (p)
2149     {
2150         int k;
2151         for (k = p->off*4; k < (p->off + p->len)*4; k++)
2152         {
2153             if (func(fw,k))
2154                 return 1;
2155         }
2156         p = p->next;
2157     }
2158     return 0;
2159 }
2160 
2161 
2162 // ****** firmware loading / initialization / de-allocation ******
2163 // add given address range
2164 void fw_add_adr_range(firmware *fw, uint32_t start, uint32_t end, uint32_t src_start, int type, int flags)
2165 {
2166     if(fw->adr_range_count == FW_MAX_ADR_RANGES) {
2167         fprintf(stderr,"fw_add_adr_range: FW_MAX_ADR_RANGES hit\n");
2168         return;
2169     }
2170     if(src_start < fw->base) {
2171         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x < base 0x%08x\n",src_start,fw->base);
2172         return;
2173     }
2174     if(src_start >= fw->base+fw->size8) {
2175         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x outside dump end 0x%08x\n",src_start,fw->base+fw->size8);
2176         return;
2177     }
2178     if(end <= start) {
2179         fprintf(stderr,"fw_add_adr_range: end 0x%08x <= start 0x%08x\n",end,start);
2180         return;
2181     }
2182     uint32_t len=end-start;
2183     if(len > 0xFFFFFFFF - src_start) {
2184         fprintf(stderr,"fw_add_adr_range: range too long %d\n",len);
2185         return;
2186     }
2187     if(len > fw->size8 - (start - fw->base)) {
2188         fprintf(stderr,"fw_add_adr_range: range outside of dump %d\n",len);
2189         return;
2190     }
2191     adr_range_t *r=&fw->adr_ranges[fw->adr_range_count];
2192     // TODO some firmware copies (i.e. g5x code 2) may end on non-word aligned address even though copy is words
2193     r->start=start;
2194     r->src_start=src_start;
2195     r->bytes=len;
2196     r->type=type;
2197     r->flags=flags;
2198     r->buf=fw->buf8 + (r->src_start - fw->base);
2199 
2200     fw->adr_range_count++;
2201 }
2202 
2203 void find_dryos_vers(firmware *fw)
2204 {
2205     const char *sig="DRYOS version 2.3, release #";
2206     fw->dryos_ver_count = find_bytes_all(fw,sig,strlen(sig),fw->base,fw->dryos_ver_list,FW_MAX_DRYOS_VERS);
2207     /*
2208     int i;
2209     for(i=0;i<fw->dryos_ver_count;i++) {
2210         fprintf(stderr,"found %s (%d) @0x%08x\n",
2211             (char *)adr2ptr(fw,fw->dryos_ver_list[i]),
2212             atoi((char *)adr2ptr(fw,fw->dryos_ver_list[i]+strlen(sig))),
2213             fw->dryos_ver_list[i]);
2214     }
2215     */
2216     if(fw->dryos_ver_count) {
2217         if(fw->dryos_ver_count == FW_MAX_DRYOS_VERS) {
2218             fprintf(stderr,"WARNING hit FW_MAX_DRYOS_VERS\n");
2219         }
2220         uint32_t i;
2221         int match_i;
2222         uint32_t min_adr = 0xFFFFFFFF;
2223 
2224         // ref should easily be in the first 8M (most near start but g7x2 at >0x500000)
2225         uint32_t maxadr = (fw->rom_code_search_max_adr - 0x800000 > fw->base)?fw->base + 0x800000:fw->rom_code_search_max_adr;
2226         // look for pointer to dryos version nearest to main ROM start, before the string itself
2227         // NOTE it's the *pointer* that must be nearest, the string may not be the first
2228         for(i=0; i<fw->dryos_ver_count; i++) {
2229             // TODO could limit range more, ctypes should be ref'd a lot
2230             // could sanity check not a random value that happens to match
2231             uint32_t adr = find_u32_adr_range(fw,fw->dryos_ver_list[i],fw->rom_code_search_min_adr,maxadr);
2232             if(adr && adr < min_adr) {
2233                 min_adr = adr;
2234                 match_i = i;
2235             }
2236         }
2237         if(min_adr == 0xFFFFFFFF) {
2238             fprintf(stderr,"WARNING dryos version pointer not found, defaulting to first\n");
2239             match_i = 0;
2240             min_adr = 0;
2241         }
2242         fw->dryos_ver_str = (const char *)adr2ptr(fw,fw->dryos_ver_list[match_i]);
2243         const char *s = (const char *)adr2ptr(fw,fw->dryos_ver_list[match_i]+strlen(sig));
2244         fw->dryos_ver = atoi(s);
2245         if(s[4] == '+' && s[5] == 'p') {
2246             fw->dryos_ver_patch = atoi(s+6);
2247             if(fw->dryos_ver_patch >= FW_DRYOS_VER_MUL) {
2248                 fprintf(stderr,"WARNING unexpected patch revision %d\n",fw->dryos_ver_patch);
2249             }
2250         } else {
2251             fw->dryos_ver_patch = 0;
2252         }
2253         fw->dryos_ver_full = fw->dryos_ver * FW_DRYOS_VER_MUL + fw->dryos_ver_patch;
2254         fw->dryos_ver_adr = fw->dryos_ver_list[match_i];
2255         fw->dryos_ver_ref_adr = min_adr;
2256         // fprintf(stderr,"main firmware version %s @ 0x%08x ptr 0x%08x\n",fw->dryos_ver_str,fw->dryos_ver_adr,min_adr);
2257     } else {
2258         fw->dryos_ver = 0;
2259         fw->dryos_ver_patch = 0;
2260         fw->dryos_ver_full = 0;
2261         fw->dryos_ver_str = NULL;
2262         fw->dryos_ver_adr = 0;
2263     }
2264 }
2265 
2266 // load firmware and initialize stuff that doesn't require disassembly
2267 void firmware_load(firmware *fw, const char *filename, uint32_t base_adr,int fw_arch)
2268 {
2269     FILE *f = fopen(filename, "rb");
2270     if (f == NULL)
2271     {
2272         fprintf(stderr,"Error opening %s\n",filename);
2273         exit(1);
2274     }
2275     fseek(f,0,SEEK_END);
2276     fw->size8 = ftell(f);
2277     fseek(f,0,SEEK_SET);
2278     // dumps should be an integral number of 32 bit words
2279     // ensures accessing as 32 bit ints safe
2280     if(fw->size8&3) {
2281         fprintf(stderr,"WARNING: dump size %d is not divisible by 4, truncating\n",fw->size8);
2282         fw->size8 &= ~3;
2283     }
2284 
2285     // adjust to ensure base_adr + size doesn't overflow
2286     if((int)(0xFFFFFFFF - base_adr) < fw->size8) {
2287         fprintf(stderr,"adjusted dump size 0x%08x->",fw->size8);
2288         fw->size8 = 0xFFFFFFFC - base_adr;
2289         fprintf(stderr,"0x%08x\n",fw->size8);
2290     }
2291 
2292     fw->arch=fw_arch;
2293     fw->size32=fw->size8/4;
2294 
2295     fw->base = base_adr;
2296 
2297     fw->buf8 = malloc(fw->size8);
2298     if(!fw->buf8) {
2299         fprintf(stderr,"malloc %d failed\n",fw->size8);
2300         exit(1);
2301     }
2302     fread(fw->buf8, 1, fw->size8, f);
2303     fclose(f);
2304     findRanges(fw);
2305 
2306     fw->adr_range_count=0;
2307     // add ROM
2308     fw_add_adr_range(fw,fw->base, fw->base+fw->size8, fw->base, ADR_RANGE_ROM, ADR_RANGE_FL_NONE);
2309 
2310     fw->main_offs = 0;
2311     int k = find_str(fw, "gaonisoy");
2312     // assume firmware start is 32 bit jump over goanisoy
2313     if(k == -1) {
2314         // suppress warning on vxworks, main firmware start is always offset 0
2315         if(find_str(fw,"VxWorks") == -1) {
2316             fprintf(stderr,"WARNING gaonisoy string not found, assuming code start offset 0\n");
2317         }
2318     } else if (k != 1) {
2319         // check at 0x20004 - note doesn't just use offset of first gaonisoy, because could be ref'd in romstarter
2320         if(fw_memcmp(fw,fw->base+0x20004,"gaonisoy",8) == 0) {
2321             fw->main_offs = 0x20000;
2322         } else if (fw_memcmp(fw,fw->base+0x10004,"gaonisoy",8) == 0) { // newer armv5 firmwares base ff81000 start at ff820000
2323             fw->main_offs = 0x10000;
2324         } else {
2325             fprintf(stderr,"WARNING code start offset not found, assuming 0\n");
2326         }
2327     }
2328 
2329     fw->rom_code_search_min_adr = fw->base + fw->main_offs; // 0 if not found
2330     fw->rom_code_search_max_adr=fw->base+fw->size8 - 4; // default == end of fw, may be adjusted by firmware_init_data_ranges
2331 
2332     find_dryos_vers(fw);
2333 
2334     fw->firmware_ver_str = 0;
2335     k = find_str(fw, "Firmware Ver ");
2336     if (k != -1)
2337     {
2338         fw->firmware_ver_str = (char *)fw->buf8 + k*4;
2339     }
2340     // set expected instruction set
2341     if(fw->arch==FW_ARCH_ARMv5) {
2342         fw->thumb_default = 0;
2343     } else if(fw->arch==FW_ARCH_ARMv7) {
2344         fw->thumb_default = 1;
2345     } else {
2346         fprintf(stderr,"firmware_init_capstone: invalid arch\n");
2347     }
2348 }
2349 
2350 // test to verify thumb blx bug is patched in linked capstone
2351 int do_blx_check(firmware *fw)
2352 {
2353 /*
2354 test code blxbork.S
2355 .syntax unified
2356 .globl arm_code
2357 .globl _start
2358 _start:
2359 .code 16
2360 blx arm_code
2361 movs r0, #1
2362 blx arm_code
2363 .align 4
2364 .code 32
2365 arm_code:
2366 bx lr
2367 
2368 arm-none-eabi-gcc -nostdlib blxbork.S -o blxbork.elf
2369 */
2370 
2371 static const uint8_t code[]=
2372     "\x00\xf0\x06\xe8" // blx arm_code (start + 0x10)
2373     "\x01\x20" // movs r0,#1, to cause non-word align
2374     "\x00\xf0\x04\xe8" // blx arm_code
2375 ;
2376     cs_insn *insn;
2377     size_t count;
2378     count = cs_disasm(fw->cs_handle_thumb, code, sizeof(code), 0xFF000000, 3, &insn);
2379 
2380     if(!(count == 3 && insn[0].id == ARM_INS_BLX && insn[2].id == ARM_INS_BLX)) {
2381         fprintf(stderr,"do_blx_check: disassembly failed\n");
2382         return 0;
2383     }
2384 
2385     int r=(insn[0].detail->arm.operands[0].imm == insn[2].detail->arm.operands[0].imm);
2386 
2387 
2388     if(!r) {
2389         fprintf(stderr,"WARNING! Incorrect disassembly is likely\n");
2390     }
2391     cs_free(insn,count);
2392     return r;
2393 }
2394 
2395 // initialize capstone state for loaded fw
2396 int firmware_init_capstone(firmware *fw)
2397 {
2398     if (cs_open(CS_ARCH_ARM, CS_MODE_ARM, &fw->cs_handle_arm) != CS_ERR_OK) {
2399         fprintf(stderr,"cs_open ARM failed\n");
2400         return 0;
2401     }
2402     cs_option(fw->cs_handle_arm, CS_OPT_DETAIL, CS_OPT_ON);
2403     if (cs_open(CS_ARCH_ARM, CS_MODE_THUMB, &fw->cs_handle_thumb) != CS_ERR_OK) {
2404         fprintf(stderr,"cs_open thumb failed\n");
2405         return 0;
2406     }
2407     cs_option(fw->cs_handle_thumb, CS_OPT_DETAIL, CS_OPT_ON);
2408     fw->is=disasm_iter_new(fw,0);
2409     do_blx_check(fw);
2410     return 1;
2411 }
2412 
2413 /*
2414 look for
2415 ldr rx, =ROM ADR
2416 ldr ry, =non-rom adr
2417 ldr rz, =non ROM adr > ry
2418 leave is pointing at last LDR, or last checked instruction
2419 */
2420 
2421 int find_startup_copy(firmware *fw,
2422                          iter_state_t *is,
2423                          int max_search,
2424                          uint32_t *src_start,
2425                          uint32_t *dst_start,
2426                          uint32_t *dst_end)
2427 {
2428     int count=0;
2429     uint32_t *fptr = NULL;
2430     uint32_t *dptr = NULL;
2431     uint32_t *eptr = NULL;
2432     *src_start=0;
2433     *dst_start=0;
2434     *dst_end=0;
2435 
2436     while(disasm_iter(fw,is) && count < max_search) {
2437         uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2438         // not an LDR pc, reset
2439         // TODO some firmwares might use other instructions
2440         if(!pv) {
2441             fptr=dptr=eptr=NULL;
2442         }else if(!fptr) {
2443             // only candidate if in ROM
2444             if(*pv > fw->base) {
2445                 fptr=pv;
2446             }
2447         } else if(!dptr) {
2448             if(*pv < fw->base) {
2449                 dptr=pv;
2450             } else {
2451                 fptr=NULL; // dest address in ROM, reset
2452             }
2453         } else if(!eptr) {
2454             if(*pv < fw->base && *pv > *dptr) {
2455                 eptr=pv;
2456             } else { // dest end address in ROM, or before source, reset
2457                     // TODO maybe should swap instead if < source
2458                 fptr=dptr=NULL;
2459             }
2460         }
2461         if(fptr && dptr && eptr) {
2462             *src_start=*fptr;
2463             *dst_start=*dptr;
2464             *dst_end=*eptr;
2465             return 1;
2466         }
2467         count++;
2468     }
2469     return 0;
2470 }
2471 
2472 void find_exception_vec(firmware *fw, iter_state_t *is)
2473 {
2474     // check for exception vector, d7 id
2475     // only on thumb2 for now
2476     if(fw->arch != FW_ARCH_ARMv7) {
2477         return;
2478     }
2479 
2480     const insn_match_t match_bl_mcr[]={
2481         {MATCH_INS(BL,  1), {MATCH_OP_IMM_ANY}},
2482         // Vector Base Address Register MCR p15, 0, <Rt>, c12, c0, 0 - not present on PMSA
2483         {MATCH_INS(MCR, 6), {MATCH_OP_PIMM(15),MATCH_OP_IMM(0),MATCH_OP_REG_ANY,MATCH_OP_CIMM(12),MATCH_OP_CIMM(0),MATCH_OP_IMM(0)}},
2484         {ARM_INS_ENDING}
2485     };
2486 
2487     // reset to main fw start
2488     disasm_iter_init(fw, is, fw->base + fw->main_offs + 12 + fw->thumb_default);
2489     if(!insn_match_find_next(fw,is,4,match_bl_mcr)) {
2490         // printf("no match!\n");
2491         return;
2492     }
2493     // check which instruction we matched
2494     uint32_t faddr = get_branch_call_insn_target(fw,is);
2495     if(faddr) {
2496         // bl = digic6, has function to set up exception vector
2497         disasm_iter_init(fw, is, faddr);
2498         disasm_iter(fw, is);
2499         int ra,rb;
2500         uint32_t va, vb;
2501         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2502             return;
2503         }
2504         ra = is->insn->detail->arm.operands[0].reg;
2505         va = is->insn->detail->arm.operands[1].imm;
2506         disasm_iter(fw, is);
2507         if(is->insn->id != ARM_INS_MOVT
2508             || is->insn->detail->arm.operands[0].reg != ra
2509             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2510             return;
2511         }
2512         va = (is->insn->detail->arm.operands[1].imm << 16) | (va & 0xFFFF);
2513         // fw has BIC
2514         va = va & ~1;
2515         if(adr_get_range_type(fw,va) != ADR_RANGE_ROM) {
2516             return;
2517         }
2518         disasm_iter(fw, is);
2519         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2520             return;
2521         }
2522         rb = is->insn->detail->arm.operands[0].reg;
2523         vb = is->insn->detail->arm.operands[1].imm;
2524         disasm_iter(fw, is);
2525         if(is->insn->id != ARM_INS_MOVT
2526             || is->insn->detail->arm.operands[0].reg != rb
2527             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2528             return;
2529         }
2530         vb = (is->insn->detail->arm.operands[1].imm << 16) | (vb & 0xFFFF);
2531         vb = vb & ~1;
2532         if(adr_get_range_type(fw,vb) != ADR_RANGE_ROM) {
2533             return;
2534         }
2535         if(va >= vb) {
2536             return;
2537         }
2538         fw_add_adr_range(fw,0,vb - va, va, ADR_RANGE_RAM_CODE, ADR_RANGE_FL_EVEC | ADR_RANGE_FL_TCM);
2539         // printf("ex vec 0x%08x-0x%08x\n",va,vb);
2540 
2541     } else if(is->insn->id == ARM_INS_MCR) {
2542         // digic 7 = mcr ...
2543         fw->arch_flags |= FW_ARCH_FL_VMSA;
2544         // rewind 1
2545         disasm_iter_init(fw, is, adr_hist_get(&is->ah,1));
2546         disasm_iter(fw, is);
2547         // uint32_t ex_vec = LDR_PC2val(fw,is->insn);
2548         //printf("found MCR @ 0x%"PRIx64" ex vec at 0x%08x\n",is->insn->address,ex_vec);
2549     }
2550 }
2551 
2552 // init basic copied RAM code / data ranges
2553 void firmware_init_data_ranges(firmware *fw)
2554 {
2555 //TODO maybe should return status
2556     uint32_t src_start, dst_start, dst_end;
2557     uint32_t data_found_copy = 0;
2558 
2559     // start at fw start  + 12 (32 bit jump, gaonisoy)
2560     iter_state_t *is=disasm_iter_new(fw, fw->base + fw->main_offs + 12 + fw->thumb_default);
2561 
2562     fw->data_init_start=0;
2563     fw->data_start=0;
2564     fw->data_len=0;
2565 
2566     fw->memisostart=0;
2567 
2568     int base2_found=0;
2569     int base3_found=0;
2570 
2571     // TODO  pre-d6 ROMs have a lot more stuff before first copy
2572     int max_search=100;
2573     while(find_startup_copy(fw,is,max_search,&src_start,&dst_start,&dst_end)) {
2574         // all known copied code is 3f1000 or higher, guess data
2575         if(dst_start < 0x100000) {
2576             // fprintf(stderr, "data?  @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2577             if(fw->data_init_start) {
2578                 fprintf(stderr,"firmware_init_data_ranges: data already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2579                         dst_start,src_start,dst_end);
2580                 continue;
2581             }
2582 
2583             // not a known value, warn
2584             if(dst_start != 0x1900 && dst_start != 0x8000) {
2585                 fprintf(stderr,"firmware_init_data_ranges: guess unknown ROM data_start 0x%08x src 0x%08x end 0x%08x\n",
2586                         dst_start,src_start,dst_end);
2587             }
2588             fw->data_init_start=src_start;
2589             fw->data_start=dst_start;
2590             fw->data_len=dst_end-dst_start;
2591             fw_add_adr_range(fw,dst_start,dst_end,src_start, ADR_RANGE_INIT_DATA, ADR_RANGE_FL_NONE);
2592             data_found_copy=is->adr;
2593         } else if(dst_start < 0x08000000) { /// highest known first copied ram code 0x01900000
2594             // fprintf(stderr,"code1? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2595             if(base2_found) {
2596                 fprintf(stderr,"firmware_init_data_ranges: base2 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2597                         dst_start,src_start,dst_end);
2598                 continue;
2599             }
2600             base2_found=1;
2601             // known values
2602             if( dst_start != 0x003f1000 &&
2603                 dst_start != 0x00431000 &&
2604                 dst_start != 0x00471000 &&
2605                 dst_start != 0x00685000 &&
2606                 dst_start != 0x00671000 &&
2607                 dst_start != 0x006b1000 &&
2608                 dst_start != 0x010c1000 &&
2609                 dst_start != 0x010e1000 &&
2610                 dst_start != 0x01900000) {
2611                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base2 0x%08x src 0x%08x end 0x%08x\n",
2612                         dst_start,src_start,dst_end);
2613             }
2614             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_NONE);
2615         } else { // know < ROM based on match, assume second copied code
2616             // fprintf(stderr, "code2? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2617             if(base3_found) {
2618                 fprintf(stderr,"firmware_init_data_ranges: base3 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2619                         dst_start,src_start,dst_end);
2620                 continue;
2621             }
2622             base3_found=1;
2623             if(dst_start != 0xbfe10800 && // known digic 6 value (g5x)
2624                dst_start != 0xdffc4900) { // known digic 7 value (m5)
2625                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base3 0x%08x src 0x%08x end 0x%08x\n",
2626                         dst_start,src_start,dst_end);
2627             }
2628             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_TCM);
2629         }
2630         if(fw->data_start && base2_found && base3_found) {
2631             break;
2632         }
2633         // after first, shorter search range in between copies
2634         max_search=16;
2635     }
2636 
2637     // look for BSS init after last found copy
2638     if(data_found_copy) {
2639         int count=0;
2640         uint32_t *eptr=NULL;
2641         uint32_t *dptr=NULL;
2642         disasm_iter_init(fw,is,(data_found_copy-4) | fw->thumb_default);
2643         while(disasm_iter(fw,is) && count < 20) {
2644             uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2645             // not an LDR pc, reset;
2646             if(!pv) {
2647                 //dptr=eptr=NULL;
2648             } else if(!dptr) {
2649                 // TODO older firmwares use reg with ending value from DATA copy
2650                 // should be equal to end pointer of data
2651                 if(*pv == fw->data_start + fw->data_len) {
2652                     dptr=pv;
2653                 }
2654             } else if(!eptr) {
2655                 if(*pv < fw->base) {
2656                     if(*pv != fw->data_start + fw->data_len) {
2657                         eptr=pv;
2658                     }
2659                 } else { // dest end address in ROM, reset
2660                     eptr=dptr=NULL;
2661                 }
2662             }
2663             if(dptr && eptr) {
2664                 // fprintf(stderr, "bss?   @0x%"PRIx64" 0x%08x-0x%08x\n",is->adr,*dptr,*eptr);
2665                 fw->memisostart=*eptr;
2666                 break;
2667             }
2668             count++;
2669         }
2670     }
2671 
2672     find_exception_vec(fw,is);
2673 
2674     // if data found, adjust default code search range
2675     // TODO could use copied code regions too, but after data on known firmwares
2676     if(fw->data_start) {
2677         fw->rom_code_search_max_adr=fw->data_init_start;
2678     }
2679     // if dryos version string found, use as search limit
2680     if(fw->dryos_ver_adr) {
2681         if(fw->dryos_ver_adr < fw->rom_code_search_max_adr) {
2682             fw->rom_code_search_max_adr = fw->dryos_ver_adr;
2683         }
2684     }
2685     disasm_iter_free(is);
2686 }
2687 
2688 // free resources associated with fw
2689 void firmware_unload(firmware *fw)
2690 {
2691     if(!fw) {
2692         return;
2693     }
2694     if(fw->is) {
2695         disasm_iter_free(fw->is);
2696     }
2697     if(fw->cs_handle_arm) {
2698         cs_close(&fw->cs_handle_arm);
2699     }
2700     if(fw->cs_handle_thumb) {
2701         cs_close(&fw->cs_handle_thumb);
2702     }
2703     free(fw->buf8);
2704     memset(fw,0,sizeof(firmware));
2705 }

/* [<][>][^][v][top][bottom][index][help] */