root/tools/firmware_load_ng.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. addBufRange
  2. findRanges
  3. getBufRangeForIndex
  4. find_Nth_str
  5. find_str
  6. find_next_bytes_range
  7. find_bytes_all
  8. find_next_substr_bytes
  9. find_next_str_bytes_range
  10. find_next_str_bytes_main_fw
  11. find_str_bytes_main_fw
  12. find_next_str_bytes
  13. find_str_bytes
  14. isASCIIstring
  15. adr_get_range
  16. adr_get_range_type
  17. ptr2adr
  18. adr2ptr
  19. adr2ptr_with_data
  20. adr_range_type_str
  21. adr_range_desc_str
  22. adr_is_var
  23. adr_is_main_fw_code
  24. find_u32_adr_range
  25. find_u32_adr
  26. fw_u32
  27. fw_memcmp
  28. adr_hist_reset
  29. adr_hist_index
  30. adr_hist_add
  31. adr_hist_get
  32. isARM
  33. isLDR_PC
  34. isLDR_PC_PC
  35. isSUBW_PC
  36. isADDW_PC
  37. isADD_PC
  38. isSUB_PC
  39. isRETx
  40. isPUSH_LR
  41. isPOP_LR
  42. isPOP_PC
  43. isADDx_imm
  44. isSUBx_imm
  45. isADRx
  46. LDR_PC2valptr_thumb
  47. LDR_PC2valptr_arm
  48. LDR_PC2valptr
  49. LDR_PC2adr
  50. ADRx2adr
  51. ADR2adr
  52. ADR2valptr
  53. LDR_PC2val
  54. LDR_PC_PC_target
  55. B_target
  56. CBx_target
  57. BLXimm_target
  58. BL_target
  59. B_BL_target
  60. B_BL_BLXimm_target
  61. BX_PC_target
  62. get_TBx_PC_info
  63. disasm_iter_new
  64. disasm_iter_free
  65. disasm_iter_set
  66. disasm_iter_init
  67. disasm_iter
  68. disasm_iter_redo
  69. fw_disasm_iter_start
  70. fw_disasm_iter
  71. fw_disasm_iter_single
  72. fw_disasm_adr
  73. fw_search_insn
  74. search_disasm_const_ref
  75. search_disasm_str_ref
  76. search_disasm_calls
  77. search_calls_multi_end
  78. search_disasm_calls_multi
  79. search_disasm_calls_veneer_multi
  80. get_call_const_args
  81. get_direct_jump_target
  82. get_branch_call_insn_target
  83. find_and_get_var_ldr
  84. find_const_ref_call
  85. check_simple_func
  86. find_last_call_from_func
  87. insn_match_seq
  88. reg_in_range
  89. insn_match
  90. insn_match_any
  91. insn_match_find_next
  92. insn_match_find_nth
  93. insn_match_find_next_seq
  94. fw_search_bytes
  95. fw_add_adr_range
  96. find_dryos_vers
  97. firmware_load
  98. do_blx_check
  99. firmware_init_capstone
  100. find_startup_copy
  101. find_exception_vec
  102. firmware_init_data_ranges
  103. firmware_unload

   1 #include <inttypes.h>
   2 #include <stdio.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <capstone.h>
   7 
   8 #include "stubs_load.h" // needed for sv in fw struct
   9 #include "firmware_load_ng.h"
  10 
  11 
  12 // Add a valid range to the list
  13 static void addBufRange(firmware *fw, int o, int l)
  14 {
  15     BufRange *n = malloc(sizeof(BufRange));
  16     n->p = fw->buf32 + o;
  17     n->off = o;
  18     n->len = l;
  19     n->next = 0;
  20     if (fw->br == 0)
  21     {
  22         fw->br = n;
  23     }
  24     else
  25     {
  26         fw->last->next = n;
  27     }
  28     fw->last = n;
  29 }
  30 
  31 // Find valid ranges for the firmware dump
  32 static void findRanges(firmware *fw)
  33 {
  34     int i, j, k;
  35 
  36     // Find all the valid ranges for checking (skips over large blocks of 0xFFFFFFFF)
  37     fw->br = 0; fw->last = 0;
  38     k = -1; j = 0;
  39     for (i = 0; i < fw->size32; i++)
  40     {
  41         if (fw->buf32[i] == 0xFFFFFFFF)   // Possible start of block to skip
  42         {
  43             if (k == -1)            // Mark start of possible skip block
  44             {
  45                 k = i;
  46             }
  47         }
  48         else                        // Found end of block ?
  49         {
  50             if (k != -1)
  51             {
  52                 if (i - k > 32)     // If block more than 32 words then we want to skip it
  53                 {
  54                     if (k - j > 8)
  55                     {
  56                         // Add a range record for the previous valid range (ignore short ranges)
  57                         addBufRange(fw,j,k - j);
  58                     }
  59                     j = i;          // Reset valid range start to current position
  60                 }
  61                 k = -1;             // Reset marker for skip block
  62             }
  63         }
  64     }
  65     // Add range for last valid block
  66     if (k != -1)
  67     {
  68         if (k - j > 8)
  69         {
  70             addBufRange(fw,j,k - j);
  71         }
  72     }
  73     else
  74     {
  75         if (i - j > 8)
  76         {
  77             addBufRange(fw,j,i - j);
  78         }
  79     }
  80 }
  81 
  82 // return the buffrange for a given offset or null if not found
  83 BufRange *getBufRangeForIndex(firmware *fw,int i)
  84 {
  85     BufRange *br = fw->br;
  86     while (br) {
  87         if(i >= br->off && i < br->off + br->len) {
  88             return br;
  89         }
  90         br = br->next;
  91     }
  92     return NULL;
  93 }
  94 
  95 // Find the index of a string in the firmware
  96 // Assumes the string starts on a 32bit boundary.
  97 // String + terminating zero byte should be at least 4 bytes long
  98 // Handles multiple string instances
  99 int find_Nth_str(firmware *fw, char *str, int N)
 100 {
 101     int nlen = strlen(str);
 102     uint32_t nm0 = *((uint32_t*)str);
 103     uint32_t *p;
 104     int j;
 105 
 106     BufRange *br = fw->br;
 107     while (br)
 108     {
 109         for (p = br->p, j = 0; j < br->len - nlen/4; j++, p++)
 110         {
 111             if ((nm0 == *p) && ((nlen<=4) || (memcmp(p+1,str+4,nlen-4) == 0)) )
 112             {
 113                 if (--N == 0)
 114                     return j+br->off;
 115             }
 116         }
 117         br = br->next;
 118     }
 119 
 120     return -1;
 121 }
 122 
 123 int find_str(firmware *fw, char *str)
 124 {
 125     return find_Nth_str(fw, str, 1);
 126 }
 127 
 128 // find sequence of bytes, starting from star_adr, up to max_adr, any alignment
 129 // returns firmware address or 0
 130 // use repeated calls to find multiple
 131 // NOTE only handles ROM addresses
 132 uint32_t find_next_bytes_range(firmware *fw, const void *bytes, size_t len, uint32_t start_adr, uint32_t max_adr)
 133 {
 134     if(!start_adr) {
 135         start_adr = fw->base;
 136     }
 137     if(start_adr < fw->base || start_adr >= fw->base + fw->size8) {
 138         fprintf(stderr,"find_next_bytes_range invalid start_adr 0x%08x\n",start_adr);
 139         return 0;
 140     }
 141     if(!max_adr) {
 142         max_adr = fw->base + fw->size8-1;
 143     }
 144     if(max_adr < fw->base || max_adr >= fw->base + fw->size8) {
 145         fprintf(stderr,"find_next_bytes_range invalid max_adr 0x%08x\n",max_adr);
 146         return 0;
 147     }
 148     int end_k = (max_adr - fw->base);
 149     BufRange *p = getBufRangeForIndex(fw,(start_adr - fw->base)/4);
 150     if(!p) {
 151         return 0;
 152     }
 153     int k = start_adr - fw->base;
 154 
 155     while (k < end_k)
 156     {
 157         for (; k < (p->off + p->len)*4; k++)
 158         {
 159             if (memcmp(fw->buf8+k,bytes,len) == 0) {
 160                 return fw->base+k;
 161             }
 162         }
 163         p = p->next;
 164         if(!p) {
 165             break;
 166         }
 167         k = p->off*4;
 168     }
 169     return 0;
 170 }
 171 
 172 // find up to max matching byte sequences, storing addresses in result
 173 // returns count
 174 int find_bytes_all(firmware *fw, const void *bytes, size_t len, uint32_t adr, uint32_t *result, int max)
 175 {
 176     int i;
 177     for(i=0,adr=find_next_bytes_range(fw,bytes,len,0,0); adr && (i < max); adr=find_next_bytes_range(fw,bytes,len,adr+len,0),i++) {
 178         result[i] = adr;
 179     }
 180     return i;
 181 }
 182 
 183 uint32_t find_next_substr_bytes(firmware *fw, const char *str, uint32_t adr)
 184 {
 185     //fprintf(stderr,"find_next_substr_bytes 0x%08x\n",adr);
 186     // strlen excludes null
 187     return find_next_bytes_range(fw,str,strlen(str),adr,0);
 188 }
 189 
 190 uint32_t find_next_str_bytes_range(firmware *fw, const char *str, uint32_t adr,uint32_t max_adr)
 191 {
 192     // +1 to include the null in memcmp
 193     return find_next_bytes_range(fw,str,strlen(str)+1,adr,max_adr);
 194 }
 195 
 196 uint32_t find_next_str_bytes_main_fw(firmware *fw, const char *str, uint32_t adr)
 197 {
 198     // max is end of fw code + 4096, assuming it fits in fw
 199     // while early code could technically load from base - 1k, unlikely
 200     uint32_t max_adr;
 201     if(fw->base + fw->size8 - 4096 > fw->rom_code_search_max_adr) {
 202         max_adr = fw->rom_code_search_max_adr + 4096;
 203     } else {
 204         max_adr = fw->base + fw->size8;
 205     }
 206     return find_next_bytes_range(fw,str,strlen(str)+1,adr,max_adr);
 207 }
 208 
 209 // find a string within range of LDR pc or ADR, starting from main fw
 210 uint32_t find_str_bytes_main_fw(firmware *fw, const char *str)
 211 {
 212     return find_next_str_bytes_main_fw(fw,str,fw->rom_code_search_min_adr);
 213 }
 214 
 215 uint32_t find_next_str_bytes(firmware *fw, const char *str, uint32_t adr)
 216 {
 217     // +1 to include the null in memcmp
 218     return find_next_bytes_range(fw,str,strlen(str)+1,adr,0);
 219 }
 220 
 221 // Find the index of a string in the firmware, can start at any address
 222 // returns firmware address
 223 uint32_t find_str_bytes(firmware *fw, const char *str)
 224 {
 225     return find_next_str_bytes(fw,str,fw->base);
 226 }
 227 
 228 int isASCIIstring(firmware *fw, uint32_t adr)
 229 {
 230     unsigned char *p = (unsigned char*)adr2ptr_with_data(fw, adr);
 231     if(!p) {
 232         return 0;
 233     }
 234     // TODO should avoid running off end of dump
 235     int i;
 236     for (i = 0; (i < 100) && (p[i] != 0); i++)
 237     {
 238         if (!((p[i] == '\r') || (p[i] == '\n') || (p[i] == '\t') || ((p[i] >= 0x20) && (p[i] <= 0x7f))))
 239         {
 240             return 0;
 241         }
 242     }
 243     if ((i >= 2) && (p[i] == 0))
 244         return 1;
 245     return 0;
 246 }
 247 
 248 // return address range struct for adr, or NULL if not in known range
 249 adr_range_t *adr_get_range(firmware *fw, uint32_t adr)
 250 {
 251     int i;
 252     adr_range_t *r=fw->adr_ranges;
 253     for(i=0;i<fw->adr_range_count;i++) {
 254         if(adr >= r->start && adr < r->start + r->bytes) {
 255             return r;
 256         }
 257         r++;
 258     }
 259     return NULL;
 260 }
 261 
 262 // return what kind of range adr is in
 263 int adr_get_range_type(firmware *fw, uint32_t adr)
 264 {
 265     adr_range_t *r=adr_get_range(fw,adr);
 266     if(!r) {
 267         return ADR_RANGE_INVALID;
 268     }
 269     return r->type;
 270 }
 271 
 272 uint32_t ptr2adr(firmware *fw, uint8_t *ptr)
 273 {
 274     // TODO handle copied, or maybe another func to convert?
 275     return (ptr-fw->buf8)+fw->base;
 276 }
 277 
 278 uint8_t* adr2ptr(firmware *fw, uint32_t adr)
 279 {
 280     adr_range_t *r=adr_get_range(fw,adr);
 281     if(!r) {
 282         return NULL;
 283     }
 284     switch(r->type) {
 285         case ADR_RANGE_RAM_CODE:
 286         case ADR_RANGE_ROM:
 287             return (r->buf)+(adr - r->start);
 288         default:
 289             return NULL;
 290     }
 291 }
 292 
 293 uint8_t* adr2ptr_with_data(firmware *fw, uint32_t adr)
 294 {
 295     adr_range_t *r=adr_get_range(fw,adr);
 296     if(!r) {
 297         return NULL;
 298     }
 299     switch(r->type) {
 300         case ADR_RANGE_RAM_CODE:
 301         case ADR_RANGE_INIT_DATA:
 302         case ADR_RANGE_ROM:
 303             return (r->buf)+(adr - r->start);
 304         default:
 305             return NULL;
 306     }
 307 }
 308 
 309 // return constant string describing type
 310 const char* adr_range_type_str(int type)
 311 {
 312     switch(type) {
 313         case ADR_RANGE_INVALID:
 314             return "(invalid)";
 315         case ADR_RANGE_ROM:
 316             return "ROM";
 317         case ADR_RANGE_RAM_CODE:
 318             return "RAM code";
 319         case ADR_RANGE_INIT_DATA:
 320             return "RAM data";
 321         default:
 322             return "(unknown)";
 323     }
 324 }
 325 
 326 // return constant string describing type and flags
 327 const char* adr_range_desc_str(adr_range_t *r)
 328 {
 329     switch(r->type) {
 330         case ADR_RANGE_INVALID:
 331             return "(invalid)";
 332         case ADR_RANGE_ROM:
 333             return "ROM";
 334         case ADR_RANGE_RAM_CODE:
 335             if(r->flags & ADR_RANGE_FL_EVEC) {
 336                 return "EVEC";
 337             } else if(r->flags & ADR_RANGE_FL_TCM) {
 338                 return "TCM code";
 339             }
 340             return "RAM code";
 341         case ADR_RANGE_INIT_DATA:
 342             return "RAM data";
 343         default:
 344             return "(unknown)";
 345     }
 346 }
 347 
 348 // return true if adr is in firmware DATA or BSS
 349 int adr_is_var(firmware *fw, uint32_t adr)
 350 {
 351     return (adr > fw->data_start && adr < fw->memisostart);
 352 }
 353 
 354 // return true if adr is in the ROM search range, or one of the copied RAM code regions
 355 int adr_is_main_fw_code(firmware *fw, uint32_t adr)
 356 {
 357     int adr_type = adr_get_range_type(fw,adr);
 358     if(adr_type == ADR_RANGE_RAM_CODE) {
 359         return 1;
 360     }
 361     if(adr_type != ADR_RANGE_ROM) {
 362         return 0;
 363     }
 364     if(adr < fw->rom_code_search_min_adr  || adr > fw->rom_code_search_max_adr) {
 365         return 0;
 366     }
 367     return 1;
 368 }
 369 
 370 /*
 371 return firmware address of 32 bit value, starting at address "start", up to max
 372 */
 373 uint32_t find_u32_adr_range(firmware *fw, uint32_t val, uint32_t start,uint32_t maxadr)
 374 {
 375     // TODO
 376     if(start == 0) {
 377         start=fw->base;
 378     }
 379     if(start & 3) {
 380         fprintf(stderr,"find_u32_adr unaligned start 0x%08x\n",start);
 381         return 0;
 382     }
 383     uint32_t *p=(uint32_t *)adr2ptr(fw,start);
 384     if(!p) {
 385         fprintf(stderr,"find_u32_adr bad start 0x%08x\n",start);
 386         return 0;
 387     }
 388     uint32_t *p_end;
 389     if(maxadr) {
 390         p_end = (uint32_t *)adr2ptr(fw,maxadr);
 391     } else {
 392         p_end = fw->buf32 + fw->size32 - 1;
 393     }
 394     // TODO should use buf ranges
 395     while(p<=p_end) {
 396         if(*p==val) {
 397             return ptr2adr(fw,(uint8_t *)p);
 398         }
 399         p++;
 400     }
 401     return 0;
 402 }
 403 
 404 // as above, full to end of fw
 405 uint32_t find_u32_adr(firmware *fw, uint32_t val, uint32_t start)
 406 {
 407     return find_u32_adr_range(fw,val,start, fw->base + (fw->size8 -4));
 408 }
 409 
 410 // return u32 value at adr
 411 uint32_t fw_u32(firmware *fw, uint32_t adr)
 412 {
 413     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 414     if(!p) {
 415         fprintf(stderr,"fw_u32 bad adr 0x%08x\n",adr);
 416         return 0;
 417     }
 418     return *p;
 419 }
 420 
 421 // memcmp, but using a firmware address, returning 1 adr/size out of range
 422 int fw_memcmp(firmware *fw, uint32_t adr,const void *cmp, size_t n)
 423 {
 424     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 425     if(!p) {
 426         return 1;
 427     }
 428     if(n >= fw->size8 - (adr - fw->base)) {
 429         return 1;
 430     }
 431     return memcmp(p,cmp,n);
 432 }
 433 
 434 
 435 // ****** address history functions ******
 436 // reset address history to empty
 437 void adr_hist_reset(adr_hist_t *ah)
 438 {
 439     ah->cur=0;
 440     ah->count=0;
 441     // memset shouldn't be needed
 442     // memset(ah->adrs,0,ADR_HIST_SIZE*4);
 443 }
 444 
 445 // return the index of current entry + i. may be negative or positive, wraps. Does not check validity
 446 int adr_hist_index(adr_hist_t *ah, int i)
 447 {
 448     int r=(ah->cur+i)%ADR_HIST_SIZE;
 449     if(r < 0) {
 450         return ADR_HIST_SIZE + r;
 451     }
 452     return r;
 453 }
 454 
 455 // add an entry to address history
 456 void adr_hist_add(adr_hist_t *ah, uint32_t adr)
 457 {
 458     ah->cur=adr_hist_index(ah,1);
 459     ah->adrs[ah->cur]=adr;
 460     if(ah->count < ADR_HIST_SIZE)  {
 461         ah->count++;
 462     }
 463 }
 464 
 465 // return the i'th previous entry in this history, or 0 if not valid (maybe should be -1?)
 466 // i= 0 = most recently disassembled instruction, if any
 467 uint32_t adr_hist_get(adr_hist_t *ah, int i)
 468 {
 469     if(!ah->count || i > ah->count) {
 470         return 0;
 471     }
 472     return ah->adrs[adr_hist_index(ah,-i)];
 473 }
 474 
 475 // ****** instruction analysis utilities ******
 476 // is insn an ARM instruction?
 477 // like cs_insn_group(cs_handle,insn,ARM_GRP_ARM) but doesn't require handle and doesn't check or report errors
 478 int isARM(cs_insn *insn)
 479 {
 480     int i;
 481     for(i=0;i<insn->detail->groups_count;i++) {
 482         if(insn->detail->groups[i] == ARM_GRP_ARM) {
 483             return 1;
 484         }
 485     }
 486     return 0;
 487 }
 488 
 489 /*
 490 is insn a PC relative load?
 491 */
 492 int isLDR_PC(cs_insn *insn)
 493 {
 494     return insn->id == ARM_INS_LDR
 495            && insn->detail->arm.op_count == 2
 496            && insn->detail->arm.operands[0].type == ARM_OP_REG
 497            && insn->detail->arm.operands[1].type == ARM_OP_MEM
 498            && insn->detail->arm.operands[1].mem.base == ARM_REG_PC;
 499 
 500 }
 501 
 502 /*
 503 is insn a PC relative load to PC?
 504 */
 505 int isLDR_PC_PC(cs_insn *insn)
 506 {
 507     if(!isLDR_PC(insn)) {
 508         return 0;
 509     }
 510     return (insn->detail->arm.operands[0].reg == ARM_REG_PC);
 511 }
 512 
 513 //  subw    rd, pc, #x?
 514 int isSUBW_PC(cs_insn *insn)
 515 {
 516     return(insn->id == ARM_INS_SUBW
 517        && insn->detail->arm.op_count == 3
 518        && insn->detail->arm.operands[0].type == ARM_OP_REG
 519        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 520        && insn->detail->arm.operands[1].type == ARM_OP_REG
 521        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 522        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 523 }
 524 
 525 //  addw    rd, pc, #x?
 526 int isADDW_PC(cs_insn *insn)
 527 {
 528     return(insn->id == ARM_INS_ADDW
 529        && insn->detail->arm.op_count == 3
 530        && insn->detail->arm.operands[0].type == ARM_OP_REG
 531        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 532        && insn->detail->arm.operands[1].type == ARM_OP_REG
 533        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 534        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 535 }
 536 
 537 // is insn ADD rd, pc, #x  (only generated for ARM in capstone)
 538 int isADD_PC(cs_insn *insn)
 539 {
 540     return (insn->id == ARM_INS_ADD
 541             && insn->detail->arm.op_count == 3
 542             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 543             && insn->detail->arm.operands[1].type == ARM_OP_REG
 544             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 545             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 546 }
 547 
 548 // is insn SUB rd, pc, #x  (only generated for ARM in capstone)
 549 int isSUB_PC(cs_insn *insn)
 550 {
 551     return (insn->id == ARM_INS_SUB
 552             && insn->detail->arm.op_count == 3
 553             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 554             && insn->detail->arm.operands[1].type == ARM_OP_REG
 555             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 556             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 557 }
 558 
 559 // does insn look like a function return?
 560 int isRETx(cs_insn *insn)
 561 {
 562     // BX LR
 563     if(insn->id == ARM_INS_BX
 564             && insn->detail->arm.op_count == 1
 565             && insn->detail->arm.operands[0].type == ARM_OP_REG
 566             && insn->detail->arm.operands[0].reg == ARM_REG_LR) {
 567         return 1;
 568     }
 569 
 570     // TODO LDR pc, [sp], imm is somewhat common, but could also be function pointer call
 571 
 572     // POP. capstone translates LDMFD   SP!,... in arm code to pop
 573     if(insn->id == ARM_INS_POP) {
 574         int i;
 575         for(i=0; i < insn->detail->arm.op_count; i++) {
 576             if(insn->detail->arm.operands[i].type == ARM_OP_REG
 577                 && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 578                 return 1;
 579             }
 580         }
 581     }
 582     // MOV PC, LR (some tools translate this to RET)
 583     if(insn->id == ARM_INS_MOV
 584             && insn->detail->arm.operands[0].type == ARM_OP_REG
 585             && insn->detail->arm.operands[0].reg == ARM_REG_PC
 586             && insn->detail->arm.operands[1].type == ARM_OP_REG
 587             && insn->detail->arm.operands[1].reg == ARM_REG_LR) {
 588         return 1;
 589     }
 590     return 0;
 591 }
 592 
 593 // does insn push LR (function start -ish)
 594 int isPUSH_LR(cs_insn *insn)
 595 {
 596     if(insn->id != ARM_INS_PUSH) {
 597         return 0;
 598     }
 599     int i;
 600     for(i=0; i < insn->detail->arm.op_count; i++) {
 601         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 602             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 603             return 1;
 604         }
 605     }
 606     return 0;
 607 }
 608 
 609 // does insn pop LR (func end before tail call)
 610 int isPOP_LR(cs_insn *insn)
 611 {
 612     if(insn->id != ARM_INS_POP) {
 613         return 0;
 614     }
 615     int i;
 616     for(i=0; i < insn->detail->arm.op_count; i++) {
 617         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 618             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 619             return 1;
 620         }
 621     }
 622     return 0;
 623 }
 624 
 625 // does insn pop PC
 626 int isPOP_PC(cs_insn *insn)
 627 {
 628     if(insn->id != ARM_INS_POP) {
 629         return 0;
 630     }
 631     int i;
 632     for(i=0; i < insn->detail->arm.op_count; i++) {
 633         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 634             && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 635             return 1;
 636         }
 637     }
 638     return 0;
 639 }
 640 
 641 // is the instruction ADD* rx, imm
 642 int isADDx_imm(cs_insn *insn)
 643 {
 644     return ((insn->id == ARM_INS_ADD || insn->id == ARM_INS_ADDW) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 645 }
 646 // is the instruction SUB* rx, imm
 647 int isSUBx_imm(cs_insn *insn)
 648 {
 649     return (IS_INSN_ID_SUBx(insn->id) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 650 }
 651 
 652 // is the instruction an ADR or ADR-like instruction?
 653 int isADRx(cs_insn *insn)
 654 {
 655     return ((insn->id == ARM_INS_ADR)
 656         || isSUBW_PC(insn)
 657         || isADDW_PC(insn)
 658         || (isARM(insn) && (isADD_PC(insn) || isSUB_PC(insn))));
 659 }
 660 
 661 // if insn is LDR Rn, [pc,#x] return pointer to value, otherwise null
 662 uint32_t* LDR_PC2valptr_thumb(firmware *fw, cs_insn *insn)
 663 {
 664     if(!isLDR_PC(insn)) {
 665         return NULL;
 666     }
 667     uint32_t adr;
 668     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 669     // appears correct for examples seen so far
 670     adr=(insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 671     return (uint32_t *)adr2ptr(fw,adr);
 672 }
 673 
 674 uint32_t* LDR_PC2valptr_arm(firmware *fw, cs_insn *insn)
 675 {
 676     if(!isLDR_PC(insn)) {
 677         return NULL;
 678     }
 679     uint32_t adr;
 680     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 681     // appears correct for examples seen so far
 682     adr=insn->address+8+insn->detail->arm.operands[1].mem.disp;
 683     return (uint32_t *)adr2ptr(fw,adr);
 684 }
 685 
 686 uint32_t* LDR_PC2valptr(firmware *fw, cs_insn *insn)
 687 {
 688     if(isARM(insn)) {
 689        return LDR_PC2valptr_arm(fw,insn);
 690     } else {
 691        return LDR_PC2valptr_thumb(fw,insn);
 692     }
 693 }
 694 
 695 // return the address of value loaded by LDR rd, [pc, #x] or 0 if not LDR PC
 696 uint32_t LDR_PC2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 697 {
 698     if(!isLDR_PC(insn)) {
 699         return 0;
 700     }
 701     if(isARM(insn)) {
 702        return insn->address+8+insn->detail->arm.operands[1].mem.disp;
 703     } else {
 704        return (insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 705     }
 706 }
 707 
 708 // return value generated by an ADR or ADR-like instruction, or 0 (which should be rarely generated by ADR)
 709 uint32_t ADRx2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 710 {
 711     if(insn->id == ARM_INS_ADR) {
 712         return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 713     }
 714     if(isSUBW_PC(insn)) {
 715         return (insn->address&~3)+4-insn->detail->arm.operands[2].imm;
 716     }
 717     if(isADDW_PC(insn)) {
 718         return (insn->address&~3)+4+insn->detail->arm.operands[2].imm;
 719     }
 720     if(isARM(insn)) {
 721         if(isADD_PC(insn)) {
 722             return insn->address+8+insn->detail->arm.operands[2].imm;
 723         }
 724         if(isSUB_PC(insn)) {
 725             return insn->address+8-insn->detail->arm.operands[2].imm;
 726         }
 727     }
 728     return 0;
 729 }
 730 
 731 // return the value generated by an ADR (ie, the location of the value as a firmware address)
 732 // NOTE not checked if it is in dump
 733 uint32_t ADR2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 734 {
 735     if(insn->id != ARM_INS_ADR) {
 736         return 0;
 737     }
 738     // TODO - capstone doesn't appear to generate ADR for ARM
 739     /*
 740     if(cs_insn_group(fw->cs_handle,insn,ARM_GRP_ARM)) {
 741        return 0;
 742     }
 743     */
 744     return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 745 }
 746 
 747 // if insn is adr/ AKA ADD Rn, pc,#x return pointer to value, otherwise null
 748 uint32_t* ADR2valptr(firmware *fw, cs_insn *insn)
 749 {
 750     uint32_t adr=ADR2adr(fw,insn);
 751     return (uint32_t *)adr2ptr(fw,adr);
 752 }
 753 
 754 // return value loaded by PC relative LDR instruction, or 0 if out of range
 755 uint32_t LDR_PC2val(firmware *fw, cs_insn *insn)
 756 {
 757     uint32_t *p=LDR_PC2valptr(fw,insn);
 758     if(p) {
 759         return *p;
 760     }
 761     return 0;
 762 }
 763 
 764 // return value loaded by PC relative LDR pc..., or 0 if not matching or out of range
 765 uint32_t LDR_PC_PC_target(firmware *fw, cs_insn *insn)
 766 {
 767     if(!isLDR_PC_PC(insn)) {
 768         return 0;
 769     }
 770     return LDR_PC2val(fw,insn);
 771 }
 772 
 773 // return the target of B instruction, or 0 if current instruction isn't BL
 774 uint32_t B_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 775 {
 776     if(insn->id == ARM_INS_B) {
 777         return insn->detail->arm.operands[0].imm;
 778     }
 779     return 0; // TODO could be valid
 780 }
 781 
 782 
 783 // return the target of CBZ / CBNZ instruction, or 0 if current instruction isn't CBx
 784 uint32_t CBx_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 785 {
 786     if(insn->id == ARM_INS_CBZ || insn->id == ARM_INS_CBNZ) {
 787         return insn->detail->arm.operands[1].imm;
 788     }
 789     return 0; // TODO could be valid
 790 }
 791 
 792 // return the target of BLX instruction, or 0 if current instruction isn't BLX imm
 793 uint32_t BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 794 {
 795     if(insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM) {
 796         return insn->detail->arm.operands[0].imm;
 797     }
 798     return 0; // TODO could be valid
 799 }
 800 
 801 
 802 // return the target of BL instruction, or 0 if current instruction isn't BL
 803 uint32_t BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 804 {
 805     if(insn->id == ARM_INS_BL) {
 806         return insn->detail->arm.operands[0].imm;
 807     }
 808     return 0; // TODO could be valid
 809 }
 810 
 811 // as above, but also including B for tail calls
 812 uint32_t B_BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 813 {
 814     if(insn->id == ARM_INS_B || insn->id == ARM_INS_BL) {
 815         return insn->detail->arm.operands[0].imm;
 816     }
 817     return 0; // TODO could be valid
 818 }
 819 
 820 //
 821 // as above, but also including BLX imm
 822 uint32_t B_BL_BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 823 {
 824     if(insn->id == ARM_INS_B
 825         || insn->id == ARM_INS_BL
 826         || (insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM)) {
 827         return insn->detail->arm.operands[0].imm;
 828     }
 829     return 0; // TODO could be valid
 830 }
 831 
 832 // BX PC (mode change, small jump) Does NOT set thumb bit
 833 uint32_t BX_PC_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 834 {
 835     if(insn->id == ARM_INS_BX
 836         && insn->detail->arm.operands[0].type == ARM_OP_REG
 837         && insn->detail->arm.operands[0].reg == ARM_REG_PC) {
 838         if(insn->size == 2) { // thumb
 839             // per arms docs, thumb bx pc from unaligned address is "undefined"
 840             // assume non-instruction
 841             if((insn->address & 2) == 2) {
 842                 return 0;
 843             }
 844             return (uint32_t)(insn->address) + 4;
 845         } else {
 846             return (uint32_t)(insn->address) + 8;
 847         }
 848     }
 849     return 0;
 850 }
 851 
 852 // get the (likely) range of jumptable entries from a pc relative TBB or TBH instruction
 853 // returns 0 on error or if instruction is not TBB/TBH
 854 // returns 1 if instruction is TBB/TBH [PC,...]
 855 int get_TBx_PC_info(firmware *fw,iter_state_t *is, tbx_info_t *ti)
 856 {
 857     if(!(is->insn->id == ARM_INS_TBH || is->insn->id == ARM_INS_TBB) || is->insn->detail->arm.operands[0].mem.base != ARM_REG_PC) {
 858         return 0;
 859     }
 860     ti->start=(uint32_t)is->adr; // after current instruction
 861     ti->first_target=0;
 862     ti->bytes=(is->insn->id == ARM_INS_TBH)?2:1;
 863 
 864     uint32_t max_adr;
 865     // max possible (assuming jumptable is contiguous)
 866     if(ti->bytes==1) {
 867         max_adr=ti->start+(2*255);
 868     } else {
 869         max_adr=ti->start+(2*65535);
 870     }
 871     arm_reg i_reg=is->insn->detail->arm.operands[0].mem.index;
 872     // backtrack looking for
 873     // cmp index reg,#imm
 874     // ...
 875     // bhs ...
 876     int max_backtrack = 8;
 877     if(is->ah.count - 1 < max_backtrack) {
 878         max_backtrack = is->ah.count-1;
 879     }
 880 
 881     int max_count=0;
 882     int found_bhs=0;
 883     int i;
 884     for(i=1;i<=max_backtrack;i++) {
 885         fw_disasm_iter_single(fw,adr_hist_get(&is->ah,i)); // thumb state comes from hist
 886         if(fw->is->insn->id == ARM_INS_B && fw->is->insn->detail->arm.cc == ARM_CC_HS) {
 887             found_bhs=1;
 888             continue;
 889         }
 890         // TODO lots of other ways condition code or reg could be changed in between
 891         if(found_bhs && fw->is->insn->id == ARM_INS_CMP) {
 892             // cmp with correct operands, assume number of jumptable entries
 893             if((arm_reg)fw->is->insn->detail->arm.operands[0].reg == i_reg
 894                 || fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
 895                 max_count = fw->is->insn->detail->arm.operands[1].imm;
 896             }
 897             // otherwise, give up
 898             break;
 899         }
 900     }
 901     if(max_count) {
 902         max_adr = ti->start+max_count*ti->bytes;
 903         //printf("get_TBx_PC_info: max_count %d start 0x%08x max_adr=0x%08x\n",max_count,ti->start,max_adr);
 904     }
 905     uint32_t adr=ti->start;
 906     while(adr < max_adr) {
 907         uint8_t *p=adr2ptr(fw,adr);
 908         if(!p) {
 909             fprintf(stderr,"get_TBx_PC_info: jumptable outside of valid address range at 0x%08x\n",adr);
 910             return 0;
 911         }
 912         uint16_t off;
 913         if(ti->bytes==1) {
 914             off=(uint16_t)*p;
 915         } else {
 916             off=*(uint16_t *)p;
 917         }
 918 
 919         // 0, probably padding at the end (could probably break here)
 920         // note shouldn't be padding on tbh, since aligned for thumb
 921         if(!off) {
 922             break;
 923         }
 924         uint32_t target = ti->start+2*off;
 925         // may indicate non-jumptable entry, if count not found, so don't increment adr
 926         if(target <= adr) {
 927             fprintf(stderr,"get_TBx_PC_info: jumptable target 0x%08x inside jumptable %d at 0x%08x\n",target,off,adr);
 928             break;
 929         }
 930         if(!ti->first_target || target < ti->first_target) {
 931             ti->first_target=target;
 932             if(target < max_adr) {
 933                 max_adr=target; // assume jump table ends at/before first target
 934             }
 935         }
 936         adr+=ti->bytes;
 937     }
 938     // if found count, assume it's right
 939     if(max_count) {
 940         ti->count=max_count;
 941     } else {
 942         // otherwise, use final address
 943         ti->count=(adr-ti->start)/ti->bytes;
 944     }
 945     return 1;
 946 }
 947 
 948 // TODO should have variants of above including LDR pc, [pc, #x] for some of the above
 949 
 950 // ****** disassembly iterator utilities ******
 951 // allocate a new iterator state, optionally initializing at adr (0/invalid OK)
 952 iter_state_t *disasm_iter_new(firmware *fw, uint32_t adr)
 953 {
 954     iter_state_t *is=(iter_state_t *)malloc(sizeof(iter_state_t));
 955     // it doesn't currently appear to matter which handle is used to allocate
 956     // only used for overridable malloc functions and error reporting
 957     is->insn=cs_malloc(fw->cs_handle_arm);
 958     disasm_iter_init(fw,is,adr);
 959     return is;
 960 }
 961 
 962 // free iterator state and associated resources
 963 void disasm_iter_free(iter_state_t *is)
 964 {
 965     cs_free(is->insn,1);
 966     free(is);
 967     return;
 968 }
 969 
 970 // set iterator to adr, without clearing history (for branch following)
 971 // thumb bit in adr sets mode
 972 int disasm_iter_set(firmware *fw, iter_state_t *is, uint32_t adr)
 973 {
 974     // set handle based on thumb bit to allow disassembly
 975     if(ADR_IS_THUMB(adr)) {
 976         is->cs_handle=fw->cs_handle_thumb;
 977         is->thumb=1;
 978         is->insn_min_size=2;
 979         adr=ADR_CLEAR_THUMB(adr);// ADR used for iteration must not contain thumb bit
 980     } else {
 981         is->cs_handle=fw->cs_handle_arm;
 982         is->thumb=0;
 983         is->insn_min_size=4;
 984         if(!ADR_IS_ALIGN4(adr)) {
 985             fprintf(stderr,"disasm_iter_set: unaligned ARM address 0x%08x\n",adr);
 986             is->code=NULL;
 987             is->size=0;
 988             is->adr=0;
 989             return 0;
 990         }
 991     }
 992     uint8_t *p=adr2ptr(fw,adr);
 993     if(!p) {
 994 // TODO invalid currently allowed, for new
 995 //        fprintf(stderr,"disasm_iter_set: bad address 0x%08x\n",adr);
 996         is->code=NULL; // make first iter fail
 997         is->size=0;
 998         is->adr=0;
 999         return 0;
1000     }
1001     // TODO should maybe mark is.insn invalid?
1002     is->code=p;
1003     is->size=fw->size8 - (p-fw->buf8);
1004     is->adr=adr;
1005     return 1;
1006 }
1007 
1008 // initialize iterator state at adr, clearing history
1009 int disasm_iter_init(__attribute__ ((unused))firmware *fw, iter_state_t *is, uint32_t adr)
1010 {
1011     adr_hist_reset(&is->ah);
1012     return disasm_iter_set(fw,is,adr);
1013 }
1014 
1015 // disassemble next instruction, recording address in history
1016 // returns false if state invalid or disassembly fails
1017 // if disassembly fails, is->adr is not incremented
1018 int disasm_iter(__attribute__ ((unused))firmware *fw, iter_state_t *is)
1019 {
1020     // iter_start not called or invalid
1021     if(!is->code) {
1022         return 0;
1023     }
1024     adr_hist_add(&is->ah,(uint32_t)is->adr | is->thumb); // record thumb state to allow backtracking through state changes
1025     return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
1026 }
1027 
// Disassemble the current instruction again, by stepping the iterator back
// over it and calling capstone directly so the history is left alone.
// could be useful if turning detail off/on but doesn't seem to help perf much
// NOTE out of date, kept compiled out
#if 0
int disasm_iter_redo(firmware *fw,iter_state_t *is) {
    if(!is->code || !is->ah.count) {
        return 0;
    }
    const size_t insn_len=is->insn->size;
    // rewind position, address and remaining size by one instruction
    is->code -= insn_len;
    is->adr -= insn_len;
    is->size += insn_len;
    // call iter directly, to avoid touching history
    return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
}
#endif
1043 
1044 // ***** disassembly utilities operating on the default iterator state *****
1045 /*
1046 initialize iter state to begin iterating at adr
1047 history is cleared
1048 */
1049 int fw_disasm_iter_start(firmware *fw, uint32_t adr)
1050 {
1051     return disasm_iter_init(fw,fw->is,adr);
1052 }
1053 
1054 // disassemble the next instruction, updating cached state
1055 int fw_disasm_iter(firmware *fw)
1056 {
1057     return disasm_iter(fw,fw->is);
1058 }
1059 
1060 // disassemble single instruction at given adr, updating cached values
1061 // history is cleared
1062 int fw_disasm_iter_single(firmware *fw, uint32_t adr)
1063 {
1064     fw_disasm_iter_start(fw,adr);
1065     return fw_disasm_iter(fw);
1066 }
1067 
1068 
1069 // ****** standalone disassembly without an iter_state ******
/*
disassemble up to count instructions starting at firmware address adr
allocates and returns insns in insn, can be freed with cs_free(insn, count)
returns the number of instructions disassembled, 0 (with *insn NULL) if adr is not valid
NOTE(review): compiled out; refers to fw->cs_handle while the iterator code uses
cs_handle_arm / cs_handle_thumb, so it would need updating before being re-enabled
*/
#if 0
size_t fw_disasm_adr(firmware *fw, uint32_t adr, unsigned count, cs_insn **insn)
{
    uint8_t *code=adr2ptr(fw,adr);
    if(code == NULL) {
        *insn=NULL; // ?
        return 0;
    }
    size_t remaining=fw->size8 - (code-fw->buf8);
    return cs_disasm(fw->cs_handle, code, remaining, adr, count, insn);
}
#endif
1085 
1086 // ***** utilities for searching disassembly over large ranges ******
1087 /*
1088 iterate over firmware disassembling, calling callback described above after each
1089 successful disassembly iteration.  If disassembly fails, the iter state is advanced
1090 minimum instruction size without calling the callback.
1091 starts at address is taken from the iter_state, which should be initialized with
1092 disasm_iter_new(), disasm_iter_init(), or a previous search or iter call.
1093 end defaults to end of ram code or rom code (before init data, if known), based on start
1094 v1 and udata are provided to the callback
1095 */
1096 uint32_t fw_search_insn(firmware *fw, iter_state_t *is, search_insn_fn f, uint32_t v1, void *udata, uint32_t adr_end)
1097 {
1098     uint32_t adr_start=is->adr;
1099     adr_range_t *r_start=adr_get_range(fw,adr_start);
1100     if(!r_start) {
1101         fprintf(stderr,"fw_search_insn: invalid start address 0x%08x\n",adr_start);
1102         return 0;
1103     }
1104 
1105     // default to end of start range
1106     if(!adr_end) {
1107         if(r_start->type == ADR_RANGE_ROM) {
1108             adr_end = fw->rom_code_search_max_adr;
1109         } else {
1110             adr_end=r_start->start + r_start->bytes - is->insn_min_size;
1111         }
1112     }
1113     adr_range_t *r_end=adr_get_range(fw,adr_end);
1114 
1115     if(!r_end) {
1116         fprintf(stderr,"fw_search_insn: invalid end address 0x%08x\n",adr_end);
1117         return 0;
1118     }
1119     // ignore thumb bit on end adr
1120     adr_end=ADR_CLEAR_THUMB(adr_end);
1121 
1122     if((r_start != r_end) || (adr_end < adr_start)) {
1123         fprintf(stderr,"fw_search_insn: invalid address range 0x%08x 0x%08x\n",adr_start,adr_end);
1124         return 0;
1125     }
1126 
1127     uint32_t adr=adr_start;
1128     // don't bother with buf ranges for RAM code
1129     if(r_start->type != ADR_RANGE_ROM) {
1130         while(adr < adr_end) {
1131             if(disasm_iter(fw,is)) {
1132                 uint32_t r=f(fw,is,v1,udata);
1133                 if(r) {
1134                     return r;
1135                 }
1136                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1137             } else {
1138                 // disassembly failed
1139                 // increment by minimum instruction size and re-init
1140                 adr=adr+is->insn_min_size;
1141                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1142                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1143                     return 0;
1144                 }
1145              }
1146         }
1147         return 0;
1148     }
1149     BufRange *br=fw->br;
1150     // TODO might want to (optionally?) turn off details? For now, caller can set, doesn't seem to help perf much
1151     // TODO when searching ROM, could skip over RAM copied areas (currently just limit default range)
1152     while(br && adr < adr_end) {
1153         uint32_t *p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1154         uint32_t *br_end = br->p + br->len;
1155         uint32_t adr_chunk_end = ptr2adr(fw,(uint8_t*)br_end);
1156         if(adr_end < adr_chunk_end) {
1157             adr_chunk_end = adr_end;
1158         }
1159         // address is before start of current range, adjust
1160         if(p_adr < br->p) {
1161             adr=ptr2adr(fw,(uint8_t *)br->p);
1162             if(!disasm_iter_init(fw,is,(uint32_t)adr | is->thumb)) {
1163                 return 0;
1164             }
1165             p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1166         }
1167         //printf("br:0x%08x-0x%08x\n",ptr2adr(fw,(uint8_t *)br->p),ptr2adr(fw,(uint8_t *)(br->p+br->len)));
1168         while(adr < adr_chunk_end) {
1169             if(disasm_iter(fw,is)) {
1170                 uint32_t r=f(fw,is,v1,udata);
1171                 if(r) {
1172                     return r;
1173                 }
1174                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1175             } else {
1176                 // disassembly failed. cs_disarm_iter does not update address
1177                 // increment by half word and re-init
1178                 adr=adr+is->insn_min_size;
1179                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1180                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1181                     return 0;
1182                 }
1183              }
1184         }
1185         // next range
1186         br=br->next;
1187     }
1188     return 0;
1189 }
1190 
1191 // ****** callbacks for use with fw_search_insn ******
1192 
1193 // search for constant references
1194 uint32_t search_disasm_const_ref(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1195 {
1196 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1197     uint32_t av=ADRx2adr(fw,is->insn);
1198     if(av) {
1199 //        printf("adr 0x%08x\n",av);
1200         if(av == val) {
1201             return (uint32_t)is->insn->address;
1202         }
1203         return 0;
1204     }
1205     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1206     if(pv) {
1207 //        printf("ldr 0x%08x\n",*pv);
1208         if(*pv == val) {
1209             return (uint32_t)is->insn->address;
1210         }
1211     }
1212     return 0;
1213 }
1214 
1215 // search for string ref
1216 uint32_t search_disasm_str_ref(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t val, void *udata)
1217 {
1218     const char *str=(const char *)udata;
1219 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1220     uint32_t av=ADRx2adr(fw,is->insn);
1221     if(av) {
1222 //        printf("adr 0x%08x\n",av);
1223         char *cmp=(char *)adr2ptr_with_data(fw,av);
1224         if(cmp && (strcmp(cmp,str) == 0)) {
1225             return (uint32_t)is->insn->address;
1226         }
1227         return 0;
1228     }
1229     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1230     if(pv) {
1231 //        printf("ldr 0x%08x\n",*pv);
1232         char *cmp=(char *)adr2ptr_with_data(fw,*pv);
1233         if(cmp && (strcmp(cmp,str) == 0)) {
1234             return (uint32_t)is->insn->address;
1235         }
1236     }
1237     return 0;
1238 }
1239 
1240 // search for calls/jumps to immediate addresses
1241 // thumb bit in address should be set appropriately
1242 // returns 1 if found, address can be obtained from insn
1243 uint32_t search_disasm_calls(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1244 {
1245     //printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1246     uint32_t sub=get_branch_call_insn_target(fw,is);
1247     if(sub) {
1248         if(sub == val) {
1249             return 1;
1250         }
1251     }
1252     return 0;
1253 }
1254 
1255 // a search_calls_multi_fn that just returns 1
1256 int search_calls_multi_end(__attribute__ ((unused))firmware *fw, __attribute__ ((unused))iter_state_t *is, __attribute__ ((unused))uint32_t adr) {
1257     return 1;
1258 }
1259 
1260 
1261 // Search for calls to multiple functions (more efficient than multiple passes)
1262 // if adr is found in null terminated search_calls_multi_data array, returns fn return value
1263 // otherwise 0
1264 uint32_t search_disasm_calls_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1265 {
1266     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1267     uint32_t sub=get_branch_call_insn_target(fw,is);
1268     if(sub) {
1269         while(data->adr) {
1270             if(data->adr == sub) {
1271                 return data->fn(fw,is,sub);
1272             }
1273             data++;
1274         }
1275     }
1276     return 0;
1277 }
1278 
1279 // as above, but check for single level of veneer
1280 uint32_t search_disasm_calls_veneer_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1281 {
1282     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1283     uint32_t sub=get_branch_call_insn_target(fw,is);
1284     if(sub) {
1285         while(data->adr) {
1286             if(data->adr == sub) {
1287                 return data->fn(fw,is,sub);
1288             }
1289             data++;
1290         }
1291         uint32_t veneer=0;
1292         fw_disasm_iter_single(fw,sub);
1293         veneer=get_branch_call_insn_target(fw,fw->is);
1294         data=(search_calls_multi_data_t *)userdata;
1295         while(data->adr) {
1296             if(data->adr == veneer) {
1297                 return data->fn(fw,is,sub);
1298             }
1299             data++;
1300         }
1301     }
1302     return 0;
1303 }
1304 
1305 // ****** utilities for extracting register values ******
1306 /*
1307 backtrack through is_init state history picking up constants loaded into r0-r3
1308 return bitmask of regs with values found
1309 affects fw->is, does not affect is_init
1310 
1311 NOTE values may be inaccurate for many reasons, doesn't track all reg affecting ops,
1312 doesn't account for branches landing in the middle of inspected code
1313 doesn't account for many conditional cases
1314 */
1315 int get_call_const_args(firmware *fw, iter_state_t *is_init, int max_backtrack, uint32_t *res)
1316 {
1317     int i;
1318     /*
1319     static int dbg_count=0;
1320     if(is_init->insn->address==...) {
1321         dbg_count=1;
1322     } else {
1323         dbg_count=0;
1324     }
1325     */
1326 
1327     // init regs to zero (to support adds etc)
1328     for (i=0;i<4;i++) {
1329         res[i]=0;
1330     }
1331 
1332     // count includes current instruction (i.e. BL of call)
1333     if(is_init->ah.count <= 1) {
1334         return 0;
1335     }
1336     if(is_init->ah.count - 1 < max_backtrack) {
1337         /*
1338         if(dbg_count > 0) {
1339             printf("max_backtrack %d hist count %d\n",max_backtrack,is_init->ah.count);
1340         }
1341         */
1342         max_backtrack = is_init->ah.count-1;
1343     }
1344     uint32_t found_bits=0; // registers with known const values
1345     uint32_t known_bits=0; // registers with some value
1346 
1347     for(i=1;i<=max_backtrack && known_bits !=0xf;i++) {
1348         // TODO going backwards and calling start each time inefficient
1349         // forward could also find multi-instruction constants in some cases (e.g mov + add, movw + movt)
1350         fw_disasm_iter_single(fw,adr_hist_get(&is_init->ah,i)); // thumb state comes from hist
1351         /*
1352         if(dbg_count > 0) {
1353             printf("backtrack %d:%d  ",dbg_count,i);
1354             printf("%"PRIx64" %s %s\n",fw->is->insn->address,fw->is->insn->mnemonic, fw->is->insn->op_str);
1355         }
1356         */
1357         arm_insn insn_id = fw->is->insn->id;
1358         // BL, BLX etc will trash r0-r3
1359         // only break on unconditional - optimistic, could produce incorrect results
1360         if((insn_id == ARM_INS_BL || insn_id == ARM_INS_BLX
1361             // B/BX could mean execution goes somewhere totally different, but in practice it often just skipping over a word of data...
1362              /*|| insn_id == ARM_INS_B || insn_id == ARM_INS_BX*/)
1363              && fw->is->insn->detail->arm.cc == ARM_CC_AL) {
1364             break;
1365         }
1366 
1367         // if the first op isn't REG, continue
1368         // TODO lots of instructions could affect reg even if not first op
1369         if(fw->is->insn->detail->arm.operands[0].type != ARM_OP_REG) {
1370             continue;
1371         }
1372         arm_reg rd = fw->is->insn->detail->arm.operands[0].reg;
1373         // capstone arm.h regs enum R0-R12 are ordered
1374         // enum has entries before R0
1375         if(rd < ARM_REG_R0 || rd > ARM_REG_R3) {
1376             continue;
1377         }
1378 
1379         int rd_i = rd - ARM_REG_R0;
1380         uint32_t rd_bit = 1 << rd_i;
1381         // if we don't already have something for this reg
1382         if(!(known_bits & rd_bit)) {
1383             // know something has been done to this reg
1384             // note doesn't account for conditionals
1385             known_bits |=rd_bit;
1386             // is it an LDR
1387             uint32_t *pv=LDR_PC2valptr(fw,fw->is->insn);
1388             if(pv) {
1389                 res[rd_i] += *pv;
1390 //                if(dbg_count) printf("found ldr r%d,=0x%08x\n",rd_i,res[rd_i]);
1391                 found_bits |=rd_bit;
1392                 continue;
1393             }
1394             uint32_t v=ADRx2adr(fw,fw->is->insn); // assumes ADR doesn't generate 0, probably safe
1395             if(v) {
1396                 res[rd_i] += v;
1397 //                 if(dbg_count) printf("found adrx r%d,0x%08x\n",rd_i,res[rd_i]);
1398                 found_bits |=rd_bit;
1399                 continue;
1400             }
1401             // immediate MOV note MOVT combinations, not accounted for, some handled ADDs below
1402             if( IS_INSN_ID_MOVx(insn_id)
1403                 && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1404                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1405 //                if(dbg_count) printf("found move r%d,#0x%08x\n",rd_i,res[rd_i]);
1406                 found_bits |=rd_bit;
1407             } else if(isADDx_imm(fw->is->insn)) {
1408                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1409 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1410                 // pretend reg is not known
1411                 known_bits ^=rd_bit;
1412                 // do not set found bit here
1413             } else if(isSUBx_imm(fw->is->insn)) {
1414                 res[rd_i] = (int)(res[rd_i]) - fw->is->insn->detail->arm.operands[1].imm;
1415 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1416                 // pretend reg is not known
1417                 known_bits ^=rd_bit;
1418                 // do not set found bit here
1419             }/* else {
1420             }
1421             */
1422         }
1423     }
1424 //    if(dbg_count) printf("get_call_const_args found 0x%08x\n",found_bits);
1425     return found_bits;
1426 }
1427 
1428 /*
1429 starting from is_init, look for a direct jump, such as
1430  B <target>
1431  LDR PC, [pc, #x]
1432  BX PC
1433  movw ip, #x
1434  movt ip, #x
1435  bx ip
1436 if found, return target address with thumb bit set appropriately
1437 NOTE does not check for conditional
1438 uses fw->is
1439 does not check CBx, since it would generally be part of a function not a veneer
1440 */
1441 uint32_t get_direct_jump_target(firmware *fw, iter_state_t *is_init)
1442 {
1443     uint32_t adr=B_target(fw,is_init->insn);
1444     // B ... return with thumb set to current mode
1445     if(adr) {
1446         return (adr | is_init->thumb);
1447     }
1448     adr=LDR_PC_PC_target(fw,is_init->insn);
1449     // LDR pc #... thumb is set in the loaded address
1450     if(adr) {
1451         return adr;
1452     }
1453     // BX PC
1454     adr=BX_PC_target(fw,is_init->insn);
1455     if(adr) {
1456         // bx swaps mode
1457         if(is_init->thumb) {
1458             return ADR_CLEAR_THUMB(adr);
1459         } else {
1460             return ADR_SET_THUMB(adr);
1461         }
1462     }
1463     // an immediate move to ip (R12), candidate for multi-instruction veneer
1464     if((is_init->insn->id == ARM_INS_MOV || is_init->insn->id == ARM_INS_MOVW)
1465         && is_init->insn->detail->arm.operands[0].reg == ARM_REG_IP
1466         && is_init->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1467         adr = is_init->insn->detail->arm.operands[1].imm;
1468         // iter in default state, starting from is_init
1469         if(!fw_disasm_iter_single(fw,is_init->adr | is_init->thumb)) {
1470             fprintf(stderr,"get_direct_jump_target: disasm single failed at 0x%"PRIx64"\n",fw->is->insn->address);
1471             return 0;
1472         }
1473         // check for MOVT ip, #x
1474         if(!(fw->is->insn->id == ARM_INS_MOVT
1475             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP
1476             && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM)) {
1477 // doesn't match second two insn veneer, not really an error
1478 //            fprintf(stderr,"get_direct_jump_target: not 2 insn ip veneer 0x%"PRIx64"\n",fw->is->insn->address);
1479             return 0;
1480         }
1481         // thumb set in loaded adr
1482         adr = (fw->is->insn->detail->arm.operands[1].imm << 16) | (adr&0xFFFF);
1483         if(!fw_disasm_iter(fw)) {
1484             fprintf(stderr,"get_direct_jump_target: disasm 2 failed at 0x%"PRIx64"\n",fw->is->insn->address);
1485             return 0;
1486         }
1487         // BX ip ?
1488         if(fw->is->insn->id == ARM_INS_BX
1489             && fw->is->insn->detail->arm.operands[0].type == ARM_OP_REG
1490             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP) {
1491             return adr;
1492         }
1493     }
1494     return 0;
1495 }
1496 
1497 /*
1498 return target of any single instruction branch or function call instruction,
1499 with thumb bit set appropriately
1500 returns 0 if current instruction not branch/call
1501 */
1502 uint32_t get_branch_call_insn_target(firmware *fw, iter_state_t *is)
1503 {
1504     uint32_t adr=B_BL_target(fw,is->insn);
1505     if(adr) {
1506         return (adr | is->thumb);
1507     }
1508     // CBx only exists in thumb
1509     if(is->thumb) {
1510         adr=CBx_target(fw,is->insn);
1511         if(adr) {
1512             return ADR_SET_THUMB(adr);
1513         }
1514     }
1515 
1516     adr=BLXimm_target(fw,is->insn);
1517     if(adr) {
1518         if(is->thumb) {
1519             return adr;
1520         } else {
1521             return adr | is->thumb;
1522         }
1523     }
1524 
1525     adr=LDR_PC_PC_target(fw,is->insn);
1526     if(adr) {
1527         return adr;
1528     }
1529     adr=BX_PC_target(fw,is->insn);
1530     if(adr) {
1531         // bx swaps mode
1532         if(is->thumb) {
1533             return ADR_CLEAR_THUMB(adr);
1534         } else {
1535             return ADR_SET_THUMB(adr);
1536         }
1537     }
1538     return 0;
1539 }
1540 
1541 /*
1542 search up to max_search_ins for first LDR, =value
1543 and then match up to max_seq_insns for a sequence like
1544 LDR Rbase,=adr
1545 ... possible intervening ins
1546 SUB Rbase,#adj // optional, may be any add/sub variant
1547 ... possible intervening ins
1548 LDR Rval,[Rbase + #off]
1549 
1550 returns 1 if found, 0 if not
1551 stores registers and constants in *result if successful
1552 
1553 NOTE bad values are possible with intervening ins, short sequences recommended
1554 
1555 TODO similar code for STR would be useful, but in many cases would have to handle load or move into reg_val
1556 */
1557 int find_and_get_var_ldr(firmware *fw,
1558                             iter_state_t *is,
1559                             int max_search_insns,
1560                             int max_seq_insns,
1561                             arm_reg match_val_reg, // ARM_REG_INVALID for any
1562                             var_ldr_desc_t *result)
1563 
1564 {
1565     if(!insn_match_find_next(fw,is,max_search_insns,match_ldr_pc)) {
1566         // printf("find_and_get_var_ldr: LDR PC not found\n");
1567         return 0;
1568     }
1569     var_ldr_desc_t r;
1570     memset(&r,0,sizeof(r));
1571     r.reg_base=is->insn->detail->arm.operands[0].reg;
1572     r.adr_base=LDR_PC2val(fw,is->insn);
1573     int seq_count=1;
1574 
1575     while(seq_count < max_seq_insns) {
1576         // disassembly failed, no match (could ignore..)
1577         if(!disasm_iter(fw,is)) {
1578             return 0;
1579         }
1580         // assume first encountered LDR x,[pc] is the one to use
1581         // give up if we encounter another. Don't know beforehand which reg is base
1582         // NOTE: backward search would allow matching base that eventually ends up in desired reg
1583         if(isLDR_PC(is->insn)) {
1584             // printf("find_and_get_var_ldr: second ldr pc\n");
1585             return  0;
1586         }
1587         seq_count++;
1588         // firmware may use add/sub to get actual firmware base address
1589         if(isADDx_imm(is->insn) || isSUBx_imm(is->insn)) {
1590             if((arm_reg)is->insn->detail->arm.operands[0].reg != r.reg_base) {
1591                 continue;
1592             }
1593             if(isADDx_imm(is->insn)) {
1594                 r.adj=is->insn->detail->arm.operands[1].imm;
1595             } else {
1596                 r.adj=-is->insn->detail->arm.operands[1].imm;
1597             }
1598             if(!disasm_iter(fw,is)) {
1599                 return 0;
1600             }
1601             seq_count++;
1602         } else {
1603             r.adj=0;
1604         }
1605         // try to bail out if base reg trashed
1606         // BL, BLX etc will trash r0-r3, B, BX go somewhere else
1607         // only break on unconditional - optimistic, could produce incorrect results
1608         // can't account for branches into searched code
1609         if((r.reg_base >= ARM_REG_R0 && r.reg_base <= ARM_REG_R3)
1610                 && (is->insn->id == ARM_INS_BL || is->insn->id == ARM_INS_BLX
1611                     || is->insn->id == ARM_INS_B || is->insn->id == ARM_INS_BX)
1612                 && is->insn->detail->arm.cc == ARM_CC_AL) {
1613             // printf("find_and_get_var_ldr: bail B*\n");
1614             return 0;
1615         }
1616         if(is->insn->id != ARM_INS_LDR || (arm_reg)is->insn->detail->arm.operands[1].reg != r.reg_base) {
1617             // other operation on with base reg as first operand, give up
1618             // simplistic, many other things could affect reg
1619             if(is->insn->detail->arm.operands[0].type == ARM_OP_REG && (arm_reg)is->insn->detail->arm.operands[0].reg == r.reg_base) {
1620                 // printf("find_and_get_var_ldr: bail mod base\n");
1621                 return 0;
1622             }
1623             continue;
1624         }
1625         r.reg_val = is->insn->detail->arm.operands[0].reg;
1626         if(match_val_reg != ARM_REG_INVALID && (r.reg_val != match_val_reg)) {
1627             continue;
1628         }
1629         r.off = is->insn->detail->arm.operands[1].mem.disp;
1630         r.adr_adj = r.adr_base + r.adj;
1631         r.adr_final = r.adr_adj + r.off;
1632         memcpy(result,&r,sizeof(r));
1633         return 1;
1634     }
1635     return 0;
1636 }
1637 
1638 /*
1639 find call that receives specified constant in specified r0-r3 reg
1640 search starting from is to max_search_bytes
1641 allow up to max_gap_insns between constant load and call, generally small (4-8 max)
1642 returns address of call with thumb bit set according to mode, or 0 on failure
1643 */
1644 int find_const_ref_call(firmware *fw,
1645                             iter_state_t *is,
1646                             int max_search_bytes,
1647                             int max_gap_insns,
1648                             arm_reg match_reg, // must be R0-R3
1649                             uint32_t val)
1650 
1651 {
1652     if(match_reg < ARM_REG_R0 || match_reg > ARM_REG_R3) {
1653         fprintf(stderr,"find_const_ref_call: invalid match_reg %d\n",match_reg);
1654         return 0;
1655     }
1656     if(max_gap_insns >= ADR_HIST_SIZE) {
1657         fprintf(stderr,"find_const_ref_call: invalid max_gap_insns %d\n",max_gap_insns);
1658         return 0;
1659     }
1660     // search for a ref to constant
1661     while(fw_search_insn(fw,is,search_disasm_const_ref,val,NULL,(uint32_t)(is->adr+max_search_bytes))) {
1662         uint32_t next_adr = (uint32_t)is->adr;
1663         // search for next bl / blx
1664         // could search include b for tail calls, but hard to distinguish
1665         if(insn_match_find_next(fw,is,max_gap_insns,match_bl_blximm)) {
1666             uint32_t reg_num = match_reg - ARM_REG_R0;
1667             uint32_t reg_bit = 1 << reg_num;
1668             uint32_t regs[4];
1669             // backtrack to find out if const ref ends up in desired reg
1670             if((get_call_const_args(fw,is,max_gap_insns,regs)&reg_bit)==reg_bit) {
1671                 if(regs[reg_num] == val) {
1672                     return iter_state_adr(is);
1673                 }
1674             }
1675         }
1676         // not matched, restore is and advance one instruction
1677         disasm_iter_init(fw,is,next_adr | is->thumb);
1678     }
1679     return 0;
1680 }
1681 
1682 /*
1683 check for, and optionally return information about
1684 functions with return values that can be completely determined
1685 from disassembly
1686 uses fw->is
1687 */
 1688 // constants below may be used as flags on input, and as return value
1689 // no simple function found
1690 #define MATCH_SIMPLE_FUNC_NONE    0x0
1691 // immediately returns, with no value
1692 #define MATCH_SIMPLE_FUNC_NULLSUB 0x1
1693 // immediately returns with a MOV constant
1694 #define MATCH_SIMPLE_FUNC_IMM     0x2
1695 // TODO LDR pc, =const,  ADR
1696 // TODO could also do pointer derefs and return pointer info without val
1697 #define MATCH_SIMPLE_FUNC_ANY     0x3
1698 int check_simple_func(firmware *fw, uint32_t adr, int match_ftype, simple_func_desc_t *info)
1699 {
1700     const insn_match_t match_mov_r0_imm[]={
1701         {MATCH_INS(MOV,   2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1702 #if CS_API_MAJOR < 4
1703         {MATCH_INS(MOVS,  2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1704 #endif
1705         {ARM_INS_ENDING}
1706     };
1707 
1708     int found = 0;
1709     int found_val = 0;
1710     if(info) {
1711         info->ftype = MATCH_SIMPLE_FUNC_NONE;
1712         info->retval = 0;
1713     }
1714     if(!fw_disasm_iter_single(fw,adr)) {
1715         //fprintf(stderr,"check_simple_func: disasm_iter_single failed 0x%x\n",adr);
1716         return 0;
1717     }
1718     if(match_ftype & MATCH_SIMPLE_FUNC_IMM) {
1719         // check mov r0, #imm
1720         if(insn_match_any(fw->is->insn,match_mov_r0_imm)) {
1721             found_val = fw->is->insn->detail->arm.operands[1].imm;
1722             found = MATCH_SIMPLE_FUNC_IMM;
1723             // fprintf(stderr,"check_simple_func: found IMM\n");
1724             if(!fw_disasm_iter(fw)) {
1725                 //fprintf(stderr,"check_simple_func: disasm_iter failed 0x%x\n",adr);
1726                 return 0;
1727             }
1728         }
1729     }
1730     if(!isRETx(fw->is->insn)) {
1731         // fprintf(stderr,"check_simple_func: no ret\n");
1732         return 0;
1733     }
1734     // no previous found, check if ret alone
1735     if(!found && (match_ftype & MATCH_SIMPLE_FUNC_NULLSUB)) {
1736         found = MATCH_SIMPLE_FUNC_NULLSUB;
1737         // fprintf(stderr,"check_simple_func: found nullsub\n");
1738     }
1739     if(found) {
1740         if(info) {
1741             info->ftype = found;
1742             info->retval = found_val;
1743         }
1744     }
1745     return found;
1746 }
1747 
1748 /*
1749 advance iter_state is trying to find the last function called by a function
1750 function assumed to PUSH LR, POP LR or PC (many small functions don't!)
1751 either the last BL/BLXimm before pop {... PC}
1752 or B after POP {... LR}
1753 MOV or LDR to R0-R3 are allowed between POP LR and the final B
1754 If a POP occurs before min_insns, the match fails
1755 Calls before min_insns are ignored
1756 */
1757 uint32_t find_last_call_from_func(firmware *fw, iter_state_t *is,int min_insns, int max_insns)
1758 {
1759     int push_found=0;
1760     uint32_t last_adr=0;
1761     int count;
1762     for(count=0; count < max_insns; count++) {
1763         if(!disasm_iter(fw,is)) {
1764             fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1765             return 0;
1766         }
1767         // TODO could match push regs with pop
1768         if(isPUSH_LR(is->insn)) {
1769             // already found a PUSH LR, probably in new function
1770             if(push_found) {
1771                 //printf("find_last_call_from_func: second push pc 0x%"PRIx64"\n",is->adr);
1772                 return 0;
1773             }
1774             push_found=1;
1775             continue;
1776         }
1777         // ignore everything before push (could be some mov/ldr, shouldn't be any calls)
1778         // TODO may want to allow starting in the middle of a function
1779         if(!push_found) {
1780             continue;
1781         }
1782         // found a potential call, store
1783         if(insn_match_any(is->insn,match_bl_blximm) && count >= min_insns) {
1784             //printf("find_last_call_from_func: found call 0x%"PRIx64"\n",is->adr);
1785             last_adr=get_branch_call_insn_target(fw,is);
1786             continue;
1787         }
1788         // found pop PC, can only be stored call if present
1789         if(isPOP_PC(is->insn)) {
1790             // printf("find_last_call_from_func: found pop PC 0x%"PRIx64"\n",is->adr);
1791             if(last_adr) {
1792                 return last_adr;
1793             }
1794             // no call found, or not found within min
1795             return 0;
1796         }
1797         // found pop LR, check if next is allowed tail sequence followed by unconditional B
1798         if(isPOP_LR(is->insn)) {
1799             // hit func end with less than min, no match
1800             if(count < min_insns) {
1801                 // printf("find_last_call_from_func: pop before min 0x%"PRIx64"\n",is->adr);
1802                 return 0;
1803             }
1804             if(!disasm_iter(fw,is)) {
1805                 fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1806                 return 0;
1807             }
1808             // allow instructions likely to appear between pop and tail call
1809             // MOV or LDR to r0-r3
1810             // others are possible e.g arithmetic or LDR r4,=const; LDR r0,[r4, #offset]
1811             const insn_match_t match_tail[]={
1812                 {MATCH_INS(MOV, MATCH_OPCOUNT_ANY), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_REST_ANY}},
1813 // MOVS unlikely to be valid, though possible if followed by additional conditional instructions
1814 // in any case, want to match capstone 4 behavior
1815 #if CS_API_MAJOR < 4
1816                 {MATCH_INS(MOV, MATCH_OPCOUNT_ANY), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_REST_ANY}},
1817 #endif
1818 
1819                 {MATCH_INS(LDR, 2), {MATCH_OP_REG_RANGE(R0,R3), MATCH_OP_ANY}},
1820                 {ARM_INS_ENDING}
1821             };
1822             while(insn_match_any(is->insn,match_tail) && count < max_insns) {
1823                 if(!disasm_iter(fw,is)) {
1824                     fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1825                     return 0;
1826                 }
1827                 count++;
1828             }
1829             if(is->insn->id == ARM_INS_B && is->insn->detail->arm.cc == ARM_CC_AL) {
1830                 return get_branch_call_insn_target(fw,is);
1831             }
1832             // don't go more than one insn after pop (could be more, but uncommon)
1833             // printf("find_last_call_from_func: more than one insn after pop 0x%"PRIx64"\n",is->adr);
1834             return 0;
1835         }
1836         // found another kind of ret, give up
1837         if(isRETx(is->insn)) {
1838             // printf("find_last_call_from_func: other ret 0x%"PRIx64"\n",is->adr);
1839             return 0;
1840         }
1841     }
1842     // printf("find_last_call_from_func: no match in range 0x%"PRIx64"\n",is->adr);
1843     return 0;
1844 }
1845 
1846 // ****** utilities for matching instructions and instruction sequences ******
1847 
1848 // some common matches for insn_match_find_next
1849 const insn_match_t match_b[]={
1850     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1851     {ARM_INS_ENDING}
1852 };
1853 const insn_match_t match_bl[]={
1854     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1855     {ARM_INS_ENDING}
1856 };
1857 const insn_match_t match_b_bl[]={
1858     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1859     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1860     {ARM_INS_ENDING}
1861 };
1862 
1863 const insn_match_t match_b_bl_blximm[]={
1864     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1865     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1866     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1867     {ARM_INS_ENDING}
1868 };
1869 
1870 const insn_match_t match_bl_blximm[]={
1871     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1872     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1873     {ARM_INS_ENDING}
1874 };
1875 
1876 const insn_match_t match_bxlr[]={
1877     {MATCH_INS(BX, 1), {MATCH_OP_REG(LR)}},
1878     {ARM_INS_ENDING}
1879 };
1880 
1881 const insn_match_t match_ldr_pc[]={
1882     {MATCH_INS(LDR, 2), {MATCH_OP_REG_ANY,  MATCH_OP_MEM_BASE(PC)}},
1883     {ARM_INS_ENDING}
1884 };
1885 
1886 // iterate as long as sequence of instructions matches sequence defined in match
1887 int insn_match_seq(firmware *fw, iter_state_t *is, const insn_match_t *match)
1888 {
1889     //printf("%"PRIx64" insn_match_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1890     while(match->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,match)) {
1891         //printf("%"PRIx64" insn_match_seq next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1892         match++;
1893     }
1894     return (match->id == ARM_INS_ENDING);
1895 }
1896 
1897 // capstone enum isn't in numeric order, (SP through PC in capstone 4, but probably shouldn't assume)
1898 static const arm_reg reg_order[] = {
1899     ARM_REG_R0,
1900     ARM_REG_R1,
1901     ARM_REG_R2,
1902     ARM_REG_R3,
1903     ARM_REG_R4,
1904     ARM_REG_R5,
1905     ARM_REG_R6,
1906     ARM_REG_R7,
1907     ARM_REG_R8,
1908     ARM_REG_R9,
1909     ARM_REG_R10,
1910     ARM_REG_R11,
1911     ARM_REG_R12,
1912     ARM_REG_SP,
1913     ARM_REG_LR,
1914     ARM_REG_PC,
1915 };
1916 
1917 int reg_in_range(arm_reg r, arm_reg min_reg, arm_reg max_reg)
1918 {
1919     int c = -1, c_min = -1, c_max = -1;
1920     int i;
1921     for(i=0; i<(int)(sizeof(reg_order)/sizeof(arm_reg)); i++) {
1922         if(reg_order[i] == r) {
1923             c = i;
1924         }
1925         if(reg_order[i] == min_reg) {
1926             c_min = i;
1927         }
1928         if(reg_order[i] == max_reg) {
1929             c_max = i;
1930         }
1931     }
1932     // any invalid / unlisted regs, false
1933     if( c < 0 || c_min < 0 || c_max < 0) {
1934         return 0;
1935     }
1936     return (c >= c_min && c <= c_max);
1937 }
1938 
1939 // check if single insn matches values defined by match
1940 int insn_match(cs_insn *insn,const insn_match_t *match)
1941 {
1942     // specific instruction ID requested, check
1943     if(match->id != ARM_INS_INVALID && insn->id != match->id) {
1944         return 0;
1945     }
1946     // condition code requested, check
1947     if(match->cc != ARM_CC_INVALID && insn->detail->arm.cc != match->cc) {
1948         return 0;
1949     }
1950     // no op checks, done
1951     if(match->op_count == MATCH_OPCOUNT_IGNORE) {
1952         return 1;
1953     }
1954     // operand count requested, check
1955     if(match->op_count >= 0 && insn->detail->arm.op_count != match->op_count) {
1956         return 0;
1957     }
1958     int i;
1959     // operands
1960     for(i=0; i<MATCH_MAX_OPS && i < insn->detail->arm.op_count; i++) {
1961         // specific type requested?
1962         if(match->operands[i].type != ARM_OP_INVALID && insn->detail->arm.operands[i].type != match->operands[i].type) {
1963             return 0;
1964         }
1965         // specific registers requested?
1966         if(match->operands[i].reg1 != ARM_REG_INVALID) {
1967             if(insn->detail->arm.operands[i].type == ARM_OP_REG) {
1968                 // range requested
1969                 if(match->operands[i].reg2 != ARM_REG_INVALID) {
1970                     if(!reg_in_range((arm_reg)insn->detail->arm.operands[i].reg,
1971                                         match->operands[i].reg1, match->operands[i].reg2)) {
1972                         return 0;
1973                     }
1974                 } else if((arm_reg)insn->detail->arm.operands[i].reg != match->operands[i].reg1) {
1975                     return 0;
1976                 }
1977             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
1978                 if(insn->detail->arm.operands[i].mem.base != match->operands[i].reg1) {
1979                     return 0;
1980                 }
1981             } else {
1982                 fprintf(stderr,"insn_match: reg1 match requested on operand not reg or mem %d\n",
1983                         insn->detail->arm.operands[i].type);
1984             }
1985         }
1986         if(match->operands[i].reg2 != ARM_REG_INVALID) {
1987             if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
1988                 if(insn->detail->arm.operands[i].mem.index != match->operands[i].reg2) {
1989                     return 0;
1990                 }
1991             } else if(insn->detail->arm.operands[i].type != ARM_OP_REG) { // reg handled above
1992                 fprintf(stderr,"insn_match: reg2 match requested on operand not reg or mem %d\n",
1993                         insn->detail->arm.operands[i].type);
1994             }
1995         }
1996         if(match->operands[i].flags & MATCH_OP_FL_IMM) {
1997             if(insn->detail->arm.operands[i].type == ARM_OP_IMM
1998                     || insn->detail->arm.operands[i].type == ARM_OP_PIMM
1999                     || insn->detail->arm.operands[i].type == ARM_OP_CIMM) {
2000                 if(insn->detail->arm.operands[i].imm != match->operands[i].imm) {
2001                     return  0;
2002                 }
2003             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
2004                 if(insn->detail->arm.operands[i].mem.disp != match->operands[i].imm) {
2005                     return  0;
2006                 }
2007             } else {
2008                 fprintf(stderr,"insn_match: imm match requested on operand not imm or mem %d\n",
2009                         insn->detail->arm.operands[i].type);
2010             }
2011         }
2012         if(match->operands[i].flags & MATCH_OP_FL_LAST) {
2013             break;
2014         }
2015     }
2016     return 1;
2017 }
2018 
2019 // check if single insn matches any of the provided matches
2020 int insn_match_any(cs_insn *insn,const insn_match_t *match)
2021 {
2022     const insn_match_t *m;
2023     // check matches
2024     for(m=match;m->id != ARM_INS_ENDING;m++) {
2025         if(insn_match(insn,m)) {
2026             return 1;
2027         }
2028     }
2029     return 0;
2030 }
2031 
2032 // iterate is until current instruction matches any of the provided matches or until limit reached
2033 int insn_match_find_next(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
2034 {
2035     int i=0;
2036     while(i < max_insns) {
2037         // disassembly failed, no match (could ignore..)
2038         if(!disasm_iter(fw,is)) {
2039             return 0;
2040         }
2041         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2042         if(insn_match_any(is->insn,match)) {
2043             return 1;
2044         }
2045         i++;
2046     }
2047     // limit hit
2048     return 0;
2049 }
2050 
2051 // iterate is until current has matched any of the provided matches N times or until max_insns reached
2052 int insn_match_find_nth(firmware *fw, iter_state_t *is, int max_insns, int num_to_match, const insn_match_t *match)
2053 {
2054     int i=0;
2055     int num_matched=0;
2056     while(i < max_insns) {
2057         // disassembly failed, no match (could ignore..)
2058         if(!disasm_iter(fw,is)) {
2059             return 0;
2060         }
2061         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2062 
2063         const insn_match_t *m;
2064         // check matches
2065         for(m=match;m->id != ARM_INS_ENDING;m++) {
2066             if(insn_match(is->insn,m)) {
2067                 num_matched++;
2068             }
2069         }
2070         if(num_matched == num_to_match) {
2071             return 1;
2072         }
2073         i++;
2074     }
2075     // limit hit
2076     return 0;
2077 }
2078 
2079 // find next matching sequence starting within max_insns
2080 int insn_match_find_next_seq(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
2081 {
2082     int count=0;
2083     while(count < max_insns) {
2084         const insn_match_t *m=match;
2085         //printf("%"PRIx64" insn_match_find_next_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
2086         while(m->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,m)) {
2087             m++;
2088             count++;
2089         }
2090         if(m->id == ARM_INS_ENDING) {
2091             return 1;
2092         }
2093         // non-matching
2094         count++;
2095     }
2096     return 0;
2097 }
2098 
2099 
2100 // Search the firmware for something. The desired matching is performed using the supplied 'func' function.
2101 // Continues searching until 'func' returns non-zero - then returns 1
2102 // otherwise returns 0.
2103 // Uses the BufRange structs to speed up searching
2104 // Note: this version searches byte by byte in the firmware dump instead of by words
2105 int fw_search_bytes(firmware *fw, search_bytes_fn func)
2106 {
2107     BufRange *p = fw->br;
2108     while (p)
2109     {
2110         int k;
2111         for (k = p->off*4; k < (p->off + p->len)*4; k++)
2112         {
2113             if (func(fw,k))
2114                 return 1;
2115         }
2116         p = p->next;
2117     }
2118     return 0;
2119 }
2120 
2121 
2122 // ****** firmware loading / initialization / de-allocation ******
2123 // add given address range
2124 void fw_add_adr_range(firmware *fw, uint32_t start, uint32_t end, uint32_t src_start, int type, int flags)
2125 {
2126     if(fw->adr_range_count == FW_MAX_ADR_RANGES) {
2127         fprintf(stderr,"fw_add_adr_range: FW_MAX_ADR_RANGES hit\n");
2128         return;
2129     }
2130     if(src_start < fw->base) {
2131         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x < base 0x%08x\n",src_start,fw->base);
2132         return;
2133     }
2134     if(src_start >= fw->base+fw->size8) {
2135         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x outside dump end 0x%08x\n",src_start,fw->base+fw->size8);
2136         return;
2137     }
2138     if(end <= start) {
2139         fprintf(stderr,"fw_add_adr_range: end 0x%08x <= start 0x%08x\n",end,start);
2140         return;
2141     }
2142     uint32_t len=end-start;
2143     if(len > 0xFFFFFFFF - src_start) {
2144         fprintf(stderr,"fw_add_adr_range: range too long %d\n",len);
2145         return;
2146     }
2147     if(len > fw->size8 - (start - fw->base)) {
2148         fprintf(stderr,"fw_add_adr_range: range outside of dump %d\n",len);
2149         return;
2150     }
2151     adr_range_t *r=&fw->adr_ranges[fw->adr_range_count];
2152     // TODO some firmware copies (i.e. g5x code 2) may end on non-word aligned address even though copy is words
2153     r->start=start;
2154     r->src_start=src_start;
2155     r->bytes=len;
2156     r->type=type;
2157     r->flags=flags;
2158     r->buf=fw->buf8 + (r->src_start - fw->base);
2159 
2160     fw->adr_range_count++;
2161 }
2162 
2163 void find_dryos_vers(firmware *fw)
2164 {
2165     const char *sig="DRYOS version 2.3, release #";
2166     fw->dryos_ver_count = find_bytes_all(fw,sig,strlen(sig),fw->base,fw->dryos_ver_list,FW_MAX_DRYOS_VERS);
2167     /*
2168     int i;
2169     for(i=0;i<fw->dryos_ver_count;i++) {
2170         fprintf(stderr,"found %s (%d) @0x%08x\n",
2171             (char *)adr2ptr(fw,fw->dryos_ver_list[i]),
2172             atoi((char *)adr2ptr(fw,fw->dryos_ver_list[i]+strlen(sig))),
2173             fw->dryos_ver_list[i]);
2174     }
2175     */
2176     if(fw->dryos_ver_count) {
2177         if(fw->dryos_ver_count == FW_MAX_DRYOS_VERS) {
2178             fprintf(stderr,"WARNING hit FW_MAX_DRYOS_VERS\n");
2179         }
2180         uint32_t i;
2181         int match_i;
2182         uint32_t min_adr = 0xFFFFFFFF;
2183 
2184         // ref should easily be in the first 8M (most near start but g7x2 at >0x500000)
2185         uint32_t maxadr = (fw->rom_code_search_max_adr - 0x800000 > fw->base)?fw->base + 0x800000:fw->rom_code_search_max_adr;
2186         // look for pointer to dryos version nearest to main ROM start, before the string itself
2187         // NOTE it's the *pointer* that must be nearest, the string may not be the first
2188         for(i=0; i<fw->dryos_ver_count; i++) {
2189             // TODO could limit range more, ctypes should be ref'd a lot
2190             // could sanity check not a random value that happens to match
2191             uint32_t adr = find_u32_adr_range(fw,fw->dryos_ver_list[i],fw->rom_code_search_min_adr,maxadr);
2192             if(adr && adr < min_adr) {
2193                 min_adr = adr;
2194                 match_i = i;
2195             }
2196         }
2197         if(min_adr == 0xFFFFFFFF) {
2198             fprintf(stderr,"WARNING dryos version pointer not found, defaulting to first\n");
2199             match_i = 0;
2200             min_adr = 0;
2201         }
2202         fw->dryos_ver_str = (const char *)adr2ptr(fw,fw->dryos_ver_list[match_i]);
2203         const char *s = (const char *)adr2ptr(fw,fw->dryos_ver_list[match_i]+strlen(sig));
2204         fw->dryos_ver = atoi(s);
2205         if(s[4] == '+' && s[5] == 'p') {
2206             fw->dryos_ver_patch = atoi(s+6);
2207             if(fw->dryos_ver_patch >= FW_DRYOS_VER_MUL) {
2208                 fprintf(stderr,"WARNING unexpected patch revision %d\n",fw->dryos_ver_patch);
2209             }
2210         } else {
2211             fw->dryos_ver_patch = 0;
2212         }
2213         fw->dryos_ver_full = fw->dryos_ver * FW_DRYOS_VER_MUL + fw->dryos_ver_patch;
2214         fw->dryos_ver_adr = fw->dryos_ver_list[match_i];
2215         fw->dryos_ver_ref_adr = min_adr;
2216         // fprintf(stderr,"main firmware version %s @ 0x%08x ptr 0x%08x\n",fw->dryos_ver_str,fw->dryos_ver_adr,min_adr);
2217     } else {
2218         fw->dryos_ver = 0;
2219         fw->dryos_ver_patch = 0;
2220         fw->dryos_ver_full = 0;
2221         fw->dryos_ver_str = NULL;
2222         fw->dryos_ver_adr = 0;
2223     }
2224 }
2225 
2226 // load firmware and initialize stuff that doesn't require disassembly
2227 void firmware_load(firmware *fw, const char *filename, uint32_t base_adr,int fw_arch)
2228 {
2229     FILE *f = fopen(filename, "rb");
2230     if (f == NULL)
2231     {
2232         fprintf(stderr,"Error opening %s\n",filename);
2233         exit(1);
2234     }
2235     fseek(f,0,SEEK_END);
2236     fw->size8 = ftell(f);
2237     fseek(f,0,SEEK_SET);
2238     // dumps should be an integral number of 32 bit words
2239     // ensures accessing as 32 bit ints safe
2240     if(fw->size8&3) {
2241         fprintf(stderr,"WARNING: dump size %d is not divisible by 4, truncating\n",fw->size8);
2242         fw->size8 &= ~3;
2243     }
2244 
2245     // adjust to ensure base_adr + size doesn't overflow
2246     if((int)(0xFFFFFFFF - base_adr) < fw->size8) {
2247         fprintf(stderr,"adjusted dump size 0x%08x->",fw->size8);
2248         fw->size8 = 0xFFFFFFFC - base_adr;
2249         fprintf(stderr,"0x%08x\n",fw->size8);
2250     }
2251 
2252     fw->arch=fw_arch;
2253     fw->size32=fw->size8/4;
2254 
2255     fw->base = base_adr;
2256 
2257     fw->buf8 = malloc(fw->size8);
2258     if(!fw->buf8) {
2259         fprintf(stderr,"malloc %d failed\n",fw->size8);
2260         exit(1);
2261     }
2262     fread(fw->buf8, 1, fw->size8, f);
2263     fclose(f);
2264     findRanges(fw);
2265 
2266     fw->adr_range_count=0;
2267     // add ROM
2268     fw_add_adr_range(fw,fw->base, fw->base+fw->size8, fw->base, ADR_RANGE_ROM, ADR_RANGE_FL_NONE);
2269 
2270     fw->main_offs = 0;
2271     int k = find_str(fw, "gaonisoy");
2272     // assume firmware start is 32 bit jump over goanisoy
2273     if(k == -1) {
2274         // suppress warning on vxworks, main firmware start is always offset 0
2275         if(find_str(fw,"VxWorks") == -1) {
2276             fprintf(stderr,"WARNING gaonisoy string not found, assuming code start offset 0\n");
2277         }
2278     } else if (k != 1) {
2279         // check at 0x20004 - note doesn't just use offset of first gaonisoy, because could be ref'd in romstarter
2280         if(fw_memcmp(fw,fw->base+0x20004,"gaonisoy",8) == 0) {
2281             fw->main_offs = 0x20000;
2282         } else if (fw_memcmp(fw,fw->base+0x10004,"gaonisoy",8) == 0) { // newer armv5 firmwares base ff81000 start at ff820000
2283             fw->main_offs = 0x10000;
2284         } else {
2285             fprintf(stderr,"WARNING code start offset not found, assuming 0\n");
2286         }
2287     }
2288 
2289     fw->rom_code_search_min_adr = fw->base + fw->main_offs; // 0 if not found
2290     fw->rom_code_search_max_adr=fw->base+fw->size8 - 4; // default == end of fw, may be adjusted by firmware_init_data_ranges
2291 
2292     find_dryos_vers(fw);
2293 
2294     fw->firmware_ver_str = 0;
2295     k = find_str(fw, "Firmware Ver ");
2296     if (k != -1)
2297     {
2298         fw->firmware_ver_str = (char *)fw->buf8 + k*4;
2299     }
2300     // set expected instruction set
2301     if(fw->arch==FW_ARCH_ARMv5) {
2302         fw->thumb_default = 0;
2303     } else if(fw->arch==FW_ARCH_ARMv7) {
2304         fw->thumb_default = 1;
2305     } else {
2306         fprintf(stderr,"firmware_init_capstone: invalid arch\n");
2307     }
2308 }
2309 
2310 // test to verify thumb blx bug is patched in linked capstone
2311 int do_blx_check(firmware *fw)
2312 {
2313 /*
2314 test code blxbork.S
2315 .syntax unified
2316 .globl arm_code
2317 .globl _start
2318 _start:
2319 .code 16
2320 blx arm_code
2321 movs r0, #1
2322 blx arm_code
2323 .align 4
2324 .code 32
2325 arm_code:
2326 bx lr
2327 
2328 arm-none-eabi-gcc -nostdlib blxbork.S -o blxbork.elf
2329 */
2330 
2331 static const uint8_t code[]=
2332     "\x00\xf0\x06\xe8" // blx arm_code (start + 0x10)
2333     "\x01\x20" // movs r0,#1, to cause non-word align
2334     "\x00\xf0\x04\xe8" // blx arm_code
2335 ;
2336     cs_insn *insn;
2337     size_t count;
2338     count = cs_disasm(fw->cs_handle_thumb, code, sizeof(code), 0xFF000000, 3, &insn);
2339 
2340     if(!(count == 3 && insn[0].id == ARM_INS_BLX && insn[2].id == ARM_INS_BLX)) {
2341         fprintf(stderr,"do_blx_check: disassembly failed\n");
2342         return 0;
2343     }
2344 
2345     int r=(insn[0].detail->arm.operands[0].imm == insn[2].detail->arm.operands[0].imm);
2346 
2347 
2348     if(!r) {
2349         fprintf(stderr,"WARNING! Incorrect disassembly is likely\n");
2350     }
2351     cs_free(insn,count);
2352     return r;
2353 }
2354 
2355 // initialize capstone state for loaded fw
2356 int firmware_init_capstone(firmware *fw)
2357 {
2358     if (cs_open(CS_ARCH_ARM, CS_MODE_ARM, &fw->cs_handle_arm) != CS_ERR_OK) {
2359         fprintf(stderr,"cs_open ARM failed\n");
2360         return 0;
2361     }
2362     cs_option(fw->cs_handle_arm, CS_OPT_DETAIL, CS_OPT_ON);
2363     if (cs_open(CS_ARCH_ARM, CS_MODE_THUMB, &fw->cs_handle_thumb) != CS_ERR_OK) {
2364         fprintf(stderr,"cs_open thumb failed\n");
2365         return 0;
2366     }
2367     cs_option(fw->cs_handle_thumb, CS_OPT_DETAIL, CS_OPT_ON);
2368     fw->is=disasm_iter_new(fw,0);
2369     do_blx_check(fw);
2370     return 1;
2371 }
2372 
2373 /*
2374 look for
2375 ldr rx, =ROM ADR
2376 ldr ry, =non-rom adr
2377 ldr rz, =non ROM adr > ry
2378 leave is pointing at last LDR, or last checked instruction
2379 */
2380 
2381 int find_startup_copy(firmware *fw,
2382                          iter_state_t *is,
2383                          int max_search,
2384                          uint32_t *src_start,
2385                          uint32_t *dst_start,
2386                          uint32_t *dst_end)
2387 {
2388     int count=0;
2389     uint32_t *fptr = NULL;
2390     uint32_t *dptr = NULL;
2391     uint32_t *eptr = NULL;
2392     *src_start=0;
2393     *dst_start=0;
2394     *dst_end=0;
2395 
2396     while(disasm_iter(fw,is) && count < max_search) {
2397         uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2398         // not an LDR pc, reset
2399         // TODO some firmwares might use other instructions
2400         if(!pv) {
2401             fptr=dptr=eptr=NULL;
2402         }else if(!fptr) {
2403             // only candidate if in ROM
2404             if(*pv > fw->base) {
2405                 fptr=pv;
2406             }
2407         } else if(!dptr) {
2408             if(*pv < fw->base) {
2409                 dptr=pv;
2410             } else {
2411                 fptr=NULL; // dest address in ROM, reset
2412             }
2413         } else if(!eptr) {
2414             if(*pv < fw->base && *pv > *dptr) {
2415                 eptr=pv;
2416             } else { // dest end address in ROM, or before source, reset
2417                     // TODO maybe should swap instead if < source
2418                 fptr=dptr=NULL;
2419             }
2420         }
2421         if(fptr && dptr && eptr) {
2422             *src_start=*fptr;
2423             *dst_start=*dptr;
2424             *dst_end=*eptr;
2425             return 1;
2426         }
2427         count++;
2428     }
2429     return 0;
2430 }
2431 
2432 void find_exception_vec(firmware *fw, iter_state_t *is)
2433 {
2434     // check for exception vector, d7 id
2435     // only on thumb2 for now
2436     if(fw->arch != FW_ARCH_ARMv7) {
2437         return;
2438     }
2439 
2440     const insn_match_t match_bl_mcr[]={
2441         {MATCH_INS(BL,  1), {MATCH_OP_IMM_ANY}},
2442         // Vector Base Address Register MCR p15, 0, <Rt>, c12, c0, 0 - not present on PMSA
2443         {MATCH_INS(MCR, 6), {MATCH_OP_PIMM(15),MATCH_OP_IMM(0),MATCH_OP_REG_ANY,MATCH_OP_CIMM(12),MATCH_OP_CIMM(0),MATCH_OP_IMM(0)}},
2444         {ARM_INS_ENDING}
2445     };
2446 
2447     // reset to main fw start
2448     disasm_iter_init(fw, is, fw->base + fw->main_offs + 12 + fw->thumb_default);
2449     if(!insn_match_find_next(fw,is,4,match_bl_mcr)) {
2450         // printf("no match!\n");
2451         return;
2452     }
2453     // check which instruction we matched
2454     uint32_t faddr = get_branch_call_insn_target(fw,is);
2455     if(faddr) {
2456         // bl = digic6, has function to set up exception vector
2457         disasm_iter_init(fw, is, faddr);
2458         disasm_iter(fw, is);
2459         int ra,rb;
2460         uint32_t va, vb;
2461         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2462             return;
2463         }
2464         ra = is->insn->detail->arm.operands[0].reg;
2465         va = is->insn->detail->arm.operands[1].imm;
2466         disasm_iter(fw, is);
2467         if(is->insn->id != ARM_INS_MOVT
2468             || is->insn->detail->arm.operands[0].reg != ra
2469             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2470             return;
2471         }
2472         va = (is->insn->detail->arm.operands[1].imm << 16) | (va & 0xFFFF);
2473         // fw has BIC
2474         va = va & ~1;
2475         if(adr_get_range_type(fw,va) != ADR_RANGE_ROM) {
2476             return;
2477         }
2478         disasm_iter(fw, is);
2479         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2480             return;
2481         }
2482         rb = is->insn->detail->arm.operands[0].reg;
2483         vb = is->insn->detail->arm.operands[1].imm;
2484         disasm_iter(fw, is);
2485         if(is->insn->id != ARM_INS_MOVT
2486             || is->insn->detail->arm.operands[0].reg != rb
2487             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2488             return;
2489         }
2490         vb = (is->insn->detail->arm.operands[1].imm << 16) | (vb & 0xFFFF);
2491         vb = vb & ~1;
2492         if(adr_get_range_type(fw,vb) != ADR_RANGE_ROM) {
2493             return;
2494         }
2495         if(va >= vb) {
2496             return;
2497         }
2498         fw_add_adr_range(fw,0,vb - va, va, ADR_RANGE_RAM_CODE, ADR_RANGE_FL_EVEC | ADR_RANGE_FL_TCM);
2499         // printf("ex vec 0x%08x-0x%08x\n",va,vb);
2500 
2501     } else if(is->insn->id == ARM_INS_MCR) {
2502         // digic 7 = mcr ...
2503         fw->arch_flags |= FW_ARCH_FL_VMSA;
2504         // rewind 1
2505         disasm_iter_init(fw, is, adr_hist_get(&is->ah,1));
2506         disasm_iter(fw, is);
2507         // uint32_t ex_vec = LDR_PC2val(fw,is->insn);
2508         //printf("found MCR @ 0x%"PRIx64" ex vec at 0x%08x\n",is->insn->address,ex_vec);
2509     }
2510 }
2511 
2512 // init basic copied RAM code / data ranges
2513 void firmware_init_data_ranges(firmware *fw)
2514 {
2515 //TODO maybe should return status
2516     uint32_t src_start, dst_start, dst_end;
2517     uint32_t data_found_copy = 0;
2518 
2519     // start at fw start  + 12 (32 bit jump, gaonisoy)
2520     iter_state_t *is=disasm_iter_new(fw, fw->base + fw->main_offs + 12 + fw->thumb_default);
2521 
2522     fw->data_init_start=0;
2523     fw->data_start=0;
2524     fw->data_len=0;
2525 
2526     fw->memisostart=0;
2527 
2528     int base2_found=0;
2529     int base3_found=0;
2530 
2531     // TODO  pre-d6 ROMs have a lot more stuff before first copy
2532     int max_search=100;
2533     while(find_startup_copy(fw,is,max_search,&src_start,&dst_start,&dst_end)) {
2534         // all known copied code is 3f1000 or higher, guess data
2535         if(dst_start < 0x100000) {
2536             // fprintf(stderr, "data?  @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2537             if(fw->data_init_start) {
2538                 fprintf(stderr,"firmware_init_data_ranges: data already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2539                         dst_start,src_start,dst_end);
2540                 continue;
2541             }
2542 
2543             // not a known value, warn
2544             if(dst_start != 0x1900 && dst_start != 0x8000) {
2545                 fprintf(stderr,"firmware_init_data_ranges: guess unknown ROM data_start 0x%08x src 0x%08x end 0x%08x\n",
2546                         dst_start,src_start,dst_end);
2547             }
2548             fw->data_init_start=src_start;
2549             fw->data_start=dst_start;
2550             fw->data_len=dst_end-dst_start;
2551             fw_add_adr_range(fw,dst_start,dst_end,src_start, ADR_RANGE_INIT_DATA, ADR_RANGE_FL_NONE);
2552             data_found_copy=is->adr;
2553         } else if(dst_start < 0x08000000) { /// highest known first copied ram code 0x01900000
2554             // fprintf(stderr,"code1? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2555             if(base2_found) {
2556                 fprintf(stderr,"firmware_init_data_ranges: base2 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2557                         dst_start,src_start,dst_end);
2558                 continue;
2559             }
2560             base2_found=1;
2561             // known values
2562             if( dst_start != 0x003f1000 &&
2563                 dst_start != 0x00431000 &&
2564                 dst_start != 0x00471000 &&
2565                 dst_start != 0x00685000 &&
2566                 dst_start != 0x00671000 &&
2567                 dst_start != 0x006b1000 &&
2568                 dst_start != 0x010c1000 &&
2569                 dst_start != 0x010e1000 &&
2570                 dst_start != 0x01900000) {
2571                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base2 0x%08x src 0x%08x end 0x%08x\n",
2572                         dst_start,src_start,dst_end);
2573             }
2574             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_NONE);
2575         } else { // know < ROM based on match, assume second copied code
2576             // fprintf(stderr, "code2? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2577             if(base3_found) {
2578                 fprintf(stderr,"firmware_init_data_ranges: base3 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2579                         dst_start,src_start,dst_end);
2580                 continue;
2581             }
2582             base3_found=1;
2583             if(dst_start != 0xbfe10800 && // known digic 6 value (g5x)
2584                dst_start != 0xdffc4900) { // known digic 7 value (m5)
2585                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base3 0x%08x src 0x%08x end 0x%08x\n",
2586                         dst_start,src_start,dst_end);
2587             }
2588             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_TCM);
2589         }
2590         if(fw->data_start && base2_found && base3_found) {
2591             break;
2592         }
2593         // after first, shorter search range in between copies
2594         max_search=16;
2595     }
2596 
2597     // look for BSS init after last found copy
2598     if(data_found_copy) {
2599         int count=0;
2600         uint32_t *eptr=NULL;
2601         uint32_t *dptr=NULL;
2602         disasm_iter_init(fw,is,(data_found_copy-4) | fw->thumb_default);
2603         while(disasm_iter(fw,is) && count < 20) {
2604             uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2605             // not an LDR pc, reset;
2606             if(!pv) {
2607                 //dptr=eptr=NULL;
2608             } else if(!dptr) {
2609                 // TODO older firmwares use reg with ending value from DATA copy
2610                 // should be equal to end pointer of data
2611                 if(*pv == fw->data_start + fw->data_len) {
2612                     dptr=pv;
2613                 }
2614             } else if(!eptr) {
2615                 if(*pv < fw->base) {
2616                     if(*pv != fw->data_start + fw->data_len) {
2617                         eptr=pv;
2618                     }
2619                 } else { // dest end address in ROM, reset
2620                     eptr=dptr=NULL;
2621                 }
2622             }
2623             if(dptr && eptr) {
2624                 // fprintf(stderr, "bss?   @0x%"PRIx64" 0x%08x-0x%08x\n",is->adr,*dptr,*eptr);
2625                 fw->memisostart=*eptr;
2626                 break;
2627             }
2628             count++;
2629         }
2630     }
2631 
2632     find_exception_vec(fw,is);
2633 
2634     // if data found, adjust default code search range
2635     // TODO could use copied code regions too, but after data on known firmwares
2636     if(fw->data_start) {
2637         fw->rom_code_search_max_adr=fw->data_init_start;
2638     }
2639     // if dryos version string found, use as search limit
2640     if(fw->dryos_ver_adr) {
2641         if(fw->dryos_ver_adr < fw->rom_code_search_max_adr) {
2642             fw->rom_code_search_max_adr = fw->dryos_ver_adr;
2643         }
2644     }
2645     disasm_iter_free(is);
2646 }
2647 
2648 // free resources associated with fw
2649 void firmware_unload(firmware *fw)
2650 {
2651     if(!fw) {
2652         return;
2653     }
2654     if(fw->is) {
2655         disasm_iter_free(fw->is);
2656     }
2657     if(fw->cs_handle_arm) {
2658         cs_close(&fw->cs_handle_arm);
2659     }
2660     if(fw->cs_handle_thumb) {
2661         cs_close(&fw->cs_handle_thumb);
2662     }
2663     free(fw->buf8);
2664     memset(fw,0,sizeof(firmware));
2665 }

/* [<][>][^][v][top][bottom][index][help] */